Re: [PATCH 2/2] powernv:idle:Implement lite variant of power_enter_stop

2016-09-19 Thread Balbir Singh


On 16/09/16 19:47, Gautham R. Shenoy wrote:
> From: "Gautham R. Shenoy" 
> 
> This patch adds a function named power_enter_stop_lite() that can
> execute a stop instruction when ESL and EC bits are set to zero in the
> PSSCR.  The function handles the wake-up from idle at the instruction
> immediately after the stop instruction.
> 
> If the flag OPAL_PM_WAKEUP_AT_NEXT_INST[1] is set in the device tree
> for a stop state, then use the lite variant for that particular stop
> state.
> 
> [1] : The corresponding patch in skiboot that defines
>   OPAL_PM_WAKEUP_AT_NEXT_INST and enables it in the device tree
>   can be found here:
>   https://lists.ozlabs.org/pipermail/skiboot/2016-September/004805.html
> 
> Signed-off-by: Gautham R. Shenoy 
> ---
>  arch/powerpc/include/asm/opal-api.h   |  1 +
>  arch/powerpc/include/asm/processor.h  |  3 ++-
>  arch/powerpc/kernel/idle_book3s.S | 28 +---
>  arch/powerpc/platforms/powernv/idle.c | 17 ++---
>  arch/powerpc/platforms/powernv/smp.c  |  2 +-
>  drivers/cpuidle/cpuidle-powernv.c | 24 ++--
>  6 files changed, 65 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/opal-api.h 
> b/arch/powerpc/include/asm/opal-api.h
> index 0e2e57b..6e5741e 100644
> --- a/arch/powerpc/include/asm/opal-api.h
> +++ b/arch/powerpc/include/asm/opal-api.h
> @@ -179,6 +179,7 @@
>  #define OPAL_PM_TIMEBASE_STOP0x0002
>  #define OPAL_PM_LOSE_HYP_CONTEXT 0x2000
>  #define OPAL_PM_LOSE_FULL_CONTEXT0x4000
> +#define OPAL_PM_WAKEUP_AT_NEXT_INST  0x8000
>  #define OPAL_PM_NAP_ENABLED  0x0001
>  #define OPAL_PM_SLEEP_ENABLED0x0002
>  #define OPAL_PM_WINKLE_ENABLED   0x0004
> diff --git a/arch/powerpc/include/asm/processor.h 
> b/arch/powerpc/include/asm/processor.h
> index 68e3bf5..e0549a0 100644
> --- a/arch/powerpc/include/asm/processor.h
> +++ b/arch/powerpc/include/asm/processor.h
> @@ -460,7 +460,8 @@ extern int powersave_nap; /* set if nap mode can be used 
> in idle loop */
>  extern unsigned long power7_nap(int check_irq);
>  extern unsigned long power7_sleep(void);
>  extern unsigned long power7_winkle(void);
> -extern unsigned long power9_idle_stop(unsigned long stop_level);
> +extern unsigned long power9_idle_stop(unsigned long stop_level,
> + unsigned long exec_lite);
>  
>  extern void flush_instruction_cache(void);
>  extern void hard_reset_now(void);
> diff --git a/arch/powerpc/kernel/idle_book3s.S 
> b/arch/powerpc/kernel/idle_book3s.S
> index 32d666b..47ee106 100644
> --- a/arch/powerpc/kernel/idle_book3s.S
> +++ b/arch/powerpc/kernel/idle_book3s.S
> @@ -43,6 +43,8 @@
>  #define PSSCR_HV_TEMPLATEPSSCR_ESL | PSSCR_EC | \
>   PSSCR_PSLL_MASK | PSSCR_TR_MASK | \
>   PSSCR_MTL_MASK
> +#define PSSCR_HV_TEMPLATE_LITE   PSSCR_PSLL_MASK | PSSCR_TR_MASK | \
> +  PSSCR_MTL_MASK
>  
>   .text
>  
> @@ -246,6 +248,20 @@ enter_winkle:
>  
>   IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
>  
> +
> +/*
> + * power_enter_stop_lite : This will resume the wake up from
> + * idle at the subsequent instruction.
> + *
> + * Caller should set ESL=EC=0 in PSSCR before calling
> + * this function.
> + *
> + */
> +power_enter_stop_lite:
> + IDLE_STATE_ENTER_SEQ(PPC_STOP)
> +7:   li  r3,0  /* Since we didn't lose state, return 0 */
> + b   pnv_wakeup_noloss
> +
>  /*
>   * r3 - requested stop state
>   */
> @@ -333,13 +349,19 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 
> 66);\
>  
>  /*
>   * r3 - requested stop state
> + * r4 - Indicates if the lite variant with ESL=EC=0 should be executed.
>   */
>  _GLOBAL(power9_idle_stop)
> - LOAD_REG_IMMEDIATE(r4, PSSCR_HV_TEMPLATE)
> - or  r4,r4,r3
> + cmpdi   r4, 1
> + bne 4f
> + LOAD_REG_IMMEDIATE(r4, PSSCR_HV_TEMPLATE_LITE)
> + LOAD_REG_ADDR(r5,power_enter_stop_lite)
> + b   5f
> +4:   LOAD_REG_IMMEDIATE(r4, PSSCR_HV_TEMPLATE)
> + LOAD_REG_ADDR(r5,power_enter_stop)
> +5:   or  r4,r4,r3
>   mtspr   SPRN_PSSCR, r4
>   li  r4, 1
> - LOAD_REG_ADDR(r5,power_enter_stop)
>   b   pnv_powersave_common
>   /* No return */
>  /*
> diff --git a/arch/powerpc/platforms/powernv/idle.c 
> b/arch/powerpc/platforms/powernv/idle.c
> index 479c256..c3d3fed 100644
> --- a/arch/powerpc/platforms/powernv/idle.c
> +++ b/arch/powerpc/platforms/powernv/idle.c
> @@ -244,8 +244,15 @@ static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
>  static void power9_idle(void)
>  {
>   /* Requesting stop state 0 */
> - power9_idle_stop(0);
> + power9_idle_stop(0, 0);
>  }
> +
> +static void power9_idle_lite(void)
> +{
> + /* Requesting stop state 0 with ESL=EC=0 */
> + power9_idle_stop(0, 1);
> +}
> +
>  

Re: [PATCH 2/2] powernv:idle:Implement lite variant of power_enter_stop

2016-09-19 Thread Balbir Singh


On 16/09/16 19:47, Gautham R. Shenoy wrote:
> From: "Gautham R. Shenoy" 
> 
> This patch adds a function named power_enter_stop_lite() that can
> execute a stop instruction when ESL and EC bits are set to zero in the
> PSSCR.  The function handles the wake-up from idle at the instruction
> immediately after the stop instruction.
> 
> If the flag OPAL_PM_WAKEUP_AT_NEXT_INST[1] is set in the device tree
> for a stop state, then use the lite variant for that particular stop
> state.
> 
> [1] : The corresponding patch in skiboot that defines
>   OPAL_PM_WAKEUP_AT_NEXT_INST and enables it in the device tree
>   can be found here:
>   https://lists.ozlabs.org/pipermail/skiboot/2016-September/004805.html
> 
> Signed-off-by: Gautham R. Shenoy 
> ---
>  arch/powerpc/include/asm/opal-api.h   |  1 +
>  arch/powerpc/include/asm/processor.h  |  3 ++-
>  arch/powerpc/kernel/idle_book3s.S | 28 +---
>  arch/powerpc/platforms/powernv/idle.c | 17 ++---
>  arch/powerpc/platforms/powernv/smp.c  |  2 +-
>  drivers/cpuidle/cpuidle-powernv.c | 24 ++--
>  6 files changed, 65 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/opal-api.h 
> b/arch/powerpc/include/asm/opal-api.h
> index 0e2e57b..6e5741e 100644
> --- a/arch/powerpc/include/asm/opal-api.h
> +++ b/arch/powerpc/include/asm/opal-api.h
> @@ -179,6 +179,7 @@
>  #define OPAL_PM_TIMEBASE_STOP0x0002
>  #define OPAL_PM_LOSE_HYP_CONTEXT 0x2000
>  #define OPAL_PM_LOSE_FULL_CONTEXT0x4000
> +#define OPAL_PM_WAKEUP_AT_NEXT_INST  0x8000
>  #define OPAL_PM_NAP_ENABLED  0x0001
>  #define OPAL_PM_SLEEP_ENABLED0x0002
>  #define OPAL_PM_WINKLE_ENABLED   0x0004
> diff --git a/arch/powerpc/include/asm/processor.h 
> b/arch/powerpc/include/asm/processor.h
> index 68e3bf5..e0549a0 100644
> --- a/arch/powerpc/include/asm/processor.h
> +++ b/arch/powerpc/include/asm/processor.h
> @@ -460,7 +460,8 @@ extern int powersave_nap; /* set if nap mode can be used 
> in idle loop */
>  extern unsigned long power7_nap(int check_irq);
>  extern unsigned long power7_sleep(void);
>  extern unsigned long power7_winkle(void);
> -extern unsigned long power9_idle_stop(unsigned long stop_level);
> +extern unsigned long power9_idle_stop(unsigned long stop_level,
> + unsigned long exec_lite);
>  
>  extern void flush_instruction_cache(void);
>  extern void hard_reset_now(void);
> diff --git a/arch/powerpc/kernel/idle_book3s.S 
> b/arch/powerpc/kernel/idle_book3s.S
> index 32d666b..47ee106 100644
> --- a/arch/powerpc/kernel/idle_book3s.S
> +++ b/arch/powerpc/kernel/idle_book3s.S
> @@ -43,6 +43,8 @@
>  #define PSSCR_HV_TEMPLATEPSSCR_ESL | PSSCR_EC | \
>   PSSCR_PSLL_MASK | PSSCR_TR_MASK | \
>   PSSCR_MTL_MASK
> +#define PSSCR_HV_TEMPLATE_LITE   PSSCR_PSLL_MASK | PSSCR_TR_MASK | \
> +  PSSCR_MTL_MASK
>  
>   .text
>  
> @@ -246,6 +248,20 @@ enter_winkle:
>  
>   IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
>  
> +
> +/*
> + * power_enter_stop_lite : This will resume the wake up from
> + * idle at the subsequent instruction.
> + *
> + * Caller should set ESL=EC=0 in PSSCR before calling
> + * this function.
> + *
> + */
> +power_enter_stop_lite:
> + IDLE_STATE_ENTER_SEQ(PPC_STOP)
> +7:   li  r3,0  /* Since we didn't lose state, return 0 */
> + b   pnv_wakeup_noloss
> +
>  /*
>   * r3 - requested stop state
>   */
> @@ -333,13 +349,19 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 
> 66);\
>  
>  /*
>   * r3 - requested stop state
> + * r4 - Indicates if the lite variant with ESL=EC=0 should be executed.
>   */
>  _GLOBAL(power9_idle_stop)
> - LOAD_REG_IMMEDIATE(r4, PSSCR_HV_TEMPLATE)
> - or  r4,r4,r3
> + cmpdi   r4, 1
> + bne 4f
> + LOAD_REG_IMMEDIATE(r4, PSSCR_HV_TEMPLATE_LITE)
> + LOAD_REG_ADDR(r5,power_enter_stop_lite)
> + b   5f
> +4:   LOAD_REG_IMMEDIATE(r4, PSSCR_HV_TEMPLATE)
> + LOAD_REG_ADDR(r5,power_enter_stop)
> +5:   or  r4,r4,r3
>   mtspr   SPRN_PSSCR, r4
>   li  r4, 1
> - LOAD_REG_ADDR(r5,power_enter_stop)
>   b   pnv_powersave_common
>   /* No return */
>  /*
> diff --git a/arch/powerpc/platforms/powernv/idle.c 
> b/arch/powerpc/platforms/powernv/idle.c
> index 479c256..c3d3fed 100644
> --- a/arch/powerpc/platforms/powernv/idle.c
> +++ b/arch/powerpc/platforms/powernv/idle.c
> @@ -244,8 +244,15 @@ static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
>  static void power9_idle(void)
>  {
>   /* Requesting stop state 0 */
> - power9_idle_stop(0);
> + power9_idle_stop(0, 0);
>  }
> +
> +static void power9_idle_lite(void)
> +{
> + /* Requesting stop state 0 with ESL=EC=0 */
> + power9_idle_stop(0, 1);
> +}
> +
>  /*
>   * First deep stop state. Used to figure out 

Re: "CodingStyle: Clarify and complete chapter 7" in docs-next (was Re: [PATCH 03/47] block-rbd: Adjust the position of a jump label in rbd_header_from_disk())

2016-09-19 Thread Julia Lawall


On Mon, 19 Sep 2016, Joe Perches wrote:

> On Tue, 2016-09-20 at 01:11 +0100, Al Viro wrote:
> > IMO what we need is to go through all rules in CodingStyle and if for
> > some rule there is no overwhelming majority in the core kernel, well,
> > the list has grown way too large and could use massive trimming.
>
> I'm in complete agreement.
>
> I also think that checkpatch's ERROR/WARNING/CHECK message naming is
> far too severe and injunctive and could use a renaming to something
> more silly, bug related and less commanding like FLEAS/GNATS/NITS.

I think it is better to be clear.  CHECK was never really clear to me,
especially if you see it in isolation, on a file that doesn't also have
ERROR or WARNING.  NITS is a common word in this context, but not FLEAS
and GNATS, as far as I know.

There could also be a severity level: high, medium, and low.

julia

>
> --
> To unsubscribe from this list: send the line "unsubscribe kernel-janitors" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>


Re: "CodingStyle: Clarify and complete chapter 7" in docs-next (was Re: [PATCH 03/47] block-rbd: Adjust the position of a jump label in rbd_header_from_disk())

2016-09-19 Thread Julia Lawall


On Mon, 19 Sep 2016, Joe Perches wrote:

> On Tue, 2016-09-20 at 01:11 +0100, Al Viro wrote:
> > IMO what we need is to go through all rules in CodingStyle and if for
> > some rule there is no overwhelming majority in the core kernel, well,
> > the list has grown way too large and could use massive trimming.
>
> I'm in complete agreement.
>
> I also think that checkpatch's ERROR/WARNING/CHECK message naming is
> far too severe and injunctive and could use a renaming to something
> more silly, bug related and less commanding like FLEAS/GNATS/NITS.

I think it is better to be clear.  CHECK was never really clear to me,
especially if you see it in isolation, on a file that doesn't also have
ERROR or WARNING.  NITS is a common word in this context, but not FLEAS
and GNATS, as far as I know.

There could also be a severity level: high, medium, and low.

julia

>
> --
> To unsubscribe from this list: send the line "unsubscribe kernel-janitors" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>


Re: [PATCH] nfs: cover ->migratepage with CONFIG_MIGRATION

2016-09-19 Thread Chao Yu
On 2016/9/20 20:51, kbuild test robot wrote:
>>> fs/nfs/file.c:547:17: error: 'nfs_migrate_page' undeclared here (not in a 
>>> function)
>  .migratepage = nfs_migrate_page,

Oops :(, sorry for my mistake, let me fix this.

Thanks,



Re: [PATCH] nfs: cover ->migratepage with CONFIG_MIGRATION

2016-09-19 Thread Chao Yu
On 2016/9/20 20:51, kbuild test robot wrote:
>>> fs/nfs/file.c:547:17: error: 'nfs_migrate_page' undeclared here (not in a 
>>> function)
>  .migratepage = nfs_migrate_page,

Oops :(, sorry for my mistake, let me fix this.

Thanks,



[PATCH 2/3] lib/ioremap.c: avoid endless loop under ioremapping improper ranges

2016-09-19 Thread zijun_hu
From: zijun_hu 

For ioremap_page_range(), an endless loop may happen if either of the
parameters addr and end is not page aligned. To fix this issue and to make
the range parameter requirements explicit, a BUG_ON() check is performed first.

For ioremap_pte_range(), the loop end condition is changed from "addr != end"
to "addr < end" because there is no corresponding pte_addr_end() macro.

Signed-off-by: zijun_hu 
---
 lib/ioremap.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/ioremap.c b/lib/ioremap.c
index 86c8911..0058cc8 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -64,7 +64,7 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
BUG_ON(!pte_none(*pte));
        set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot));
pfn++;
-   } while (pte++, addr += PAGE_SIZE, addr != end);
+   } while (pte++, addr += PAGE_SIZE, addr < end);
return 0;
 }
 
@@ -129,6 +129,7 @@ int ioremap_page_range(unsigned long addr,
int err;
 
BUG_ON(addr >= end);
+   BUG_ON(!PAGE_ALIGNED(addr | end));
 
start = addr;
phys_addr -= addr;
-- 
1.9.1



[PATCH 2/3] lib/ioremap.c: avoid endless loop under ioremapping improper ranges

2016-09-19 Thread zijun_hu
From: zijun_hu 

For ioremap_page_range(), an endless loop may happen if either of the
parameters addr and end is not page aligned. To fix this issue and to make
the range parameter requirements explicit, a BUG_ON() check is performed first.

For ioremap_pte_range(), the loop end condition is changed from "addr != end"
to "addr < end" because there is no corresponding pte_addr_end() macro.

Signed-off-by: zijun_hu 
---
 lib/ioremap.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/ioremap.c b/lib/ioremap.c
index 86c8911..0058cc8 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -64,7 +64,7 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
BUG_ON(!pte_none(*pte));
        set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot));
pfn++;
-   } while (pte++, addr += PAGE_SIZE, addr != end);
+   } while (pte++, addr += PAGE_SIZE, addr < end);
return 0;
 }
 
@@ -129,6 +129,7 @@ int ioremap_page_range(unsigned long addr,
int err;
 
BUG_ON(addr >= end);
+   BUG_ON(!PAGE_ALIGNED(addr | end));
 
start = addr;
phys_addr -= addr;
-- 
1.9.1



Re: [INFO] ratio of const vs dynamic usercopy

2016-09-19 Thread kbuild test robot
Hi Kees,

[auto build test WARNING on linus/master]
[also build test WARNING on v4.8-rc7 next-20160919]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]
[Suggest to use git(>=2.9.0) format-patch --base= (or --base=auto for 
convenience) to record what (public, well-known) commit your patch series was 
built on]
[Check https://git-scm.com/docs/git-format-patch for more information]

url:
https://github.com/0day-ci/linux/commits/Kees-Cook/ratio-of-const-vs-dynamic-usercopy/20160920-130133
config: i386-randconfig-s0-09191616 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All warnings (new ones prefixed by >>):

   In file included from arch/x86/include/asm/preempt.h:6:0,
from include/linux/preempt.h:59,
from include/linux/spinlock.h:50,
from include/linux/mm_types.h:8,
from include/linux/kmemcheck.h:4,
from include/linux/skbuff.h:18,
from include/linux/if_arp.h:26,
from net/ipx/af_ipx.c:33:
   In function 'check_object_size',
   inlined from 'copy_to_user' at arch/x86/include/asm/uaccess.h:742:3,
   inlined from 'ipx_getsockopt' at net/ipx/af_ipx.c:1316:2:
>> include/linux/thread_info.h:129:3: warning: call to '__check_object_size' 
>> declared with attribute warning: dynamic usercopy
  __check_object_size(ptr, n, to_user);
  ^~~~
   In function 'check_object_size',
   inlined from 'copy_from_user' at arch/x86/include/asm/uaccess.h:722:3,
   inlined from 'ipxitf_ioctl' at net/ipx/af_ipx.c:1129:3,
   inlined from 'ipx_ioctl' at net/ipx/af_ipx.c:1872:6:
>> include/linux/thread_info.h:131:3: warning: call to 
>> '__skip_check_object_size' declared with attribute warning: builtin-const 
>> usercopy
  __skip_check_object_size();
  ^~
   In function 'check_object_size',
   inlined from 'copy_from_user' at arch/x86/include/asm/uaccess.h:722:3,
   inlined from 'ipxitf_ioctl' at net/ipx/af_ipx.c:1154:3,
   inlined from 'ipx_ioctl' at net/ipx/af_ipx.c:1872:6:
>> include/linux/thread_info.h:131:3: warning: call to 
>> '__skip_check_object_size' declared with attribute warning: builtin-const 
>> usercopy
  __skip_check_object_size();
  ^~
   In function 'check_object_size',
   inlined from 'copy_to_user' at arch/x86/include/asm/uaccess.h:742:3,
   inlined from 'ipxitf_ioctl' at net/ipx/af_ipx.c:1172:3,
   inlined from 'ipx_ioctl' at net/ipx/af_ipx.c:1872:6:
>> include/linux/thread_info.h:131:3: warning: call to 
>> '__skip_check_object_size' declared with attribute warning: builtin-const 
>> usercopy
  __skip_check_object_size();
  ^~
   In function 'check_object_size',
   inlined from 'copy_to_user' at arch/x86/include/asm/uaccess.h:742:3,
   inlined from 'ipxcfg_get_config_data' at net/ipx/af_ipx.c:105:9,
   inlined from 'ipx_ioctl' at net/ipx/af_ipx.c:1875:6:
>> include/linux/thread_info.h:131:3: warning: call to 
>> '__skip_check_object_size' declared with attribute warning: builtin-const 
>> usercopy
  __skip_check_object_size();
  ^~
--
   In file included from arch/x86/include/asm/preempt.h:6:0,
from include/linux/preempt.h:59,
from include/linux/spinlock.h:50,
from include/linux/mmzone.h:7,
from include/linux/gfp.h:5,
from include/linux/slab.h:14,
from net/ipx/ipx_route.c:12:
   In function 'check_object_size',
   inlined from 'copy_from_user' at arch/x86/include/asm/uaccess.h:722:3,
   inlined from 'ipxrtr_ioctl' at net/ipx/ipx_route.c:264:2:
>> include/linux/thread_info.h:131:3: warning: call to 
>> '__skip_check_object_size' declared with attribute warning: builtin-const 
>> usercopy
  __skip_check_object_size();
  ^~
--
   In file included from arch/x86/include/asm/preempt.h:6:0,
from include/linux/preempt.h:59,
from include/linux/spinlock.h:50,
from include/linux/seqlock.h:35,
from include/linux/time.h:5,
from include/uapi/linux/timex.h:56,
from include/linux/timex.h:56,
from include/linux/sched.h:19,
from include/linux/uaccess.h:4,
from security/keys/encrypted-keys/encrypted.c:17:
   In function 'check_object_size',
   inlined from 'copy_to_user' at arch/x86/include/asm/uaccess.h:742:3,
   inlined from 'encrypte

Re: [INFO] ratio of const vs dynamic usercopy

2016-09-19 Thread kbuild test robot
Hi Kees,

[auto build test WARNING on linus/master]
[also build test WARNING on v4.8-rc7 next-20160919]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]
[Suggest to use git(>=2.9.0) format-patch --base= (or --base=auto for 
convenience) to record what (public, well-known) commit your patch series was 
built on]
[Check https://git-scm.com/docs/git-format-patch for more information]

url:
https://github.com/0day-ci/linux/commits/Kees-Cook/ratio-of-const-vs-dynamic-usercopy/20160920-130133
config: i386-randconfig-s0-09191616 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All warnings (new ones prefixed by >>):

   In file included from arch/x86/include/asm/preempt.h:6:0,
from include/linux/preempt.h:59,
from include/linux/spinlock.h:50,
from include/linux/mm_types.h:8,
from include/linux/kmemcheck.h:4,
from include/linux/skbuff.h:18,
from include/linux/if_arp.h:26,
from net/ipx/af_ipx.c:33:
   In function 'check_object_size',
   inlined from 'copy_to_user' at arch/x86/include/asm/uaccess.h:742:3,
   inlined from 'ipx_getsockopt' at net/ipx/af_ipx.c:1316:2:
>> include/linux/thread_info.h:129:3: warning: call to '__check_object_size' 
>> declared with attribute warning: dynamic usercopy
  __check_object_size(ptr, n, to_user);
  ^~~~
   In function 'check_object_size',
   inlined from 'copy_from_user' at arch/x86/include/asm/uaccess.h:722:3,
   inlined from 'ipxitf_ioctl' at net/ipx/af_ipx.c:1129:3,
   inlined from 'ipx_ioctl' at net/ipx/af_ipx.c:1872:6:
>> include/linux/thread_info.h:131:3: warning: call to 
>> '__skip_check_object_size' declared with attribute warning: builtin-const 
>> usercopy
  __skip_check_object_size();
  ^~
   In function 'check_object_size',
   inlined from 'copy_from_user' at arch/x86/include/asm/uaccess.h:722:3,
   inlined from 'ipxitf_ioctl' at net/ipx/af_ipx.c:1154:3,
   inlined from 'ipx_ioctl' at net/ipx/af_ipx.c:1872:6:
>> include/linux/thread_info.h:131:3: warning: call to 
>> '__skip_check_object_size' declared with attribute warning: builtin-const 
>> usercopy
  __skip_check_object_size();
  ^~
   In function 'check_object_size',
   inlined from 'copy_to_user' at arch/x86/include/asm/uaccess.h:742:3,
   inlined from 'ipxitf_ioctl' at net/ipx/af_ipx.c:1172:3,
   inlined from 'ipx_ioctl' at net/ipx/af_ipx.c:1872:6:
>> include/linux/thread_info.h:131:3: warning: call to 
>> '__skip_check_object_size' declared with attribute warning: builtin-const 
>> usercopy
  __skip_check_object_size();
  ^~
   In function 'check_object_size',
   inlined from 'copy_to_user' at arch/x86/include/asm/uaccess.h:742:3,
   inlined from 'ipxcfg_get_config_data' at net/ipx/af_ipx.c:105:9,
   inlined from 'ipx_ioctl' at net/ipx/af_ipx.c:1875:6:
>> include/linux/thread_info.h:131:3: warning: call to 
>> '__skip_check_object_size' declared with attribute warning: builtin-const 
>> usercopy
  __skip_check_object_size();
  ^~
--
   In file included from arch/x86/include/asm/preempt.h:6:0,
from include/linux/preempt.h:59,
from include/linux/spinlock.h:50,
from include/linux/mmzone.h:7,
from include/linux/gfp.h:5,
from include/linux/slab.h:14,
from net/ipx/ipx_route.c:12:
   In function 'check_object_size',
   inlined from 'copy_from_user' at arch/x86/include/asm/uaccess.h:722:3,
   inlined from 'ipxrtr_ioctl' at net/ipx/ipx_route.c:264:2:
>> include/linux/thread_info.h:131:3: warning: call to 
>> '__skip_check_object_size' declared with attribute warning: builtin-const 
>> usercopy
  __skip_check_object_size();
  ^~
--
   In file included from arch/x86/include/asm/preempt.h:6:0,
from include/linux/preempt.h:59,
from include/linux/spinlock.h:50,
from include/linux/seqlock.h:35,
from include/linux/time.h:5,
from include/uapi/linux/timex.h:56,
from include/linux/timex.h:56,
from include/linux/sched.h:19,
from include/linux/uaccess.h:4,
from security/keys/encrypted-keys/encrypted.c:17:
   In function 'check_object_size',
   inlined from 'copy_to_user' at arch/x86/include/asm/uaccess.h:742:3,
   inlined from 'encrypte

Re: [PATCH 2/6] kvm: x86: drop read_tsc_offset()

2016-09-19 Thread Paolo Bonzini


On 20/09/2016 00:18, Jim Mattson wrote:
> Hmmm. Yes, I think it does. With this patch series,
> vcpu->arch.tsc_offset appears to contain L1's TSC offset (perhaps
> making vmx->nested.vmcs01_tsc_offset redundant).
> 
> However, this unfortunately limits the newly added functionality to
> merging host and *L1* guest traces. It doesn't work with L2 (or
> deeper) guests. Or perhaps I'm missing something?

You can merge L1/L2 first and then host/L1.

Paolo


Re: [PATCH 2/6] kvm: x86: drop read_tsc_offset()

2016-09-19 Thread Paolo Bonzini


On 20/09/2016 00:18, Jim Mattson wrote:
> Hmmm. Yes, I think it does. With this patch series,
> vcpu->arch.tsc_offset appears to contain L1's TSC offset (perhaps
> making vmx->nested.vmcs01_tsc_offset redundant).
> 
> However, this unfortunately limits the newly added functionality to
> merging host and *L1* guest traces. It doesn't work with L2 (or
> deeper) guests. Or perhaps I'm missing something?

You can merge L1/L2 first and then host/L1.

Paolo


Re: [PATCH -v3 00/10] THP swap: Delay splitting THP during swapping out

2016-09-19 Thread Minchan Kim
Hi Huang,

On Tue, Sep 20, 2016 at 10:54:35AM +0800, Huang, Ying wrote:
> Hi, Minchan,
> 
> Minchan Kim  writes:
> > Hi Huang,
> >
> > On Sun, Sep 18, 2016 at 09:53:39AM +0800, Huang, Ying wrote:
> >> Minchan Kim  writes:
> >> 
> >> > On Tue, Sep 13, 2016 at 04:53:49PM +0800, Huang, Ying wrote:
> >> >> Minchan Kim  writes:
> >> >> > On Tue, Sep 13, 2016 at 02:40:00PM +0800, Huang, Ying wrote:
> >> >> >> Minchan Kim  writes:
> >> >> >> 
> >> >> >> > Hi Huang,
> >> >> >> >
> >> >> >> > On Fri, Sep 09, 2016 at 01:35:12PM -0700, Huang, Ying wrote:
> >> >> >> >
> 
> [snip]
> 
> >> >> > 1. If we solve batching swapout, then how is THP split for swapout 
> >> >> > bad?
> >> >> > 2. Also, how is current conservative swapin from khugepaged bad?
> >> >> >
> >> >> > I think it's one of decision point for the motivation of your work
> >> >> > and for 1, we need batching swapout feature.
> >> >> >
> >> >> > I am saying again that I'm not against your goal but only concern
> >> >> > is approach. If you don't agree, please ignore me.
> >> >> 
> >> >> I am glad to discuss my final goal, that is, swapping out/in the full
> >> >> THP without splitting.  Why I want to do that is copied as below,
> >> >
> >> > Yes, it's your *final* goal but what if it couldn't be acceptable
> >> > on second step you mentioned above, for example?
> >> >
> >> > Implementation unnecessarily bound to rejected work.
> >> 
> >> So I want to discuss my final goal.  If people accept my final goal,
> >> this is resolved.  If people don't accept, I will reconsider it.
> >
> > No.
> >
> > Please keep it in mind. There are lots of factors the project would
> > be broken during going on by several reasons because we are human being
> > so we can simply miss something clear and realize it later that it's
> > not feasible. Otherwise, others can show up with better idea for the
> > goal or fix other subsystem which can affect your goals.
> > I don't want to say such boring theoretical stuffs any more.
> >
> > My point is patchset should be self-contained if you really want to go
> > with step-by-step approach because we are likely to miss something
> > *easily*.
> >
> >> 
> >> > If you want to achieve your goal step by step, please consider if
> >> > one of step you are thinking could be rejected but steps already
> >> > merged should be self-contained without side-effect.
> >> 
> >> What is the side-effect or possible regressions of the step 1 as in this
> >
> > Adding code complexity for unproved feature.
> >
> > When I read your steps, your *most important* goal is to avoid split/
> > collapsing anon THP page for swap out/in. As a bonus with the approach,
> > we could increase swapout/in bandwidth, too. Do I understand correctly?
> 
> It's hard to say what is the *most important* goal.  But it is clear
> that to improve swapout/in performance isn't the only goal.  The other
> goal to avoid split/collapsing THP page for swap out/in is very
> important too.

Okay, then, couldn't you focus on one goal per patchset? After solving one
problem, move on to the next one. What's the problem with that?
One of your goals is swapout performance, and it's the same as Tim's work.
That's why I wanted you to base your patchset on Tim's work. But if you
want your patch first, please make the patchset independent of your other goal
so everyone can review it easily and focus on *a* problem.
In your patchset, the THP split delaying part could be folded into your second
patchset, which is to avoid THP split/collapsing.

> 
> > However, swap-in/out bandwidth enhance is common requirement for both
> > normal and THP page and with Tim's work, we could enhance swapout path.
> >
> > So, I think you should give us to number about how THP split is bad
> > for the swapout bandwidth even though we applied Tim's work.
> > If it's serious, next approach is yours that we could tweak swap code
> > be aware of a THP to avoid splitting a THP.
> 
> It's not only about CPU cycles spent in splitting and collapsing THP,
> but also how to make THP work effectively on systems with swap turned
> on.
> 
> To avoid disturbing user applications etc., THP collapsing doesn't work
> aggressively to collapse anonymous pages into THP.  This means, once the
> THP is split, it will take quite long time (wall time, instead of CPU
> cycles) to be collapsed to become a THP, especially on machines with
> large memory size.  And on systems with swap turned on, THP will be
> split during swap out/in now.  If much swapping out/in is triggered
> during system running, it is possible that many THP is split, and have
> no chance to be collapsed.  Even if the THP that has been split gets
> opportunity to be collapsed again, the applications lose the opportunity
> to take advantage of the THP for quite long time too.  And the memory
> will be fragmented during the process, this makes it hard to allocate
> new THP.  The end result is that THP usage is very low in this
> 

Re: [PATCH -v3 00/10] THP swap: Delay splitting THP during swapping out

2016-09-19 Thread Minchan Kim
Hi Huang,

On Tue, Sep 20, 2016 at 10:54:35AM +0800, Huang, Ying wrote:
> Hi, Minchan,
> 
> Minchan Kim  writes:
> > Hi Huang,
> >
> > On Sun, Sep 18, 2016 at 09:53:39AM +0800, Huang, Ying wrote:
> >> Minchan Kim  writes:
> >> 
> >> > On Tue, Sep 13, 2016 at 04:53:49PM +0800, Huang, Ying wrote:
> >> >> Minchan Kim  writes:
> >> >> > On Tue, Sep 13, 2016 at 02:40:00PM +0800, Huang, Ying wrote:
> >> >> >> Minchan Kim  writes:
> >> >> >> 
> >> >> >> > Hi Huang,
> >> >> >> >
> >> >> >> > On Fri, Sep 09, 2016 at 01:35:12PM -0700, Huang, Ying wrote:
> >> >> >> >
> 
> [snip]
> 
> >> >> > 1. If we solve batching swapout, then how is THP split for swapout 
> >> >> > bad?
> >> >> > 2. Also, how is current conservative swapin from khugepaged bad?
> >> >> >
> >> >> > I think it's one of decision point for the motivation of your work
> >> >> > and for 1, we need batching swapout feature.
> >> >> >
> >> >> > I am saying again that I'm not against your goal but only concern
> >> >> > is approach. If you don't agree, please ignore me.
> >> >> 
> >> >> I am glad to discuss my final goal, that is, swapping out/in the full
> >> >> THP without splitting.  Why I want to do that is copied as below,
> >> >
> >> > Yes, it's your *final* goal but what if it couldn't be acceptable
> >> > on second step you mentioned above, for example?
> >> >
> >> > Implementation unnecessarily bound to rejected work.
> >> 
> >> So I want to discuss my final goal.  If people accept my final goal,
> >> this is resolved.  If people don't accept, I will reconsider it.
> >
> > No.
> >
> > Please keep it in mind. There are lots of factors the project would
> > be broken during going on by several reasons because we are human being
> > so we can simply miss something clear and realize it later that it's
> > not feasible. Otherwise, others can show up with better idea for the
> > goal or fix other subsystem which can affect your goals.
> > I don't want to say such boring theoretical stuffs any more.
> >
> > My point is patchset should be self-contained if you really want to go
> > with step-by-step approach because we are likely to miss something
> > *easily*.
> >
> >> 
> >> > If you want to achieve your goal step by step, please consider if
> >> > one of step you are thinking could be rejected but steps already
> >> > merged should be self-contained without side-effect.
> >> 
> >> What is the side-effect or possible regressions of the step 1 as in this
> >
> > Adding code complexity for unproved feature.
> >
> > When I read your steps, your *most important* goal is to avoid split/
> > collapsing anon THP page for swap out/in. As a bonus with the approach,
> > we could increase swapout/in bandwidth, too. Do I understand correctly?
> 
> It's hard to say what is the *most important* goal.  But it is clear
> that to improve swapout/in performance isn't the only goal.  The other
> goal to avoid split/collapsing THP page for swap out/in is very
> important too.

Okay, then, couldn't you focus on one goal per patchset? After solving one
problem, move on to the next one. What's the problem with that?
One of your goals is swapout performance, and it's the same as Tim's work.
That's why I wanted you to base your patchset on Tim's work. But if you
want your patch first, please make the patchset independent of your other goal
so everyone can review it easily and focus on *a* problem.
In your patchset, the THP split delaying part could be folded into your second
patchset, which is to avoid THP split/collapsing.

> 
> > However, swap-in/out bandwidth enhance is common requirement for both
> > normal and THP page and with Tim's work, we could enhance swapout path.
> >
> > So, I think you should give us to number about how THP split is bad
> > for the swapout bandwidth even though we applied Tim's work.
> > If it's serious, next approach is yours that we could tweak swap code
> > be aware of a THP to avoid splitting a THP.
> 
> It's not only about CPU cycles spent in splitting and collapsing THP,
> but also how to make THP work effectively on systems with swap turned
> on.
> 
> To avoid disturbing user applications etc., THP collapsing doesn't work
> aggressively to collapse anonymous pages into THP.  This means, once the
> THP is split, it will take quite long time (wall time, instead of CPU
> cycles) to be collapsed to become a THP, especially on machines with
> large memory size.  And on systems with swap turned on, THP will be
> split during swap out/in now.  If much swapping out/in is triggered
> during system running, it is possible that many THP is split, and have
> no chance to be collapsed.  Even if the THP that has been split gets
> opportunity to be collapsed again, the applications lose the opportunity
> to take advantage of the THP for quite long time too.  And the memory
> will be fragmented during the process, this makes it hard to allocate
> new THP.  The end result is that THP usage is very low in this
> situation.  One solution is to avoid to split/collapse THP during swap
> out/in.

[PATCH 1/3] linux/mm.h: canonicalize macro PAGE_ALIGNED() definition

2016-09-19 Thread zijun_hu
From: zijun_hu 

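Canonicalize the PAGE_ALIGNED() macro definition by parenthesizing its
argument, so that the cast applies to the whole expression passed in
(for example, PAGE_ALIGNED(addr | end)).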

Signed-off-by: zijun_hu 
---
 include/linux/mm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index ef815b9..ec68186 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -126,7 +126,7 @@ extern int overcommit_kbytes_handler(struct ctl_table *, 
int, void __user *,
 #define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)
 
 /* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE 
*/
-#define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)addr, PAGE_SIZE)
+#define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), PAGE_SIZE)
 
 /*
  * Linux kernel virtual memory manager primitives.
-- 
1.9.1



[PATCH 1/3] linux/mm.h: canonicalize macro PAGE_ALIGNED() definition

2016-09-19 Thread zijun_hu
From: zijun_hu 

canonicalize macro PAGE_ALIGNED() definition

Signed-off-by: zijun_hu 
---
 include/linux/mm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index ef815b9..ec68186 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -126,7 +126,7 @@ extern int overcommit_kbytes_handler(struct ctl_table *, 
int, void __user *,
 #define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)
 
 /* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE 
*/
-#define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)addr, PAGE_SIZE)
+#define PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), PAGE_SIZE)
 
 /*
  * Linux kernel virtual memory manager primitives.
-- 
1.9.1



Re: [PATCH v3 15/15] lockdep: Crossrelease feature documentation

2016-09-19 Thread Byungchul Park
On Fri, Sep 16, 2016 at 10:47:06AM -0500, Nilay Vaish wrote:
> > +==
> > +Background
> > +==
> > +
> > +What causes deadlock
> > +
> > +
> > +A deadlock occurs when a context is waiting for an event to be issued
> > +which cannot be issued because the context or another context who can
> > +issue the event is also waiting for an event to be issued which cannot
> > +be issued.
> 
> I think 'some event happened' and 'context triggered an event' is
> better than 'some event issued' or 'context issued an event'.  I think
> 'happen' and 'trigger' are more widely used words when we talk about
> events.  For example, I would prefer the following version of the
> above:
> 
> A deadlock occurs when a context is waiting for an event to happen,
> which cannot happen because the context which can trigger the event is
> also waiting for an event to happen which cannot happen either.

Looks good.

> 
> > +Single context or more than one context both waiting for an
> > +event and issuing an event may participate in a deadlock.
> 
> I am not able to make sense of the line above.

I meant that only one context can be in deadlock by itself, like

A
=
lock a <= waiting
lock b
lock a
unlock a <= triggering
unlock b
unlock a

and more than one context also can be in deadlock, like

A           B
=           =
lock b      lock a
lock a      lock b
unlock a    unlock b
unlock b    unlock a

Is there any alternative way to describe it?

> 
> > +
> > +For example,
> > +
> > +A context who can issue event D is waiting for event A to be issued.
> > +A context who can issue event A is waiting for event B to be issued.
> > +A context who can issue event B is waiting for event C to be issued.
> > +A context who can issue event C is waiting for event D to be issued.
> > +
> > +A deadlock occurs when these four operations are run at a time because
> > +event D cannot be issued if event A isn't issued which in turn cannot be
> > +issued if event B isn't issued which in turn cannot be issued if event C
> > +isn't issued which in turn cannot be issued if event D isn't issued. No
> > +event can be issued since any of them never meets its precondition.
> > +
> > +We can easily recognize that each wait operation creates a dependency
> > +between two issuings e.g. between issuing D and issuing A like, 'event D
> > +cannot be issued if event A isn't issued', in other words, 'issuing
> > +event D depends on issuing event A'. So the whole example can be
> > +rewritten in terms of dependency,
> > +
> > +Do an operation making 'event D cannot be issued if event A isn't issued'.
> > +Do an operation making 'event A cannot be issued if event B isn't issued'.
> > +Do an operation making 'event B cannot be issued if event C isn't issued'.
> > +Do an operation making 'event C cannot be issued if event D isn't issued'.
> > +
> > +or,
> 
> I think we can remove the text above.  The example only needs to be
> provided once.

I tried not to miss any subtle description, as far as possible. I thought about
it and decided that I need to explain what a dependency is, without any hole in
the logic.

> 
> > +
> > +Do an operation making 'issuing event D depends on issuing event A'.
> > +Do an operation making 'issuing event A depends on issuing event B'.
> > +Do an operation making 'issuing event B depends on issuing event C'.
> > +Do an operation making 'issuing event C depends on issuing event D'.
> > +
> > +What causes a deadlock is a set of dependencies a chain of which forms a
> > +cycle, which means that issuing event D depending on issuing event A
> > +depending on issuing event B depending on issuing event C depending on
> > +issuing event D, finally depends on issuing event D itself, which means
> > +no event can be issued.
> > +
> > +Any set of operations creating dependencies causes a deadlock. The set
> > +of lock operations e.g. acquire and release is an example. Waiting for a
> > +lock to be released corresponds to waiting for an event and releasing a
> > +lock corresponds to issuing an event. So the description of dependency
> > +above can be altered to one in terms of lock.
> > +
> > +In terms of event, issuing event A depends on issuing event B if,
> > +
> > +   Event A cannot be issued if event B isn't issued.
> > +
> > +In terms of lock, releasing lock A depends on releasing lock B if,
> > +
> > +   Lock A cannot be released if lock B isn't released.
> > +
> > +CONCLUSION
> > +
> > +A set of dependencies a chain of which forms a cycle, causes a deadlock,
> 
> I think 'a chain of' is not required in the sentence above.

Do you think so? Actually a chain forms a cycle. I thought dependencies
are not stuffs making a cycle.

> 
> > +no matter what creates the dependencies.
> > +
> > +
> > +What lockdep detects
> > +
> > +
> > +A deadlock actually occurs only when all operations creating problematic
> 
> Instead of 'problematic', I would use 'cyclic'.

I'd like to highlight _problematic_. Is it 

Re: [PATCH v3 15/15] lockdep: Crossrelease feature documentation

2016-09-19 Thread Byungchul Park
On Fri, Sep 16, 2016 at 10:47:06AM -0500, Nilay Vaish wrote:
> > +==
> > +Background
> > +==
> > +
> > +What causes deadlock
> > +
> > +
> > +A deadlock occurs when a context is waiting for an event to be issued
> > +which cannot be issued because the context or another context who can
> > +issue the event is also waiting for an event to be issued which cannot
> > +be issued.
> 
> I think 'some event happened' and 'context triggered an event' is
> better than 'some event issued' or 'context issued an event'.  I think
> 'happen' and 'trigger' are more widely used words when we talk about
> events.  For example, I would prefer the following version of the
> above:
> 
> A deadlock occurs when a context is waiting for an event to happen,
> which cannot happen because the context which can trigger the event is
> also waiting for an event to happen which cannot happen either.

Looks good.

> 
> > +Single context or more than one context both waiting for an
> > +event and issuing an event may participate in a deadlock.
> 
> I am not able to make sense of the line above.

I meant that only one context can be in deadlock by itself, like

A
=
lock a <= waiting
lock b
lock a
unlock a <= triggering
unlock b
unlock a

and more than one context also can be in deadlock, like

A           B
=           =
lock b      lock a
lock a      lock b
unlock a    unlock b
unlock b    unlock a

Is there any alternative way to describe it?

> 
> > +
> > +For example,
> > +
> > +A context who can issue event D is waiting for event A to be issued.
> > +A context who can issue event A is waiting for event B to be issued.
> > +A context who can issue event B is waiting for event C to be issued.
> > +A context who can issue event C is waiting for event D to be issued.
> > +
> > +A deadlock occurs when these four operations are run at a time because
> > +event D cannot be issued if event A isn't issued which in turn cannot be
> > +issued if event B isn't issued which in turn cannot be issued if event C
> > +isn't issued which in turn cannot be issued if event D isn't issued. No
> > +event can be issued since any of them never meets its precondition.
> > +
> > +We can easily recognize that each wait operation creates a dependency
> > +between two issuings e.g. between issuing D and issuing A like, 'event D
> > +cannot be issued if event A isn't issued', in other words, 'issuing
> > +event D depends on issuing event A'. So the whole example can be
> > +rewritten in terms of dependency,
> > +
> > +Do an operation making 'event D cannot be issued if event A isn't issued'.
> > +Do an operation making 'event A cannot be issued if event B isn't issued'.
> > +Do an operation making 'event B cannot be issued if event C isn't issued'.
> > +Do an operation making 'event C cannot be issued if event D isn't issued'.
> > +
> > +or,
> 
> I think we can remove the text above.  The example only needs to be
> provided once.

I tried not to miss any subtle description, as far as possible. I thought about
it and decided that I need to explain what a dependency is, without any hole in
the logic.

> 
> > +
> > +Do an operation making 'issuing event D depends on issuing event A'.
> > +Do an operation making 'issuing event A depends on issuing event B'.
> > +Do an operation making 'issuing event B depends on issuing event C'.
> > +Do an operation making 'issuing event C depends on issuing event D'.
> > +
> > +What causes a deadlock is a set of dependencies a chain of which forms a
> > +cycle, which means that issuing event D depending on issuing event A
> > +depending on issuing event B depending on issuing event C depending on
> > +issuing event D, finally depends on issuing event D itself, which means
> > +no event can be issued.
> > +
> > +Any set of operations creating dependencies causes a deadlock. The set
> > +of lock operations e.g. acquire and release is an example. Waiting for a
> > +lock to be released corresponds to waiting for an event and releasing a
> > +lock corresponds to issuing an event. So the description of dependency
> > +above can be altered to one in terms of lock.
> > +
> > +In terms of event, issuing event A depends on issuing event B if,
> > +
> > +   Event A cannot be issued if event B isn't issued.
> > +
> > +In terms of lock, releasing lock A depends on releasing lock B if,
> > +
> > +   Lock A cannot be released if lock B isn't released.
> > +
> > +CONCLUSION
> > +
> > +A set of dependencies a chain of which forms a cycle, causes a deadlock,
> 
> I think 'a chain of' is not required in the sentence above.

Do you think so? Actually a chain forms a cycle. I thought dependencies
are not stuffs making a cycle.

> 
> > +no matter what creates the dependencies.
> > +
> > +
> > +What lockdep detects
> > +
> > +
> > +A deadlock actually occurs only when all operations creating problematic
> 
> Instead of 'problematic', I would use 'cyclic'.

I'd like to highlight _problematic_. Is it 

[PATCH 3/3] perf report: Fix output of 'pid' sort key

2016-09-19 Thread Namhyung Kim
The thread->comm can be changed during the lifetime due to prctl() or
exec().  For this reason each hist entry has a pointer to a comm at the
time as well as a pointer to the thread.  So it should use the he->comm
instead of thread__comm(he->thread) which always returns the latest
comm.  This can be seen using following example:

  $ perf report --hierarchy -s comm,pid

Before:
3.86%   2.53% sh
   1.05%   0.45% 776:sh
   0.67%   0.49%8190:sh
   0.65%   0.37%8194:sh
   0.28%   0.21%8192:awk
   0.24%   0.22%8191:acpi
   0.24%   0.21%8196:awk
   0.22%   0.15%8193:tr
   0.21%   0.14%8195:netctl-auto
   0.10%   0.30%8319:date
   0.10%   0.00%8320:xsetroot
   0.09%   0.00%8321:sleep

After:
3.86%   2.53% sh
   1.05%   0.45% 776:sh
   0.67%   0.49%8190:sh
   0.65%   0.37%8194:sh
   0.28%   0.21%8192:sh
   0.24%   0.22%8191:sh
   0.24%   0.21%8196:sh
   0.22%   0.15%8193:sh
   0.21%   0.14%8195:sh
   0.10%   0.30%8319:sh
   0.10%   0.00%8320:sh
   0.09%   0.00%8321:sh

Cc: Frederic Weisbecker 
Fixes: 4dfced359fbc ("perf tools: Get current comm instead of last one")
Signed-off-by: Namhyung Kim 
---
 tools/perf/util/sort.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 1884d7f9b9d2..549dfbdbe9c3 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -77,7 +77,7 @@ sort__thread_cmp(struct hist_entry *left, struct hist_entry 
*right)
 static int hist_entry__thread_snprintf(struct hist_entry *he, char *bf,
   size_t size, unsigned int width)
 {
-   const char *comm = thread__comm_str(he->thread);
+   const char *comm = comm__str(he->comm);
 
width = max(7U, width) - 8;
return repsep_snprintf(bf, size, "%7d:%-*.*s", he->thread->tid,
-- 
2.9.3



[PATCH 1/3] perf ui/tui: Reset output width for hierarchy

2016-09-19 Thread Namhyung Kim
When --hierarchy option is used, each entry has its own hpp_list to show
the result.  But it failed to update the width of each column.

Before:

  - 46.29% 48.12%netctl-auto
 + 31.44% 29.25%[kernel.vmlinux]
 + 8.52% 11.55%libc-2.22.so
 + 5.19% 6.91%bash
  + 10.75% 11.83%wpa_cli
  + 8.25% 2.23%swapper
  + 6.45% 5.40%tr
  + 4.81% 8.09%awk
  + 4.15% 2.85%firefox
  + 3.86% 2.53%sh

After:

  -  46.29%  48.12%netctl-auto
  +  31.44%  29.25%[kernel.vmlinux]
  +   8.52%  11.55%libc-2.22.so
  +   5.19%   6.91%bash
  +  10.75%  11.83%wpa_cli
  +   8.25%   2.23%swapper
  +   6.45%   5.40%tr
  +   4.81%   8.09%awk
  +   4.15%   2.85%firefox
  +   3.86%   2.53%sh

Fixes: 1b2dbbf41a0f ("perf hists: Use own hpp_list for hierarchy mode")
Signed-off-by: Namhyung Kim 
---
 tools/perf/ui/browsers/hists.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 35e44b1879e3..49db16334814 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -2067,6 +2067,7 @@ void hist_browser__init(struct hist_browser *browser,
struct hists *hists)
 {
struct perf_hpp_fmt *fmt;
+   struct perf_hpp_list_node *node;
 
browser->hists  = hists;
browser->b.refresh  = hist_browser__refresh;
@@ -2079,6 +2080,11 @@ void hist_browser__init(struct hist_browser *browser,
perf_hpp__reset_width(fmt, hists);
++browser->b.columns;
}
+   /* hierarchy entries have their own hpp list */
+   list_for_each_entry(node, &hists->hpp_formats, list) {
+   perf_hpp_list__for_each_format(&node->hpp, fmt)
+   perf_hpp__reset_width(fmt, hists);
+   }
 }
 
 struct hist_browser *hist_browser__new(struct hists *hists)
-- 
2.9.3



[PATCH 2/3] perf hists: Factor out hists__reset_column_width()

2016-09-19 Thread Namhyung Kim
The stdio and tui have the same code to reset the hpp format column width.
Factor it out as a new function.

Suggested-by: Jiri Olsa 
Signed-off-by: Namhyung Kim 
---
 tools/perf/ui/browsers/hists.c | 12 +++-
 tools/perf/ui/hist.c   | 15 +++
 tools/perf/ui/stdio/hist.c | 10 +-
 tools/perf/util/hist.h |  1 +
 4 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 49db16334814..a6d5d248b8fb 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -2067,7 +2067,6 @@ void hist_browser__init(struct hist_browser *browser,
struct hists *hists)
 {
struct perf_hpp_fmt *fmt;
-   struct perf_hpp_list_node *node;
 
browser->hists  = hists;
browser->b.refresh  = hist_browser__refresh;
@@ -2076,15 +2075,10 @@ void hist_browser__init(struct hist_browser *browser,
browser->b.use_navkeypressed= true;
browser->show_headers   = symbol_conf.show_hist_headers;
 
-   hists__for_each_format(hists, fmt) {
-   perf_hpp__reset_width(fmt, hists);
+   hists__for_each_format(hists, fmt)
++browser->b.columns;
-   }
-   /* hierarchy entries have their own hpp list */
-   list_for_each_entry(node, &hists->hpp_formats, list) {
-   perf_hpp_list__for_each_format(&node->hpp, fmt)
-   perf_hpp__reset_width(fmt, hists);
-   }
+
+   hists__reset_column_width(hists);
 }
 
 struct hist_browser *hist_browser__new(struct hists *hists)
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index b47fafc8ee2a..60c4a4d08374 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -699,6 +699,21 @@ void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, 
struct hists *hists)
}
 }
 
+void hists__reset_column_width(struct hists *hists)
+{
+   struct perf_hpp_fmt *fmt;
+   struct perf_hpp_list_node *node;
+
+   hists__for_each_format(hists, fmt)
+   perf_hpp__reset_width(fmt, hists);
+
+   /* hierarchy entries have their own hpp list */
+   list_for_each_entry(node, &hists->hpp_formats, list) {
+   perf_hpp_list__for_each_format(&node->hpp, fmt)
+   perf_hpp__reset_width(fmt, hists);
+   }
+}
+
 void perf_hpp__set_user_width(const char *width_list_str)
 {
struct perf_hpp_fmt *fmt;
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index a57131e61fe3..8e1840bff29d 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -717,8 +717,6 @@ size_t hists__fprintf(struct hists *hists, bool 
show_header, int max_rows,
  int max_cols, float min_pcnt, FILE *fp,
  bool use_callchain)
 {
-   struct perf_hpp_fmt *fmt;
-   struct perf_hpp_list_node *node;
struct rb_node *nd;
size_t ret = 0;
const char *sep = symbol_conf.field_sep;
@@ -729,13 +727,7 @@ size_t hists__fprintf(struct hists *hists, bool 
show_header, int max_rows,
 
init_rem_hits();
 
-   hists__for_each_format(hists, fmt)
-   perf_hpp__reset_width(fmt, hists);
-   /* hierarchy entries have their own hpp list */
-   list_for_each_entry(node, &hists->hpp_formats, list) {
-   perf_hpp_list__for_each_format(&node->hpp, fmt)
-   perf_hpp__reset_width(fmt, hists);
-   }
+   hists__reset_column_width(hists);
 
if (symbol_conf.col_width_list_str)
perf_hpp__set_user_width(symbol_conf.col_width_list_str);
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index a002c93fe422..defa957f27df 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -368,6 +368,7 @@ static inline bool perf_hpp__should_skip(struct 
perf_hpp_fmt *format,
 void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists);
 void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists);
 void perf_hpp__set_user_width(const char *width_list_str);
+void hists__reset_column_width(struct hists *hists);
 
 typedef u64 (*hpp_field_fn)(struct hist_entry *he);
 typedef int (*hpp_callback_fn)(struct perf_hpp *hpp, bool front);
-- 
2.9.3



[PATCH 3/3] perf report: Fix output of 'pid' sort key

2016-09-19 Thread Namhyung Kim
The thread->comm can be changed during the lifetime due to prctl() or
exec().  For this reason each hist entry has a pointer to a comm at the
time as well as a pointer to the thread.  So it should use the he->comm
instead of thread__comm(he->thread) which always returns the latest
comm.  This can be seen using following example:

  $ perf report --hierarchy -s comm,pid

Before:
3.86%   2.53% sh
   1.05%   0.45% 776:sh
   0.67%   0.49%8190:sh
   0.65%   0.37%8194:sh
   0.28%   0.21%8192:awk
   0.24%   0.22%8191:acpi
   0.24%   0.21%8196:awk
   0.22%   0.15%8193:tr
   0.21%   0.14%8195:netctl-auto
   0.10%   0.30%8319:date
   0.10%   0.00%8320:xsetroot
   0.09%   0.00%8321:sleep

After:
3.86%   2.53% sh
   1.05%   0.45% 776:sh
   0.67%   0.49%8190:sh
   0.65%   0.37%8194:sh
   0.28%   0.21%8192:sh
   0.24%   0.22%8191:sh
   0.24%   0.21%8196:sh
   0.22%   0.15%8193:sh
   0.21%   0.14%8195:sh
   0.10%   0.30%8319:sh
   0.10%   0.00%8320:sh
   0.09%   0.00%8321:sh

Cc: Frederic Weisbecker 
Fixes: 4dfced359fbc ("perf tools: Get current comm instead of last one")
Signed-off-by: Namhyung Kim 
---
 tools/perf/util/sort.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 1884d7f9b9d2..549dfbdbe9c3 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -77,7 +77,7 @@ sort__thread_cmp(struct hist_entry *left, struct hist_entry 
*right)
 static int hist_entry__thread_snprintf(struct hist_entry *he, char *bf,
   size_t size, unsigned int width)
 {
-   const char *comm = thread__comm_str(he->thread);
+   const char *comm = comm__str(he->comm);
 
width = max(7U, width) - 8;
return repsep_snprintf(bf, size, "%7d:%-*.*s", he->thread->tid,
-- 
2.9.3



[PATCH 1/3] perf ui/tui: Reset output width for hierarchy

2016-09-19 Thread Namhyung Kim
When --hierarchy option is used, each entry has its own hpp_list to show
the result.  But it failed to update the width of each column.

Before:

  - 46.29% 48.12%netctl-auto
 + 31.44% 29.25%[kernel.vmlinux]
 + 8.52% 11.55%libc-2.22.so
 + 5.19% 6.91%bash
  + 10.75% 11.83%wpa_cli
  + 8.25% 2.23%swapper
  + 6.45% 5.40%tr
  + 4.81% 8.09%awk
  + 4.15% 2.85%firefox
  + 3.86% 2.53%sh

After:

  -  46.29%  48.12%netctl-auto
  +  31.44%  29.25%[kernel.vmlinux]
  +   8.52%  11.55%libc-2.22.so
  +   5.19%   6.91%bash
  +  10.75%  11.83%wpa_cli
  +   8.25%   2.23%swapper
  +   6.45%   5.40%tr
  +   4.81%   8.09%awk
  +   4.15%   2.85%firefox
  +   3.86%   2.53%sh

Fixes: 1b2dbbf41a0f ("perf hists: Use own hpp_list for hierarchy mode")
Signed-off-by: Namhyung Kim 
---
 tools/perf/ui/browsers/hists.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 35e44b1879e3..49db16334814 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -2067,6 +2067,7 @@ void hist_browser__init(struct hist_browser *browser,
struct hists *hists)
 {
struct perf_hpp_fmt *fmt;
+   struct perf_hpp_list_node *node;
 
browser->hists  = hists;
browser->b.refresh  = hist_browser__refresh;
@@ -2079,6 +2080,11 @@ void hist_browser__init(struct hist_browser *browser,
perf_hpp__reset_width(fmt, hists);
++browser->b.columns;
}
+   /* hierarchy entries have their own hpp list */
+   list_for_each_entry(node, &hists->hpp_formats, list) {
+   perf_hpp_list__for_each_format(&node->hpp, fmt)
+   perf_hpp__reset_width(fmt, hists);
+   }
 }
 
 struct hist_browser *hist_browser__new(struct hists *hists)
-- 
2.9.3



[PATCH 2/3] perf hists: Factor out hists__reset_column_width()

2016-09-19 Thread Namhyung Kim
The stdio and tui have the same code to reset the hpp format column width.
Factor it out as a new function.

Suggested-by: Jiri Olsa 
Signed-off-by: Namhyung Kim 
---
 tools/perf/ui/browsers/hists.c | 12 +++-
 tools/perf/ui/hist.c   | 15 +++
 tools/perf/ui/stdio/hist.c | 10 +-
 tools/perf/util/hist.h |  1 +
 4 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 49db16334814..a6d5d248b8fb 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -2067,7 +2067,6 @@ void hist_browser__init(struct hist_browser *browser,
struct hists *hists)
 {
struct perf_hpp_fmt *fmt;
-   struct perf_hpp_list_node *node;
 
browser->hists  = hists;
browser->b.refresh  = hist_browser__refresh;
@@ -2076,15 +2075,10 @@ void hist_browser__init(struct hist_browser *browser,
browser->b.use_navkeypressed= true;
browser->show_headers   = symbol_conf.show_hist_headers;
 
-   hists__for_each_format(hists, fmt) {
-   perf_hpp__reset_width(fmt, hists);
+   hists__for_each_format(hists, fmt)
++browser->b.columns;
-   }
-   /* hierarchy entries have their own hpp list */
-   list_for_each_entry(node, &hists->hpp_formats, list) {
-   perf_hpp_list__for_each_format(&node->hpp, fmt)
-   perf_hpp__reset_width(fmt, hists);
-   }
+
+   hists__reset_column_width(hists);
 }
 
 struct hist_browser *hist_browser__new(struct hists *hists)
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index b47fafc8ee2a..60c4a4d08374 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -699,6 +699,21 @@ void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, 
struct hists *hists)
}
 }
 
+void hists__reset_column_width(struct hists *hists)
+{
+   struct perf_hpp_fmt *fmt;
+   struct perf_hpp_list_node *node;
+
+   hists__for_each_format(hists, fmt)
+   perf_hpp__reset_width(fmt, hists);
+
+   /* hierarchy entries have their own hpp list */
+   list_for_each_entry(node, &hists->hpp_formats, list) {
+   perf_hpp_list__for_each_format(&node->hpp, fmt)
+   perf_hpp__reset_width(fmt, hists);
+   }
+}
+
 void perf_hpp__set_user_width(const char *width_list_str)
 {
struct perf_hpp_fmt *fmt;
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index a57131e61fe3..8e1840bff29d 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -717,8 +717,6 @@ size_t hists__fprintf(struct hists *hists, bool 
show_header, int max_rows,
  int max_cols, float min_pcnt, FILE *fp,
  bool use_callchain)
 {
-   struct perf_hpp_fmt *fmt;
-   struct perf_hpp_list_node *node;
struct rb_node *nd;
size_t ret = 0;
const char *sep = symbol_conf.field_sep;
@@ -729,13 +727,7 @@ size_t hists__fprintf(struct hists *hists, bool 
show_header, int max_rows,
 
init_rem_hits();
 
-   hists__for_each_format(hists, fmt)
-   perf_hpp__reset_width(fmt, hists);
-   /* hierarchy entries have their own hpp list */
-   list_for_each_entry(node, &hists->hpp_formats, list) {
-   perf_hpp_list__for_each_format(&node->hpp, fmt)
-   perf_hpp__reset_width(fmt, hists);
-   }
+   hists__reset_column_width(hists);
 
if (symbol_conf.col_width_list_str)
perf_hpp__set_user_width(symbol_conf.col_width_list_str);
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index a002c93fe422..defa957f27df 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -368,6 +368,7 @@ static inline bool perf_hpp__should_skip(struct 
perf_hpp_fmt *format,
 void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists);
 void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists);
 void perf_hpp__set_user_width(const char *width_list_str);
+void hists__reset_column_width(struct hists *hists);
 
 typedef u64 (*hpp_field_fn)(struct hist_entry *he);
 typedef int (*hpp_callback_fn)(struct perf_hpp *hpp, bool front);
-- 
2.9.3



Re: [PATCH -v3 00/10] THP swap: Delay splitting THP during swapping out

2016-09-19 Thread Huang, Ying
Minchan Kim  writes:

> Hi Huang,
>
> On Tue, Sep 20, 2016 at 10:54:35AM +0800, Huang, Ying wrote:
>> Hi, Minchan,
>> 
>> Minchan Kim  writes:
>> > Hi Huang,
>> >
>> > On Sun, Sep 18, 2016 at 09:53:39AM +0800, Huang, Ying wrote:
>> >> Minchan Kim  writes:
>> >> 
>> >> > On Tue, Sep 13, 2016 at 04:53:49PM +0800, Huang, Ying wrote:
>> >> >> Minchan Kim  writes:
>> >> >> > On Tue, Sep 13, 2016 at 02:40:00PM +0800, Huang, Ying wrote:
>> >> >> >> Minchan Kim  writes:
>> >> >> >> 
>> >> >> >> > Hi Huang,
>> >> >> >> >
>> >> >> >> > On Fri, Sep 09, 2016 at 01:35:12PM -0700, Huang, Ying wrote:
>> >> >> >> >
>> 
>> [snip]
>> 
>> >> >> > 1. If we solve batching swapout, then how is THP split for swapout 
>> >> >> > bad?
>> >> >> > 2. Also, how is current conservative swapin from khugepaged bad?
>> >> >> >
>> >> >> > I think it's one of decision point for the motivation of your work
>> >> >> > and for 1, we need batching swapout feature.
>> >> >> >
>> >> >> > I am saying again that I'm not against your goal but only concern
>> >> >> > is approach. If you don't agree, please ignore me.
>> >> >> 
>> >> >> I am glad to discuss my final goal, that is, swapping out/in the full
>> >> >> THP without splitting.  Why I want to do that is copied as below,
>> >> >
>> >> > Yes, it's your *final* goal but what if it couldn't be acceptable
>> >> > on second step you mentioned above, for example?
>> >> >
>> >> > Unnecessary binded implementation to rejected work.
>> >> 
>> >> So I want to discuss my final goal.  If people accept my final goal,
>> >> this is resolved.  If people don't accept, I will reconsider it.
>> >
>> > No.
>> >
>> > Please keep it in mind. There are lots of factors the project would
>> > be broken during going on by several reasons because we are human being
>> > so we can simply miss something clear and realize it later that it's
>> > not feasible. Otherwise, others can show up with better idea for the
>> > goal or fix other subsystem which can affect your goals.
>> > I don't want to say such boring theoretical stuffs any more.
>> >
>> > My point is patchset should be self-contained if you really want to go
>> > with step-by-step approach because we are likely to miss something
>> > *easily*.
>> >
>> >> 
>> >> > If you want to achieve your goal step by step, please consider if
>> >> > one of step you are thinking could be rejected but steps already
>> >> > merged should be self-contained without side-effect.
>> >> 
>> >> What is the side-effect or possible regressions of the step 1 as in this
>> >
>> > Adding code complexity for unproved feature.
>> >
>> > When I read your steps, your *most important* goal is to avoid split/
>> > collapsing anon THP page for swap out/in. As a bonus with the approach,
>> > we could increase swapout/in bandwidth, too. Do I understand correctly?
>> 
>> It's hard to say what is the *most important* goal.  But it is clear
>> that to improve swapout/in performance isn't the only goal.  The other
>> goal to avoid split/collapsing THP page for swap out/in is very
>> important too.
>
> Okay, then, couldn't you focus a goal in patchset? After solving a problem,
> then next one. What's the problem?
> One of your goal is swapout performance and it's same with Tim's work.
> That's why I wanted to make your patchset based on Tim's work. But if you
> want your patch first, please make patchset independent with your other goal
> so everyone can review easily and focus on *a* problem.
> In your patchset, THP split delaying part could be folded into in your second
> patchset which is to avoid THP split/collapsing.

I thought multiple goals for one patchset is common.  But if you want
just one goal for review, I suggest you to review the patchset for the
goal to avoid split/collapsing anon THP page for swap out/in.  And this
patchset is just the first step for that.

>> > However, swap-in/out bandwidth enhance is common requirement for both
>> > normal and THP page and with Tim's work, we could enhance swapout path.
>> >
>> > So, I think you should give us to number about how THP split is bad
>> > for the swapout bandwidth even though we applied Tim's work.
>> > If it's serious, next approach is yours that we could tweak swap code
>> > be aware of a THP to avoid splitting a THP.
>> 
>> It's not only about CPU cycles spent in splitting and collapsing THP,
>> but also how to make THP work effectively on systems with swap turned
>> on.
>> 
>> To avoid disturbing user applications etc., THP collapsing doesn't work
>> aggressively to collapse anonymous pages into THP.  This means, once the
>> THP is split, it will take quite long time (wall time, instead of CPU
>> cycles) to be collapsed to become a THP, especially on machines with
>> large memory size.  And on systems with swap turned on, THP will be
>> split during swap out/in now.  If much swapping out/in is triggered
>> during system 

Re: [PATCH -v3 00/10] THP swap: Delay splitting THP during swapping out

2016-09-19 Thread Huang, Ying
Minchan Kim  writes:

> Hi Huang,
>
> On Tue, Sep 20, 2016 at 10:54:35AM +0800, Huang, Ying wrote:
>> Hi, Minchan,
>> 
>> Minchan Kim  writes:
>> > Hi Huang,
>> >
>> > On Sun, Sep 18, 2016 at 09:53:39AM +0800, Huang, Ying wrote:
>> >> Minchan Kim  writes:
>> >> 
>> >> > On Tue, Sep 13, 2016 at 04:53:49PM +0800, Huang, Ying wrote:
>> >> >> Minchan Kim  writes:
>> >> >> > On Tue, Sep 13, 2016 at 02:40:00PM +0800, Huang, Ying wrote:
>> >> >> >> Minchan Kim  writes:
>> >> >> >> 
>> >> >> >> > Hi Huang,
>> >> >> >> >
>> >> >> >> > On Fri, Sep 09, 2016 at 01:35:12PM -0700, Huang, Ying wrote:
>> >> >> >> >
>> 
>> [snip]
>> 
>> >> >> > 1. If we solve batching swapout, then how is THP split for swapout 
>> >> >> > bad?
>> >> >> > 2. Also, how is current conservative swapin from khugepaged bad?
>> >> >> >
>> >> >> > I think it's one of decision point for the motivation of your work
>> >> >> > and for 1, we need batching swapout feature.
>> >> >> >
>> >> >> > I am saying again that I'm not against your goal but only concern
>> >> >> > is approach. If you don't agree, please ignore me.
>> >> >> 
>> >> >> I am glad to discuss my final goal, that is, swapping out/in the full
>> >> >> THP without splitting.  Why I want to do that is copied as below,
>> >> >
>> >> > Yes, it's your *final* goal but what if it couldn't be acceptable
>> >> > on second step you mentioned above, for example?
>> >> >
>> >> > Unnecessary binded implementation to rejected work.
>> >> 
>> >> So I want to discuss my final goal.  If people accept my final goal,
>> >> this is resolved.  If people don't accept, I will reconsider it.
>> >
>> > No.
>> >
>> > Please keep it in mind. There are lots of factors the project would
>> > be broken during going on by several reasons because we are human being
>> > so we can simply miss something clear and realize it later that it's
>> > not feasible. Otherwise, others can show up with better idea for the
>> > goal or fix other subsystem which can affect your goals.
>> > I don't want to say such boring theoretical stuffs any more.
>> >
>> > My point is patchset should be self-contained if you really want to go
>> > with step-by-step approach because we are likely to miss something
>> > *easily*.
>> >
>> >> 
>> >> > If you want to achieve your goal step by step, please consider if
>> >> > one of step you are thinking could be rejected but steps already
>> >> > merged should be self-contained without side-effect.
>> >> 
>> >> What is the side-effect or possible regressions of the step 1 as in this
>> >
>> > Adding code complexity for unproved feature.
>> >
>> > When I read your steps, your *most important* goal is to avoid split/
>> > collapsing anon THP page for swap out/in. As a bonus with the approach,
>> > we could increase swapout/in bandwidth, too. Do I understand correctly?
>> 
>> It's hard to say what is the *most important* goal.  But it is clear
>> that to improve swapout/in performance isn't the only goal.  The other
>> goal to avoid split/collapsing THP page for swap out/in is very
>> important too.
>
> Okay, then, couldn't you focus a goal in patchset? After solving a problem,
> then next one. What's the problem?
> One of your goal is swapout performance and it's same with Tim's work.
> That's why I wanted to make your patchset based on Tim's work. But if you
> want your patch first, please make patchset independent with your other goal
> so everyone can review easily and focus on *a* problem.
> In your patchset, THP split delaying part could be folded into in your second
> patchset which is to avoid THP split/collapsing.

I thought multiple goals for one patchset is common.  But if you want
just one goal for review, I suggest you to review the patchset for the
goal to avoid split/collapsing anon THP page for swap out/in.  And this
patchset is just the first step for that.

>> > However, swap-in/out bandwidth enhance is common requirement for both
>> > normal and THP page and with Tim's work, we could enhance swapout path.
>> >
>> > So, I think you should give us to number about how THP split is bad
>> > for the swapout bandwidth even though we applied Tim's work.
>> > If it's serious, next approach is yours that we could tweak swap code
>> > be aware of a THP to avoid splitting a THP.
>> 
>> It's not only about CPU cycles spent in splitting and collapsing THP,
>> but also how to make THP work effectively on systems with swap turned
>> on.
>> 
>> To avoid disturbing user applications etc., THP collapsing doesn't work
>> aggressively to collapse anonymous pages into THP.  This means, once the
>> THP is split, it will take quite long time (wall time, instead of CPU
>> cycles) to be collapsed to become a THP, especially on machines with
>> large memory size.  And on systems with swap turned on, THP will be
>> split during swap out/in now.  If much swapping out/in is triggered
>> during system running, it is possible that many THP is split, and have
>> no chance to be collapsed.  Even if the THP 

Re: [PATCH] x86/efi: Add necessary checks before iterating over efi.memmap

2016-09-19 Thread Chao Gao
Sorry for bothering you. There is a regression since commit 78ce248f that if
booting xen in UEFI mode, dom0 will crash and xen reboot constantly.
This patch tries to fix it. Please take a look at it.

On Tue, Sep 13, 2016 at 11:28:15AM +0800, Chao Gao wrote:
>Commit 78ce248f (efi: Iterate over efi.memmap in for_each_efi_memory_desc())
>replaces the old loop by for_each_efi_memory_desc() which will encounter #PF
>when efi.memmap is not initialized.
>
>In boot process, xen set EFI_PARAVIRT in xen_efi_init() before setup_arch()
>is called. This leads efi_memmap_init() will not initialize structures
>related to efi.memmap. However, the following functions e.g.
>efi_find_mirror(), efi_print_memmap() and efi_free_boot_services() access
>efi.memmap without necessary checks. kernel and xen crash in this case.
>After adding these checks, xen and kernel boot up normally.
>
>Signed-off-by: Chao Gao 
>---
> arch/x86/platform/efi/efi.c| 6 ++
> arch/x86/platform/efi/quirks.c | 3 +++
> 2 files changed, 9 insertions(+)
>
>diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
>index 1fbb408..68966dc 100644
>--- a/arch/x86/platform/efi/efi.c
>+++ b/arch/x86/platform/efi/efi.c
>@@ -102,6 +102,9 @@ void __init efi_find_mirror(void)
>   efi_memory_desc_t *md;
>   u64 mirror_size = 0, total_size = 0;
> 
>+  if (efi_enabled(EFI_PARAVIRT))
>+  return;
>+
>   for_each_efi_memory_desc(md) {
>   unsigned long long start = md->phys_addr;
>   unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
>@@ -207,6 +210,9 @@ void __init efi_print_memmap(void)
>   efi_memory_desc_t *md;
>   int i = 0;
> 
>+  if (efi_enabled(EFI_PARAVIRT))
>+  return;
>+
>   for_each_efi_memory_desc(md) {
>   char buf[64];
> 
>diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
>index 89d1146..4fa1e4d 100644
>--- a/arch/x86/platform/efi/quirks.c
>+++ b/arch/x86/platform/efi/quirks.c
>@@ -251,6 +251,9 @@ void __init efi_free_boot_services(void)
> {
>   efi_memory_desc_t *md;
> 
>+  if (efi_enabled(EFI_PARAVIRT))
>+  return;
>+
>   for_each_efi_memory_desc(md) {
>   unsigned long long start = md->phys_addr;
>   unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
>-- 
>1.8.3.1
>
>--
>To unsubscribe from this list: send the line "unsubscribe linux-efi" in
>the body of a message to majord...@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] x86/efi: Add necessary checks before iterating over efi.memmap

2016-09-19 Thread Chao Gao
Sorry for bothering you. There is a regression since commit 78ce248f that if
booting xen in UEFI mode, dom0 will crash and xen reboot constantly.
This patch tries to fix it. Please take a look at it.

On Tue, Sep 13, 2016 at 11:28:15AM +0800, Chao Gao wrote:
>Commit 78ce248f (efi: Iterate over efi.memmap in for_each_efi_memory_desc())
>replaces the old loop by for_each_efi_memory_desc() which will encounter #PF
>when efi.memmap is not initialized.
>
>In boot process, xen set EFI_PARAVIRT in xen_efi_init() before setup_arch()
>is called. This leads efi_memmap_init() will not initialize structures
>related to efi.memmap. However, the following functions e.g.
>efi_find_mirror(), efi_print_memmap() and efi_free_boot_services() access
>efi.memmap without necessary checks. kernel and xen crash in this case.
>After adding these checks, xen and kernel boot up normally.
>
>Signed-off-by: Chao Gao 
>---
> arch/x86/platform/efi/efi.c| 6 ++
> arch/x86/platform/efi/quirks.c | 3 +++
> 2 files changed, 9 insertions(+)
>
>diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
>index 1fbb408..68966dc 100644
>--- a/arch/x86/platform/efi/efi.c
>+++ b/arch/x86/platform/efi/efi.c
>@@ -102,6 +102,9 @@ void __init efi_find_mirror(void)
>   efi_memory_desc_t *md;
>   u64 mirror_size = 0, total_size = 0;
> 
>+  if (efi_enabled(EFI_PARAVIRT))
>+  return;
>+
>   for_each_efi_memory_desc(md) {
>   unsigned long long start = md->phys_addr;
>   unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
>@@ -207,6 +210,9 @@ void __init efi_print_memmap(void)
>   efi_memory_desc_t *md;
>   int i = 0;
> 
>+  if (efi_enabled(EFI_PARAVIRT))
>+  return;
>+
>   for_each_efi_memory_desc(md) {
>   char buf[64];
> 
>diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
>index 89d1146..4fa1e4d 100644
>--- a/arch/x86/platform/efi/quirks.c
>+++ b/arch/x86/platform/efi/quirks.c
>@@ -251,6 +251,9 @@ void __init efi_free_boot_services(void)
> {
>   efi_memory_desc_t *md;
> 
>+  if (efi_enabled(EFI_PARAVIRT))
>+  return;
>+
>   for_each_efi_memory_desc(md) {
>   unsigned long long start = md->phys_addr;
>   unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
>-- 
>1.8.3.1
>
>--
>To unsubscribe from this list: send the line "unsubscribe linux-efi" in
>the body of a message to majord...@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 2/3] powerpc: get hugetlbpage handling more generic

2016-09-19 Thread Christophe Leroy



Le 20/09/2016 à 04:28, Aneesh Kumar K.V a écrit :

christophe leroy  writes:


Le 19/09/2016 à 07:50, Aneesh Kumar K.V a écrit :


Christophe Leroy  writes:

+#else
+static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
+{
+   BUG();
+}
+
 #endif



I was expecting that BUG will get removed in the next patch. But I don't
see it in the next patch. Considering

@@ -475,11 +453,10 @@ static void free_hugepd_range(struct mmu_gather *tlb, 
hugepd_t *hpdp, int pdshif
for (i = 0; i < num_hugepd; i++, hpdp++)
hpdp->pd = 0;

-#ifdef CONFIG_PPC_FSL_BOOK3E
-   hugepd_free(tlb, hugepte);
-#else
-   pgtable_free_tlb(tlb, hugepte, pdshift - shift);
-#endif
+   if (shift >= pdshift)
+   hugepd_free(tlb, hugepte);
+   else
+   pgtable_free_tlb(tlb, hugepte, pdshift - shift);
 }

What is that I am missing ?



Previously, call to hugepd_free() was compiled only when #ifdef
CONFIG_PPC_FSL_BOOK3E
Now, it is compiled at all time, but it should never be called if not
CONFIG_PPC_FSL_BOOK3E because we always have shift < pdshift in that case.
Then the function needs to be defined anyway but should never be called.
Should I just define it static inline {} ?



For 8M with 4K mode, we have shift >= pdshift right ?



Yes, that's the reason why in the following patch we get. That way we get 
a real hugepd_free() also for the 8xx.


@@ -366,7 +373,7 @@ int alloc_bootmem_huge_page(struct hstate *hstate)
 }
 #endif

-#ifdef CONFIG_PPC_FSL_BOOK3E
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
 #define HUGEPD_FREELIST_SIZE \
((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))



Christophe


Re: [PATCH v2 2/3] powerpc: get hugetlbpage handling more generic

2016-09-19 Thread Christophe Leroy



Le 20/09/2016 à 04:28, Aneesh Kumar K.V a écrit :

christophe leroy  writes:


Le 19/09/2016 à 07:50, Aneesh Kumar K.V a écrit :


Christophe Leroy  writes:

+#else
+static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
+{
+   BUG();
+}
+
 #endif



I was expecting that BUG will get removed in the next patch. But I don't
see it in the next patch. Considering

@@ -475,11 +453,10 @@ static void free_hugepd_range(struct mmu_gather *tlb, 
hugepd_t *hpdp, int pdshif
for (i = 0; i < num_hugepd; i++, hpdp++)
hpdp->pd = 0;

-#ifdef CONFIG_PPC_FSL_BOOK3E
-   hugepd_free(tlb, hugepte);
-#else
-   pgtable_free_tlb(tlb, hugepte, pdshift - shift);
-#endif
+   if (shift >= pdshift)
+   hugepd_free(tlb, hugepte);
+   else
+   pgtable_free_tlb(tlb, hugepte, pdshift - shift);
 }

What is that I am missing ?



Previously, call to hugepd_free() was compiled only when #ifdef
CONFIG_PPC_FSL_BOOK3E
Now, it is compiled at all time, but it should never be called if not
CONFIG_PPC_FSL_BOOK3E because we always have shift < pdshift in that case.
Then the function needs to be defined anyway but should never be called.
Should I just define it static inline {} ?



For 8M with 4K mode, we have shift >= pdshift right ?



Yes, that's the reason why in the following patch we get. That way we get 
a real hugepd_free() also for the 8xx.


@@ -366,7 +373,7 @@ int alloc_bootmem_huge_page(struct hstate *hstate)
 }
 #endif

-#ifdef CONFIG_PPC_FSL_BOOK3E
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
 #define HUGEPD_FREELIST_SIZE \
((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))



Christophe


Re: [PATCH v2 4/6] net: ethernet: bgmac: convert to feature flags

2016-09-19 Thread Rafał Miłecki
On 17 August 2016 at 13:34, Rafał Miłecki  wrote:
> On 8 July 2016 at 01:08, Jon Mason  wrote:
>> mode = (bgmac_read(bgmac, BGMAC_DEV_STATUS) & BGMAC_DS_MM_MASK) >>
>> BGMAC_DS_MM_SHIFT;
>> -   if (ci->id != BCMA_CHIP_ID_BCM47162 || mode != 0)
>> +   if (bgmac->feature_flags & BGMAC_FEAT_CLKCTLST || mode != 0)
>> bgmac_set(bgmac, BCMA_CLKCTLST, BCMA_CLKCTLST_FORCEHT);
>> -   if (ci->id == BCMA_CHIP_ID_BCM47162 && mode == 2)
>> +   if (bgmac->feature_flags & BGMAC_FEAT_CLKCTLST && mode == 2)
>> bcma_chipco_chipctl_maskset(&bgmac->core->bus->drv_cc, 1, ~0,
>> BGMAC_CHIPCTL_1_RXC_DLL_BYPASS);
>
> Jon, it looks to me you translated two following conditions:
> ci->id != BCMA_CHIP_ID_BCM47162
> and
> ci->id == BCMA_CHIP_ID_BCM47162
> into the same flag check:
> bgmac->feature_flags & BGMAC_FEAT_CLKCTLST
>
> I don't think it's intentional, is it? Do you have a moment to fix this?

Ping

-- 
Rafał


Re: [PATCH v2 4/6] net: ethernet: bgmac: convert to feature flags

2016-09-19 Thread Rafał Miłecki
On 17 August 2016 at 13:34, Rafał Miłecki  wrote:
> On 8 July 2016 at 01:08, Jon Mason  wrote:
>> mode = (bgmac_read(bgmac, BGMAC_DEV_STATUS) & BGMAC_DS_MM_MASK) >>
>> BGMAC_DS_MM_SHIFT;
>> -   if (ci->id != BCMA_CHIP_ID_BCM47162 || mode != 0)
>> +   if (bgmac->feature_flags & BGMAC_FEAT_CLKCTLST || mode != 0)
>> bgmac_set(bgmac, BCMA_CLKCTLST, BCMA_CLKCTLST_FORCEHT);
>> -   if (ci->id == BCMA_CHIP_ID_BCM47162 && mode == 2)
>> +   if (bgmac->feature_flags & BGMAC_FEAT_CLKCTLST && mode == 2)
>> bcma_chipco_chipctl_maskset(&bgmac->core->bus->drv_cc, 1, ~0,
>> BGMAC_CHIPCTL_1_RXC_DLL_BYPASS);
>
> Jon, it looks to me you translated two following conditions:
> ci->id != BCMA_CHIP_ID_BCM47162
> and
> ci->id == BCMA_CHIP_ID_BCM47162
> into the same flag check:
> bgmac->feature_flags & BGMAC_FEAT_CLKCTLST
>
> I don't think it's intentional, is it? Do you have a moment to fix this?

Ping

-- 
Rafał


linux-next: manual merge of the staging tree with the vfs tree

2016-09-19 Thread Stephen Rothwell
Hi Greg,

Today's linux-next merge of the staging tree got a conflict in:

  drivers/staging/lustre/lustre/llite/file.c

between commit:

  47b34458fc93 ("lustre: use %pD")

from the vfs tree and commit:

  bb5c7f2630de ("staging: lustre: changelog: fix comparison between signed and 
unsigned")

from the staging tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc drivers/staging/lustre/lustre/llite/file.c
index 23249b3e6cad,5d4d17f2de61..
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ b/drivers/staging/lustre/lustre/llite/file.c
@@@ -1141,8 -1123,8 +1123,8 @@@ ll_file_io_generic(const struct lu_env 
struct cl_io *io;
ssize_tresult;
  
-   CDEBUG(D_VFSTRACE, "file: %pD, type: %d ppos: %llu, count: %zd\n",
 -  CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: %llu, count: %zu\n",
 - file->f_path.dentry->d_name.name, iot, *ppos, count);
++  CDEBUG(D_VFSTRACE, "file: %pD, type: %d ppos: %llu, count: %zu\n",
 + file, iot, *ppos, count);
  
  restart:
io = vvp_env_thread_io(env);


linux-next: manual merge of the staging tree with the vfs tree

2016-09-19 Thread Stephen Rothwell
Hi Greg,

Today's linux-next merge of the staging tree got a conflict in:

  drivers/staging/lustre/lustre/llite/file.c

between commit:

  47b34458fc93 ("lustre: use %pD")

from the vfs tree and commit:

  bb5c7f2630de ("staging: lustre: changelog: fix comparison between signed and 
unsigned")

from the staging tree.

I fixed it up (see below) and can carry the fix as necessary. This
is now fixed as far as linux-next is concerned, but any non trivial
conflicts should be mentioned to your upstream maintainer when your tree
is submitted for merging.  You may also want to consider cooperating
with the maintainer of the conflicting tree to minimise any particularly
complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc drivers/staging/lustre/lustre/llite/file.c
index 23249b3e6cad,5d4d17f2de61..
--- a/drivers/staging/lustre/lustre/llite/file.c
+++ b/drivers/staging/lustre/lustre/llite/file.c
@@@ -1141,8 -1123,8 +1123,8 @@@ ll_file_io_generic(const struct lu_env 
struct cl_io *io;
ssize_tresult;
  
-   CDEBUG(D_VFSTRACE, "file: %pD, type: %d ppos: %llu, count: %zd\n",
 -  CDEBUG(D_VFSTRACE, "file: %s, type: %d ppos: %llu, count: %zu\n",
 - file->f_path.dentry->d_name.name, iot, *ppos, count);
++  CDEBUG(D_VFSTRACE, "file: %pD, type: %d ppos: %llu, count: %zu\n",
 + file, iot, *ppos, count);
  
  restart:
io = vvp_env_thread_io(env);


Re: 答复: [PATCH] sunrpc: queue work on system_power_efficient_wq

2016-09-19 Thread Chunyan Zhang
Resending on behalf of Ke Wang.

Thanks,
Chunyan

On 20 September 2016 at 10:33, Ke Wang (王科)  wrote:
> May I have any comments for this patch?
> or
> This patch can be merged directly into next release?
>
> Thanks,
> Ke
> 
> 发件人: Anna Schumaker 
> 发送时间: 2016年9月2日 2:46
> 收件人: Chunyan Zhang; trond.mykleb...@primarydata.com; 
> anna.schuma...@netapp.com; da...@davemloft.net
> 抄送: linux-...@vger.kernel.org; net...@vger.kernel.org; 
> linux-kernel@vger.kernel.org; Ke Wang (王科)
> 主题: Re: [PATCH] sunrpc: queue work on system_power_efficient_wq
>
> On 09/01/2016 03:30 AM, Chunyan Zhang wrote:
>> From: Ke Wang 
>>
>> sunrpc uses workqueue to clean cache regularly. There is no real dependency
>> of executing work on the cpu which queueing it.
>>
>> On a idle system, especially for a heterogeneous systems like big.LITTLE,
>> it is observed that the big idle cpu was woke up many times just to service
>> this work, which against the principle of power saving. It would be better
>> if we can schedule it on a cpu which the scheduler believes to be the most
>> appropriate one.
>>
>> After apply this patch, system_wq will be replaced by
>> system_power_efficient_wq for sunrpc. This functionality is enabled when
>> CONFIG_WQ_POWER_EFFICIENT is selected.
>
> Makes sense to me, but I'm a little surprised that there isn't a 
> "schedule_delayed_power_efficient_work()" function to match how the normal 
> workqueue is used.
>
> Thanks,
> Anna
>
>>
>> Signed-off-by: Ke Wang 
>> ---
>>  net/sunrpc/cache.c | 5 +++--
>>  1 file changed, 3 insertions(+), 2 deletions(-)
>>
>> diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
>> index 4d8e11f..8aabe12 100644
>> --- a/net/sunrpc/cache.c
>> +++ b/net/sunrpc/cache.c
>> @@ -353,7 +353,7 @@ void sunrpc_init_cache_detail(struct cache_detail *cd)
>>   spin_unlock(&cache_list_lock);
>>
>>   /* start the cleaning process */
>> - schedule_delayed_work(&cache_cleaner, 0);
>> + queue_delayed_work(system_power_efficient_wq, &cache_cleaner, 0);
>>  }
>>  EXPORT_SYMBOL_GPL(sunrpc_init_cache_detail);
>>
>> @@ -476,7 +476,8 @@ static void do_cache_clean(struct work_struct *work)
>>   delay = 0;
>>
>>   if (delay)
>> - schedule_delayed_work(&cache_cleaner, delay);
>> + queue_delayed_work(system_power_efficient_wq,
>> +&cache_cleaner, delay);
>>  }
>>
>>
>>
>


Re: 答复: [PATCH] sunrpc: queue work on system_power_efficient_wq

2016-09-19 Thread Chunyan Zhang
Resending on behalf of Ke Wang.

Thanks,
Chunyan

On 20 September 2016 at 10:33, Ke Wang (王科)  wrote:
> May I have any comments for this patch?
> or
> This patch can be merged directly into next release?
>
> Thanks,
> Ke
> 
> 发件人: Anna Schumaker 
> 发送时间: 2016年9月2日 2:46
> 收件人: Chunyan Zhang; trond.mykleb...@primarydata.com; 
> anna.schuma...@netapp.com; da...@davemloft.net
> 抄送: linux-...@vger.kernel.org; net...@vger.kernel.org; 
> linux-kernel@vger.kernel.org; Ke Wang (王科)
> 主题: Re: [PATCH] sunrpc: queue work on system_power_efficient_wq
>
> On 09/01/2016 03:30 AM, Chunyan Zhang wrote:
>> From: Ke Wang 
>>
>> sunrpc uses workqueue to clean cache regularly. There is no real dependency
>> of executing work on the cpu which queueing it.
>>
>> On a idle system, especially for a heterogeneous systems like big.LITTLE,
>> it is observed that the big idle cpu was woke up many times just to service
>> this work, which against the principle of power saving. It would be better
>> if we can schedule it on a cpu which the scheduler believes to be the most
>> appropriate one.
>>
>> After apply this patch, system_wq will be replaced by
>> system_power_efficient_wq for sunrpc. This functionality is enabled when
>> CONFIG_WQ_POWER_EFFICIENT is selected.
>
> Makes sense to me, but I'm a little surprised that there isn't a 
> "schedule_delayed_power_efficient_work()" function to match how the normal 
> workqueue is used.
>
> Thanks,
> Anna
>
>>
>> Signed-off-by: Ke Wang 
>> ---
>>  net/sunrpc/cache.c | 5 +++--
>>  1 file changed, 3 insertions(+), 2 deletions(-)
>>
>> diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
>> index 4d8e11f..8aabe12 100644
>> --- a/net/sunrpc/cache.c
>> +++ b/net/sunrpc/cache.c
>> @@ -353,7 +353,7 @@ void sunrpc_init_cache_detail(struct cache_detail *cd)
>>   spin_unlock(&cache_list_lock);
>>
>>   /* start the cleaning process */
>> - schedule_delayed_work(&cache_cleaner, 0);
>> + queue_delayed_work(system_power_efficient_wq, &cache_cleaner, 0);
>>  }
>>  EXPORT_SYMBOL_GPL(sunrpc_init_cache_detail);
>>
>> @@ -476,7 +476,8 @@ static void do_cache_clean(struct work_struct *work)
>>   delay = 0;
>>
>>   if (delay)
>> - schedule_delayed_work(&cache_cleaner, delay);
>> + queue_delayed_work(system_power_efficient_wq,
>> +&cache_cleaner, delay);
>>  }
>>
>>
>>
>


[PATCH] ARM: cache-uniphier: rename jump label to follow coding style guideline

2016-09-19 Thread Masahiro Yamada
Documentation/CodingStyle recommends to use label names which say
what the goto does or why the goto exists.

Signed-off-by: Masahiro Yamada 
---

 arch/arm/mm/cache-uniphier.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/arm/mm/cache-uniphier.c b/arch/arm/mm/cache-uniphier.c
index eac516a..9a2a249 100644
--- a/arch/arm/mm/cache-uniphier.c
+++ b/arch/arm/mm/cache-uniphier.c
@@ -375,7 +375,7 @@ static int __init __uniphier_cache_init(struct device_node 
*np,
pr_err("L%d: cache-line-size is unspecified or invalid\n",
   *cache_level);
ret = -EINVAL;
-   goto err;
+   goto iounmap;
}
 
if (of_property_read_u32(np, "cache-sets", &data->nsets) ||
@@ -383,7 +383,7 @@ static int __init __uniphier_cache_init(struct device_node 
*np,
pr_err("L%d: cache-sets is unspecified or invalid\n",
   *cache_level);
ret = -EINVAL;
-   goto err;
+   goto iounmap;
}
 
if (of_property_read_u32(np, "cache-size", &cache_size) ||
@@ -391,7 +391,7 @@ static int __init __uniphier_cache_init(struct device_node 
*np,
pr_err("L%d: cache-size is unspecified or invalid\n",
   *cache_level);
ret = -EINVAL;
-   goto err;
+   goto iounmap;
}
 
data->way_present_mask =
@@ -401,21 +401,21 @@ static int __init __uniphier_cache_init(struct 
device_node *np,
if (!data->ctrl_base) {
pr_err("L%d: failed to map control register\n", *cache_level);
ret = -ENOMEM;
-   goto err;
+   goto iounmap;
}
 
data->rev_base = of_iomap(np, 1);
if (!data->rev_base) {
pr_err("L%d: failed to map revision register\n", *cache_level);
ret = -ENOMEM;
-   goto err;
+   goto iounmap;
}
 
data->op_base = of_iomap(np, 2);
if (!data->op_base) {
pr_err("L%d: failed to map operation register\n", *cache_level);
ret = -ENOMEM;
-   goto err;
+   goto iounmap;
}
 
data->way_ctrl_base = data->ctrl_base + 0xc00;
@@ -465,7 +465,7 @@ static int __init __uniphier_cache_init(struct device_node 
*np,
of_node_put(next_np);
 
return ret;
-err:
+iounmap:
iounmap(data->op_base);
iounmap(data->rev_base);
iounmap(data->ctrl_base);
-- 
1.9.1



[PATCH] ARM: cache-uniphier: rename jump label to follow coding style guideline

2016-09-19 Thread Masahiro Yamada
Documentation/CodingStyle recommends to use label names which say
what the goto does or why the goto exists.

Signed-off-by: Masahiro Yamada 
---

 arch/arm/mm/cache-uniphier.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/arm/mm/cache-uniphier.c b/arch/arm/mm/cache-uniphier.c
index eac516a..9a2a249 100644
--- a/arch/arm/mm/cache-uniphier.c
+++ b/arch/arm/mm/cache-uniphier.c
@@ -375,7 +375,7 @@ static int __init __uniphier_cache_init(struct device_node 
*np,
pr_err("L%d: cache-line-size is unspecified or invalid\n",
   *cache_level);
ret = -EINVAL;
-   goto err;
+   goto iounmap;
}
 
if (of_property_read_u32(np, "cache-sets", &data->nsets) ||
@@ -383,7 +383,7 @@ static int __init __uniphier_cache_init(struct device_node 
*np,
pr_err("L%d: cache-sets is unspecified or invalid\n",
   *cache_level);
ret = -EINVAL;
-   goto err;
+   goto iounmap;
}
 
if (of_property_read_u32(np, "cache-size", &cache_size) ||
@@ -391,7 +391,7 @@ static int __init __uniphier_cache_init(struct device_node 
*np,
pr_err("L%d: cache-size is unspecified or invalid\n",
   *cache_level);
ret = -EINVAL;
-   goto err;
+   goto iounmap;
}
 
data->way_present_mask =
@@ -401,21 +401,21 @@ static int __init __uniphier_cache_init(struct 
device_node *np,
if (!data->ctrl_base) {
pr_err("L%d: failed to map control register\n", *cache_level);
ret = -ENOMEM;
-   goto err;
+   goto iounmap;
}
 
data->rev_base = of_iomap(np, 1);
if (!data->rev_base) {
pr_err("L%d: failed to map revision register\n", *cache_level);
ret = -ENOMEM;
-   goto err;
+   goto iounmap;
}
 
data->op_base = of_iomap(np, 2);
if (!data->op_base) {
pr_err("L%d: failed to map operation register\n", *cache_level);
ret = -ENOMEM;
-   goto err;
+   goto iounmap;
}
 
data->way_ctrl_base = data->ctrl_base + 0xc00;
@@ -465,7 +465,7 @@ static int __init __uniphier_cache_init(struct device_node 
*np,
of_node_put(next_np);
 
return ret;
-err:
+iounmap:
iounmap(data->op_base);
iounmap(data->rev_base);
iounmap(data->ctrl_base);
-- 
1.9.1



[INFO] ratio of const vs dynamic usercopy

2016-09-19 Thread Kees Cook
Hi,

Al had asked me a couple weeks back what the ratio of const vs dynamic
usercopying was. With Josh's cleanup and my fix-up to only call the
hardened usercopy when non-const, I can actually gather these statistics
on a build. It's a bit of a hack (see attached patch that should not go
into the tree), but with my not-very-defconfig, it's roughly 2 to 1 const
vs dynamic.  However, this doesn't take into account the frequency at
_runtime_, which maybe could be discovered via perf comparing copy*user()
calls to __check_object_size() calls, but I didn't try that. Does someone
have perf set up to check this?

$ grep 'warning: call to' build.stderr | wc -l
1505
$ grep 'warning: call to' build.stderr | grep 'const usercopy' | wc -l
998
$ grep 'warning: call to' build.stderr | grep 'dynamic usercopy' | wc -l
507

Signed-off-by: Kees Cook 
---
 include/linux/thread_info.h | 8 +++-
 mm/usercopy.c   | 4 
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 2b5b10eed74f..4cae922797e5 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -115,14 +115,20 @@ static inline int arch_within_stack_frames(const void * 
const stack,
 #endif
 
 #ifdef CONFIG_HARDENED_USERCOPY
-extern void __check_object_size(const void *ptr, unsigned long n,
+extern void __compiletime_warning("dynamic usercopy")
+__check_object_size(const void *ptr, unsigned long n,
bool to_user);
 
+extern void __compiletime_warning("builtin-const usercopy")
+__skip_check_object_size(void);
+
 static __always_inline void check_object_size(const void *ptr, unsigned long n,
  bool to_user)
 {
if (!__builtin_constant_p(n))
__check_object_size(ptr, n, to_user);
+   else
+   __skip_check_object_size();
 }
 #else
 static inline void check_object_size(const void *ptr, unsigned long n,
diff --git a/mm/usercopy.c b/mm/usercopy.c
index 089328f2b920..9969a06f5e25 100644
--- a/mm/usercopy.c
+++ b/mm/usercopy.c
@@ -275,3 +275,7 @@ report:
report_usercopy(ptr, n, to_user, err);
 }
 EXPORT_SYMBOL(__check_object_size);
+
+void __skip_check_object_size(void)
+{ }
+EXPORT_SYMBOL(__skip_check_object_size);
-- 
2.7.4


-- 
Kees Cook
Nexus Security


[INFO] ratio of const vs dynamic usercopy

2016-09-19 Thread Kees Cook
Hi,

Al had asked me a couple weeks back what the ratio of const vs dynamic
usercopying was. With Josh's cleanup and my fix-up to only call the
hardened usercopy when non-const, I can actually gather these statistics
on a build. It's a bit of a hack (see attached patch that should not go
into the tree), but with my not-very-defconfig, it's roughly 2 to 1 const
vs dynamic.  However, this doesn't take into account the frequency at
_runtime_, which maybe could be discovered via perf comparing copy*user()
calls to __check_object_size() calls, but I didn't try that. Does someone
have perf set up to check this?

$ grep 'warning: call to' build.stderr | wc -l
1505
$ grep 'warning: call to' build.stderr | grep 'const usercopy' | wc -l
998
$ grep 'warning: call to' build.stderr | grep 'dynamic usercopy' | wc -l
507

Signed-off-by: Kees Cook 
---
 include/linux/thread_info.h | 8 +++-
 mm/usercopy.c   | 4 
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 2b5b10eed74f..4cae922797e5 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -115,14 +115,20 @@ static inline int arch_within_stack_frames(const void * 
const stack,
 #endif
 
 #ifdef CONFIG_HARDENED_USERCOPY
-extern void __check_object_size(const void *ptr, unsigned long n,
+extern void __compiletime_warning("dynamic usercopy")
+__check_object_size(const void *ptr, unsigned long n,
bool to_user);
 
+extern void __compiletime_warning("builtin-const usercopy")
+__skip_check_object_size(void);
+
 static __always_inline void check_object_size(const void *ptr, unsigned long n,
  bool to_user)
 {
if (!__builtin_constant_p(n))
__check_object_size(ptr, n, to_user);
+   else
+   __skip_check_object_size();
 }
 #else
 static inline void check_object_size(const void *ptr, unsigned long n,
diff --git a/mm/usercopy.c b/mm/usercopy.c
index 089328f2b920..9969a06f5e25 100644
--- a/mm/usercopy.c
+++ b/mm/usercopy.c
@@ -275,3 +275,7 @@ report:
report_usercopy(ptr, n, to_user, err);
 }
 EXPORT_SYMBOL(__check_object_size);
+
+void __skip_check_object_size(void)
+{ }
+EXPORT_SYMBOL(__skip_check_object_size);
-- 
2.7.4


-- 
Kees Cook
Nexus Security


Re: [PATCH] arm64, numa: Add cpu_to_node() implementation.

2016-09-19 Thread Ganapatrao Kulkarni
[sending again, previous email was not text]

On Tue, Sep 20, 2016 at 12:19 AM, David Daney  wrote:
> From: David Daney 
>
> The wq_numa_init() function makes a private CPU to node map by calling
> cpu_to_node() early in the boot process, before the non-boot CPUs are
> brought online.  Since the default implementation of cpu_to_node()
> returns zero for CPUs that have never been brought online, the
> workqueue system's view is that *all* CPUs are on node zero.
>
> When the unbound workqueue for a non-zero node is created, the
> tsk_cpus_allowed() for the worker threads is the empty set because
> there are, in the view of the workqueue system, no CPUs on non-zero
> nodes.  The code in try_to_wake_up() using this empty cpumask ends up
> using the cpumask empty set value of NR_CPUS as an index into the
> per-CPU area pointer array, and gets garbage as it is one past the end
> of the array.  This results in:
>
> [0.881970] Unable to handle kernel paging request at virtual address 
> fb1008b926a4
> [1.970095] pgd = fc00094b
> [1.973530] [fb1008b926a4] *pgd=, 
> *pud=, *pmd=
> [1.982610] Internal error: Oops: 9604 [#1] SMP
> [1.987541] Modules linked in:
> [1.990631] CPU: 48 PID: 295 Comm: cpuhp/48 Tainted: GW   
> 4.8.0-rc6-preempt-vol+ #9
> [1.999435] Hardware name: Cavium ThunderX CN88XX board (DT)
> [2.005159] task: fe0fe89cc300 task.stack: fe0fe8b8c000
> [2.011158] PC is at try_to_wake_up+0x194/0x34c
> [2.015737] LR is at try_to_wake_up+0x150/0x34c
> [2.020318] pc : [] lr : [] pstate: 
> 60c5
> [2.027803] sp : fe0fe8b8fb10
> [2.031149] x29: fe0fe8b8fb10 x28: 
> [2.036522] x27: fc0008c63bc8 x26: 1000
> [2.041896] x25: fc0008c63c80 x24: fc0008bfb200
> [2.047270] x23: 00c0 x22: 0004
> [2.052642] x21: fe0fe89d25bc x20: 1000
> [2.058014] x19: fe0fe89d1d00 x18: 
> [2.063386] x17:  x16: 
> [2.068760] x15: 0018 x14: 
> [2.074133] x13:  x12: 
> [2.079505] x11:  x10: 
> [2.084879] x9 :  x8 : 
> [2.090251] x7 : 0040 x6 : 
> [2.095621] x5 :  x4 : 
> [2.100991] x3 :  x2 : 
> [2.106364] x1 : fc0008be4c24 x0 : ff0ada80
> [2.111737]
> [2.113236] Process cpuhp/48 (pid: 295, stack limit = 0xfe0fe8b8c020)
> [2.120102] Stack: (0xfe0fe8b8fb10 to 0xfe0fe8b9)
> [2.125914] fb00:   fe0fe8b8fb80 
> fc00080e7648
> .
> .
> .
> [2.442859] Call trace:
> [2.445327] Exception stack(0xfe0fe8b8f940 to 0xfe0fe8b8fa70)
> [2.451843] f940: fe0fe89d1d00 0400 fe0fe8b8fb10 
> fc00080e7468
> [2.459767] f960: fe0fe8b8f980 fc00080e4958 ff0ff91ab200 
> fc00080e4b64
> [2.467690] f980: fe0fe8b8f9d0 fc00080e515c fe0fe8b8fa80 
> 
> [2.475614] f9a0: fe0fe8b8f9d0 fc00080e58e4 fe0fe8b8fa80 
> 
> [2.483540] f9c0: fe0fe8d1 0040 fe0fe8b8fa50 
> fc00080e5ac4
> [2.491465] f9e0: ff0ada80 fc0008be4c24  
> 
> [2.499387] fa00:    
> 0040
> [2.507309] fa20:    
> 
> [2.515233] fa40:    
> 0018
> [2.523156] fa60:  
> [2.528089] [] try_to_wake_up+0x194/0x34c
> [2.533723] [] wake_up_process+0x28/0x34
> [2.539275] [] create_worker+0x110/0x19c
> [2.544824] [] alloc_unbound_pwq+0x3cc/0x4b0
> [2.550724] [] wq_update_unbound_numa+0x10c/0x1e4
> [2.557066] [] workqueue_online_cpu+0x220/0x28c
> [2.563234] [] cpuhp_invoke_callback+0x6c/0x168
> [2.569398] [] cpuhp_up_callbacks+0x44/0xe4
> [2.575210] [] cpuhp_thread_fun+0x13c/0x148
> [2.581027] [] smpboot_thread_fn+0x19c/0x1a8
> [2.586929] [] kthread+0xdc/0xf0
> [2.591776] [] ret_from_fork+0x10/0x50
> [2.597147] Code: b00057e1 91304021 91005021 b8626822 (b8606821)
> [2.603464] ---[ end trace 58c0cd36b88802bc ]---
> [2.608138] Kernel panic - not syncing: Fatal exception
>
> Fix by supplying a cpu_to_node() implementation that returns correct
> node mappings.
>
> Cc:  # 4.7.x-
> Signed-off-by: David Daney 
>

Acked-by: Ganapatrao Kulkarni 

> ---
>  arch/arm64/include/asm/topology.h |  3 +++
>  

Re: [PATCH] arm64, numa: Add cpu_to_node() implementation.

2016-09-19 Thread Ganapatrao Kulkarni
[sending again, previous email was not text]

On Tue, Sep 20, 2016 at 12:19 AM, David Daney  wrote:
> From: David Daney 
>
> The wq_numa_init() function makes a private CPU to node map by calling
> cpu_to_node() early in the boot process, before the non-boot CPUs are
> brought online.  Since the default implementation of cpu_to_node()
> returns zero for CPUs that have never been brought online, the
> workqueue system's view is that *all* CPUs are on node zero.
>
> When the unbound workqueue for a non-zero node is created, the
> tsk_cpus_allowed() for the worker threads is the empty set because
> there are, in the view of the workqueue system, no CPUs on non-zero
> nodes.  The code in try_to_wake_up() using this empty cpumask ends up
> using the cpumask empty set value of NR_CPUS as an index into the
> per-CPU area pointer array, and gets garbage as it is one past the end
> of the array.  This results in:
>
> [0.881970] Unable to handle kernel paging request at virtual address 
> fb1008b926a4
> [1.970095] pgd = fc00094b
> [1.973530] [fb1008b926a4] *pgd=, 
> *pud=, *pmd=
> [1.982610] Internal error: Oops: 9604 [#1] SMP
> [1.987541] Modules linked in:
> [1.990631] CPU: 48 PID: 295 Comm: cpuhp/48 Tainted: GW   
> 4.8.0-rc6-preempt-vol+ #9
> [1.999435] Hardware name: Cavium ThunderX CN88XX board (DT)
> [2.005159] task: fe0fe89cc300 task.stack: fe0fe8b8c000
> [2.011158] PC is at try_to_wake_up+0x194/0x34c
> [2.015737] LR is at try_to_wake_up+0x150/0x34c
> [2.020318] pc : [] lr : [] pstate: 
> 60c5
> [2.027803] sp : fe0fe8b8fb10
> [2.031149] x29: fe0fe8b8fb10 x28: 
> [2.036522] x27: fc0008c63bc8 x26: 1000
> [2.041896] x25: fc0008c63c80 x24: fc0008bfb200
> [2.047270] x23: 00c0 x22: 0004
> [2.052642] x21: fe0fe89d25bc x20: 1000
> [2.058014] x19: fe0fe89d1d00 x18: 
> [2.063386] x17:  x16: 
> [2.068760] x15: 0018 x14: 
> [2.074133] x13:  x12: 
> [2.079505] x11:  x10: 
> [2.084879] x9 :  x8 : 
> [2.090251] x7 : 0040 x6 : 
> [2.095621] x5 :  x4 : 
> [2.100991] x3 :  x2 : 
> [2.106364] x1 : fc0008be4c24 x0 : ff0ada80
> [2.111737]
> [2.113236] Process cpuhp/48 (pid: 295, stack limit = 0xfe0fe8b8c020)
> [2.120102] Stack: (0xfe0fe8b8fb10 to 0xfe0fe8b9)
> [2.125914] fb00:   fe0fe8b8fb80 
> fc00080e7648
> .
> .
> .
> [2.442859] Call trace:
> [2.445327] Exception stack(0xfe0fe8b8f940 to 0xfe0fe8b8fa70)
> [2.451843] f940: fe0fe89d1d00 0400 fe0fe8b8fb10 
> fc00080e7468
> [2.459767] f960: fe0fe8b8f980 fc00080e4958 ff0ff91ab200 
> fc00080e4b64
> [2.467690] f980: fe0fe8b8f9d0 fc00080e515c fe0fe8b8fa80 
> 
> [2.475614] f9a0: fe0fe8b8f9d0 fc00080e58e4 fe0fe8b8fa80 
> 
> [2.483540] f9c0: fe0fe8d1 0040 fe0fe8b8fa50 
> fc00080e5ac4
> [2.491465] f9e0: ff0ada80 fc0008be4c24  
> 
> [2.499387] fa00:    
> 0040
> [2.507309] fa20:    
> 
> [2.515233] fa40:    
> 0018
> [2.523156] fa60:  
> [2.528089] [] try_to_wake_up+0x194/0x34c
> [2.533723] [] wake_up_process+0x28/0x34
> [2.539275] [] create_worker+0x110/0x19c
> [2.544824] [] alloc_unbound_pwq+0x3cc/0x4b0
> [2.550724] [] wq_update_unbound_numa+0x10c/0x1e4
> [2.557066] [] workqueue_online_cpu+0x220/0x28c
> [2.563234] [] cpuhp_invoke_callback+0x6c/0x168
> [2.569398] [] cpuhp_up_callbacks+0x44/0xe4
> [2.575210] [] cpuhp_thread_fun+0x13c/0x148
> [2.581027] [] smpboot_thread_fn+0x19c/0x1a8
> [2.586929] [] kthread+0xdc/0xf0
> [2.591776] [] ret_from_fork+0x10/0x50
> [2.597147] Code: b00057e1 91304021 91005021 b8626822 (b8606821)
> [2.603464] ---[ end trace 58c0cd36b88802bc ]---
> [2.608138] Kernel panic - not syncing: Fatal exception
>
> Fix by supplying a cpu_to_node() implementation that returns correct
> node mappings.
>
> Cc:  # 4.7.x-
> Signed-off-by: David Daney 
>

Acked-by: Ganapatrao Kulkarni 

> ---
>  arch/arm64/include/asm/topology.h |  3 +++
>  arch/arm64/mm/numa.c  | 18 ++
>  2 files changed, 21 insertions(+)
>
> diff --git 

Re: [PATCH] nfs: cover ->migratepage with CONFIG_MIGRATION

2016-09-19 Thread kbuild test robot
Hi Chao,

[auto build test ERROR on nfs/linux-next]
[also build test ERROR on v4.8-rc7 next-20160919]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]
[Suggest to use git(>=2.9.0) format-patch --base=<commit> (or --base=auto for 
convenience) to record what (public, well-known) commit your patch series was 
built on]
[Check https://git-scm.com/docs/git-format-patch for more information]

url:
https://github.com/0day-ci/linux/commits/Chao-Yu/nfs-cover-migratepage-with-CONFIG_MIGRATION/20160920-121006
base:   git://git.linux-nfs.org/projects/trondmy/linux-nfs.git linux-next
config: i386-randconfig-s1-09191616 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All errors (new ones prefixed by >>):

>> fs/nfs/file.c:547:17: error: 'nfs_migrate_page' undeclared here (not in a 
>> function)
 .migratepage = nfs_migrate_page,
^~~~

vim +/nfs_migrate_page +547 fs/nfs/file.c

4899f9c8 Nick Piggin 2007-10-16  541.write_begin = nfs_write_begin,
4899f9c8 Nick Piggin 2007-10-16  542.write_end = nfs_write_end,
cd52ed35 Trond Myklebust 2006-03-20  543.invalidatepage = 
nfs_invalidate_page,
cd52ed35 Trond Myklebust 2006-03-20  544.releasepage = nfs_release_page,
^1da177e Linus Torvalds  2005-04-16  545.direct_IO = nfs_direct_IO,
daa42d9f Chao Yu 2016-09-20  546  #ifdef CONFIG_MIGRATION
074cc1de Trond Myklebust 2009-08-10 @547.migratepage = nfs_migrate_page,
daa42d9f Chao Yu 2016-09-20  548  #endif
e3db7691 Trond Myklebust 2007-01-10  549.launder_page = 
nfs_launder_page,
f919b196 Mel Gorman  2013-07-03  550.is_dirty_writeback = 
nfs_check_dirty_writeback,

:: The code at line 547 was first introduced by commit
:: 074cc1deec5dee63fcd5d966b36fa4f3765b50fc NFS: Add a ->migratepage() aop 
for NFS

:: TO: Trond Myklebust <trond.mykleb...@netapp.com>
:: CC: Trond Myklebust <trond.mykleb...@netapp.com>

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: [PATCH] nfs: cover ->migratepage with CONFIG_MIGRATION

2016-09-19 Thread kbuild test robot
Hi Chao,

[auto build test ERROR on nfs/linux-next]
[also build test ERROR on v4.8-rc7 next-20160919]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]
[Suggest to use git(>=2.9.0) format-patch --base=<commit> (or --base=auto for 
convenience) to record what (public, well-known) commit your patch series was 
built on]
[Check https://git-scm.com/docs/git-format-patch for more information]

url:
https://github.com/0day-ci/linux/commits/Chao-Yu/nfs-cover-migratepage-with-CONFIG_MIGRATION/20160920-121006
base:   git://git.linux-nfs.org/projects/trondmy/linux-nfs.git linux-next
config: i386-randconfig-s1-09191616 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All errors (new ones prefixed by >>):

>> fs/nfs/file.c:547:17: error: 'nfs_migrate_page' undeclared here (not in a 
>> function)
 .migratepage = nfs_migrate_page,
^~~~

vim +/nfs_migrate_page +547 fs/nfs/file.c

4899f9c8 Nick Piggin 2007-10-16  541.write_begin = nfs_write_begin,
4899f9c8 Nick Piggin 2007-10-16  542.write_end = nfs_write_end,
cd52ed35 Trond Myklebust 2006-03-20  543.invalidatepage = 
nfs_invalidate_page,
cd52ed35 Trond Myklebust 2006-03-20  544.releasepage = nfs_release_page,
^1da177e Linus Torvalds  2005-04-16  545.direct_IO = nfs_direct_IO,
daa42d9f Chao Yu 2016-09-20  546  #ifdef CONFIG_MIGRATION
074cc1de Trond Myklebust 2009-08-10 @547.migratepage = nfs_migrate_page,
daa42d9f Chao Yu 2016-09-20  548  #endif
e3db7691 Trond Myklebust 2007-01-10  549.launder_page = 
nfs_launder_page,
f919b196 Mel Gorman  2013-07-03  550.is_dirty_writeback = 
nfs_check_dirty_writeback,

:: The code at line 547 was first introduced by commit
:: 074cc1deec5dee63fcd5d966b36fa4f3765b50fc NFS: Add a ->migratepage() aop 
for NFS

:: TO: Trond Myklebust 
:: CC: Trond Myklebust 

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


[PATCH] ARM: uniphier: select ARCH_HAS_RESET_CONTROLLER

2016-09-19 Thread Masahiro Yamada
The UniPhier reset driver (drivers/reset/reset-uniphier.c) has been
merged.  Select ARCH_HAS_RESET_CONTROLLER from the SoC Kconfig.

Signed-off-by: Masahiro Yamada 
---

Philipp,

IIRC, you mentioned that you were planning to consolidate the double
guard by CONFIG_RESET_CONTROLLER and CONFIG_ARCH_HAS_RESET_CONTROLLER.

I have not seen it in the ML, so I am sending this.

Please let me know if you have some updates.


 arch/arm/mach-uniphier/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/mach-uniphier/Kconfig b/arch/arm/mach-uniphier/Kconfig
index 82dddee..3930fbb 100644
--- a/arch/arm/mach-uniphier/Kconfig
+++ b/arch/arm/mach-uniphier/Kconfig
@@ -1,6 +1,7 @@
 config ARCH_UNIPHIER
bool "Socionext UniPhier SoCs"
depends on ARCH_MULTI_V7
+   select ARCH_HAS_RESET_CONTROLLER
select ARM_AMBA
select ARM_GLOBAL_TIMER
select ARM_GIC
-- 
1.9.1



[PATCH] ARM: uniphier: select ARCH_HAS_RESET_CONTROLLER

2016-09-19 Thread Masahiro Yamada
The UniPhier reset driver (drivers/reset/reset-uniphier.c) has been
merged.  Select ARCH_HAS_RESET_CONTROLLER from the SoC Kconfig.

Signed-off-by: Masahiro Yamada 
---

Philipp,

IIRC, you mentioned that you were planning to consolidate the double
guard by CONFIG_RESET_CONTROLLER and CONFIG_ARCH_HAS_RESET_CONTROLLER.

I have not seen it in the ML, so I am sending this.

Please let me know if you have some updates.


 arch/arm/mach-uniphier/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/mach-uniphier/Kconfig b/arch/arm/mach-uniphier/Kconfig
index 82dddee..3930fbb 100644
--- a/arch/arm/mach-uniphier/Kconfig
+++ b/arch/arm/mach-uniphier/Kconfig
@@ -1,6 +1,7 @@
 config ARCH_UNIPHIER
bool "Socionext UniPhier SoCs"
depends on ARCH_MULTI_V7
+   select ARCH_HAS_RESET_CONTROLLER
select ARM_AMBA
select ARM_GLOBAL_TIMER
select ARM_GIC
-- 
1.9.1



Re: [PATCH] drm/amdgpu: mark symbols static where possible

2016-09-19 Thread Alex Deucher
On Mon, Sep 19, 2016 at 6:01 AM, Christian König
 wrote:
> Am 18.09.2016 um 16:09 schrieb Baoyou Xie:
>>
>> We get 7 warnings when building kernel with W=1:
>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:1990:5: warning: no previous
>> prototype for 'amdgpu_pre_soft_reset' [-Wmissing-prototypes]
>> drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c:1548:5: warning: no
>> previous prototype for 'amdgpu_connector_virtual_dpms'
>> [-Wmissing-prototypes]
>> drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c:1560:5: warning: no
>> previous prototype for 'amdgpu_connector_virtual_set_property'
>> [-Wmissing-prototypes]
>> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c:330:5: warning: no previous
>> prototype for 'amdgpu_cs_list_validate' [-Wmissing-prototypes]
>> drivers/gpu/drm/amd/amdgpu/dce_virtual.c:98:6: warning: no previous
>> prototype for 'dce_virtual_stop_mc_access' [-Wmissing-prototypes]
>> drivers/gpu/drm/amd/amdgpu/dce_virtual.c:130:6: warning: no previous
>> prototype for 'dce_virtual_resume_mc_access' [-Wmissing-prototypes]
>> drivers/gpu/drm/amd/amdgpu/dce_virtual.c:136:6: warning: no previous
>> prototype for 'dce_virtual_set_vga_render_state' [-Wmissing-prototypes]
>>
>> In fact, all of the functions are only used in the file
>> in which they are declared and don't need a declaration,
>> but can be made static.
>>
>> So this patch marks these functions with 'static'.
>>
>> Signed-off-by: Baoyou Xie 
>
>
> Reviewed-by: Christian König .
>

Applied.  thanks!

Alex

>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 6 --
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
>>   drivers/gpu/drm/amd/amdgpu/dce_virtual.c   | 6 +++---
>>   4 files changed, 9 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
>> index 319a5e1..decbba5 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
>> @@ -1545,7 +1545,8 @@ static int
>> amdgpu_connector_virtual_mode_valid(struct drm_connector *connector,
>> return MODE_OK;
>>   }
>>   -int amdgpu_connector_virtual_dpms(struct drm_connector *connector, int
>> mode)
>> +static int
>> +amdgpu_connector_virtual_dpms(struct drm_connector *connector, int mode)
>>   {
>> return 0;
>>   }
>> @@ -1557,7 +1558,8 @@ amdgpu_connector_virtual_detect(struct drm_connector
>> *connector, bool force)
>> return connector_status_connected;
>>   }
>>   -int amdgpu_connector_virtual_set_property(struct drm_connector
>> *connector,
>> +static int
>> +amdgpu_connector_virtual_set_property(struct drm_connector *connector,
>>   struct drm_property *property,
>>   uint64_t val)
>>   {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index d80e5d3..b408eea 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -327,7 +327,7 @@ retry:
>> return r;
>>   }
>>   -int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
>> +static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
>> struct list_head *validated)
>>   {
>> struct amdgpu_bo_list_entry *lobj;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index c38dc47..09b809d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -1987,7 +1987,7 @@ static bool amdgpu_check_soft_reset(struct
>> amdgpu_device *adev)
>> return asic_hang;
>>   }
>>   -int amdgpu_pre_soft_reset(struct amdgpu_device *adev)
>> +static int amdgpu_pre_soft_reset(struct amdgpu_device *adev)
>>   {
>> int i, r = 0;
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
>> b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
>> index 00663a7..2d02acd 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
>> @@ -95,7 +95,7 @@ static bool dce_virtual_is_display_hung(struct
>> amdgpu_device *adev)
>> return false;
>>   }
>>   -void dce_virtual_stop_mc_access(struct amdgpu_device *adev,
>> +static void dce_virtual_stop_mc_access(struct amdgpu_device *adev,
>>   struct amdgpu_mode_mc_save *save)
>>   {
>> switch (adev->asic_type) {
>> @@ -127,13 +127,13 @@ void dce_virtual_stop_mc_access(struct amdgpu_device
>> *adev,
>> return;
>>   }
>> -void dce_virtual_resume_mc_access(struct amdgpu_device *adev,
>> +static void dce_virtual_resume_mc_access(struct amdgpu_device *adev,
>> struct amdgpu_mode_mc_save *save)
>>   {
>> return;
>>   }
>>   -void 

Re: [PATCH] drm/amdgpu: mark symbols static where possible

2016-09-19 Thread Alex Deucher
On Mon, Sep 19, 2016 at 6:01 AM, Christian König
 wrote:
> Am 18.09.2016 um 16:09 schrieb Baoyou Xie:
>>
>> We get 7 warnings when building kernel with W=1:
>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c:1990:5: warning: no previous
>> prototype for 'amdgpu_pre_soft_reset' [-Wmissing-prototypes]
>> drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c:1548:5: warning: no
>> previous prototype for 'amdgpu_connector_virtual_dpms'
>> [-Wmissing-prototypes]
>> drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c:1560:5: warning: no
>> previous prototype for 'amdgpu_connector_virtual_set_property'
>> [-Wmissing-prototypes]
>> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c:330:5: warning: no previous
>> prototype for 'amdgpu_cs_list_validate' [-Wmissing-prototypes]
>> drivers/gpu/drm/amd/amdgpu/dce_virtual.c:98:6: warning: no previous
>> prototype for 'dce_virtual_stop_mc_access' [-Wmissing-prototypes]
>> drivers/gpu/drm/amd/amdgpu/dce_virtual.c:130:6: warning: no previous
>> prototype for 'dce_virtual_resume_mc_access' [-Wmissing-prototypes]
>> drivers/gpu/drm/amd/amdgpu/dce_virtual.c:136:6: warning: no previous
>> prototype for 'dce_virtual_set_vga_render_state' [-Wmissing-prototypes]
>>
>> In fact, all of the functions are only used in the file
>> in which they are declared and don't need a declaration,
>> but can be made static.
>>
>> So this patch marks these functions with 'static'.
>>
>> Signed-off-by: Baoyou Xie 
>
>
> Reviewed-by: Christian König .
>

Applied.  thanks!

Alex

>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 6 --
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
>>   drivers/gpu/drm/amd/amdgpu/dce_virtual.c   | 6 +++---
>>   4 files changed, 9 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
>> index 319a5e1..decbba5 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
>> @@ -1545,7 +1545,8 @@ static int
>> amdgpu_connector_virtual_mode_valid(struct drm_connector *connector,
>> return MODE_OK;
>>   }
>>   -int amdgpu_connector_virtual_dpms(struct drm_connector *connector, int
>> mode)
>> +static int
>> +amdgpu_connector_virtual_dpms(struct drm_connector *connector, int mode)
>>   {
>> return 0;
>>   }
>> @@ -1557,7 +1558,8 @@ amdgpu_connector_virtual_detect(struct drm_connector
>> *connector, bool force)
>> return connector_status_connected;
>>   }
>>   -int amdgpu_connector_virtual_set_property(struct drm_connector
>> *connector,
>> +static int
>> +amdgpu_connector_virtual_set_property(struct drm_connector *connector,
>>   struct drm_property *property,
>>   uint64_t val)
>>   {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index d80e5d3..b408eea 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -327,7 +327,7 @@ retry:
>> return r;
>>   }
>>   -int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
>> +static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
>> struct list_head *validated)
>>   {
>> struct amdgpu_bo_list_entry *lobj;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index c38dc47..09b809d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -1987,7 +1987,7 @@ static bool amdgpu_check_soft_reset(struct
>> amdgpu_device *adev)
>> return asic_hang;
>>   }
>>   -int amdgpu_pre_soft_reset(struct amdgpu_device *adev)
>> +static int amdgpu_pre_soft_reset(struct amdgpu_device *adev)
>>   {
>> int i, r = 0;
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
>> b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
>> index 00663a7..2d02acd 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
>> @@ -95,7 +95,7 @@ static bool dce_virtual_is_display_hung(struct
>> amdgpu_device *adev)
>> return false;
>>   }
>>   -void dce_virtual_stop_mc_access(struct amdgpu_device *adev,
>> +static void dce_virtual_stop_mc_access(struct amdgpu_device *adev,
>>   struct amdgpu_mode_mc_save *save)
>>   {
>> switch (adev->asic_type) {
>> @@ -127,13 +127,13 @@ void dce_virtual_stop_mc_access(struct amdgpu_device
>> *adev,
>> return;
>>   }
>> -void dce_virtual_resume_mc_access(struct amdgpu_device *adev,
>> +static void dce_virtual_resume_mc_access(struct amdgpu_device *adev,
>> struct amdgpu_mode_mc_save *save)
>>   {
>> return;
>>   }
>>   -void dce_virtual_set_vga_render_state(struct amdgpu_device *adev,
>> +static void 

Re: [RFC v3 18/22] cgroup,landlock: Add CGRP_NO_NEW_PRIVS to handle unprivileged hooks

2016-09-19 Thread Sargun Dhillon
On Thu, Sep 15, 2016 at 09:41:33PM +0200, Mickaël Salaün wrote:
> 
> On 15/09/2016 06:48, Alexei Starovoitov wrote:
> > On Wed, Sep 14, 2016 at 09:38:16PM -0700, Andy Lutomirski wrote:
> >> On Wed, Sep 14, 2016 at 9:31 PM, Alexei Starovoitov
> >>  wrote:
> >>> On Wed, Sep 14, 2016 at 09:08:57PM -0700, Andy Lutomirski wrote:
>  On Wed, Sep 14, 2016 at 9:00 PM, Alexei Starovoitov
>   wrote:
> > On Wed, Sep 14, 2016 at 07:27:08PM -0700, Andy Lutomirski wrote:
> >
> > This RFC handle both cgroup and seccomp approaches in a similar 
> > way. I
> > don't see why building on top of cgroup v2 is a problem. Is there
> > security issues with delegation?
> 
>  What I mean is: cgroup v2 delegation has a functionality problem.
>  Tejun says [1]:
> 
>  We haven't had to face this decision because cgroup has never 
>  properly
>  supported delegating to applications and the in-use setups where this
>  happens are custom configurations where there is no boundary between
>  system and applications and adhoc trial-and-error is good enough a 
>  way
>  to find a working solution.  That wiggle room goes away once we
>  officially open this up to individual applications.
> 
>  Unless and until that changes, I think that landlock should stay away
>  from cgroups.  Others could reasonably disagree with me.
> >>>
> >>> Ours and Sargun's use cases for cgroup+lsm+bpf is not for security
> >>> and not for sandboxing. So the above doesn't matter in such contexts.
> >>> lsm hooks + cgroups provide convenient scope and existing entry 
> >>> points.
> >>> Please see checmate examples how it's used.
> >>>
> >>
> >> To be clear: I'm not arguing at all that there shouldn't be
> >> bpf+lsm+cgroup integration.  I'm arguing that the unprivileged
> >> landlock interface shouldn't expose any cgroup integration, at least
> >> until the cgroup situation settles down a lot.
> >
> > ahh. yes. we're perfectly in agreement here.
> > I'm suggesting that the next RFC shouldn't include unpriv
> > and seccomp at all. Once bpf+lsm+cgroup is merged, we can
> > argue about unpriv with cgroups and even unpriv as a whole,
> > since it's not a given. Seccomp integration is also questionable.
> > I'd rather not have seccomp as a gate keeper for this lsm.
> > lsm and seccomp are orthogonal hook points. Syscalls and lsm hooks
> > don't have one to one relationship, so mixing them up is only
> > asking for trouble further down the road.
> > If we really need to carry some information from seccomp to lsm+bpf,
> > it's easier to add eBPF support to seccomp and let bpf side deal
> > with passing whatever information.
> >
> 
>  As an argument for keeping seccomp (or an extended seccomp) as the
>  interface for an unprivileged bpf+lsm: seccomp already checks off most
>  of the boxes for safely letting unprivileged programs sandbox
>  themselves.
> >>>
> >>> you mean the attach part of seccomp syscall that deals with no_new_priv?
> >>> sure, that's reusable.
> >>>
>  Furthermore, to the extent that there are use cases for
>  unprivileged bpf+lsm that *aren't* expressible within the seccomp
>  hierarchy, I suspect that syscall filters have exactly the same
>  problem and that we should fix seccomp to cover it.
> >>>
> >>> not sure what you mean by 'seccomp hierarchy'. The normal process
> >>> hierarchy ?
> >>
> >> Kind of.  I mean the filter layers that are inherited across fork(),
> >> the TSYNC mechanism, etc.
> >>
> >>> imo the main deficiency of seccomp is inability to look into arguments.
> >>> One can argue that it's a blessing, since composite args
> >>> are not yet copied into the kernel memory.
> >>> But in a lot of cases the seccomp arguments are FDs pointing
> >>> to kernel objects and if programs could examine those objects
> >>> the sandboxing scope would be more precise.
> >>> lsm+bpf solves that part and I'd still argue that it's
> >>> orthogonal to seccomp's pass/reject flow.
> >>> I mean if seccomp says 'ok' the syscall should continue executing
> >>> as normal and whatever LSM hooks were triggered by it may have
> >>> their own lsm+bpf verdicts.
> >>
> >> I agree with all of this...
> >>
> >>> Furthermore in the process hierarchy different children
> >>> should be able to set their own lsm+bpf filters that are not
> >>> related to parallel seccomp+bpf hierarchy of programs.
> >>> seccomp syscall can be an interface to attach programs
> >>> to lsm hooks, but nothing more than that.
> >>
> >> I'm not sure what you mean.  I mean that, logically, I think we should
> >> be able to do:
> >>
> >> seccomp(attach a syscall filter);
> >> fork();
> >> child does seccomp(attach some lsm filters);
> 

Re: [RFC v3 18/22] cgroup,landlock: Add CGRP_NO_NEW_PRIVS to handle unprivileged hooks

2016-09-19 Thread Sargun Dhillon
On Thu, Sep 15, 2016 at 09:41:33PM +0200, Mickaël Salaün wrote:
> 
> On 15/09/2016 06:48, Alexei Starovoitov wrote:
> > On Wed, Sep 14, 2016 at 09:38:16PM -0700, Andy Lutomirski wrote:
> >> On Wed, Sep 14, 2016 at 9:31 PM, Alexei Starovoitov
> >>  wrote:
> >>> On Wed, Sep 14, 2016 at 09:08:57PM -0700, Andy Lutomirski wrote:
>  On Wed, Sep 14, 2016 at 9:00 PM, Alexei Starovoitov
>   wrote:
> > On Wed, Sep 14, 2016 at 07:27:08PM -0700, Andy Lutomirski wrote:
> >
> > This RFC handle both cgroup and seccomp approaches in a similar 
> > way. I
> > don't see why building on top of cgroup v2 is a problem. Is there
> > security issues with delegation?
> 
>  What I mean is: cgroup v2 delegation has a functionality problem.
>  Tejun says [1]:
> 
>  We haven't had to face this decision because cgroup has never 
>  properly
>  supported delegating to applications and the in-use setups where this
>  happens are custom configurations where there is no boundary between
>  system and applications and adhoc trial-and-error is good enough a 
>  way
>  to find a working solution.  That wiggle room goes away once we
>  officially open this up to individual applications.
> 
>  Unless and until that changes, I think that landlock should stay away
>  from cgroups.  Others could reasonably disagree with me.
> >>>
> >>> Ours and Sargun's use cases for cgroup+lsm+bpf is not for security
> >>> and not for sandboxing. So the above doesn't matter in such contexts.
> >>> lsm hooks + cgroups provide convenient scope and existing entry 
> >>> points.
> >>> Please see checmate examples how it's used.
> >>>
> >>
> >> To be clear: I'm not arguing at all that there shouldn't be
> >> bpf+lsm+cgroup integration.  I'm arguing that the unprivileged
> >> landlock interface shouldn't expose any cgroup integration, at least
> >> until the cgroup situation settles down a lot.
> >
> > ahh. yes. we're perfectly in agreement here.
> > I'm suggesting that the next RFC shouldn't include unpriv
> > and seccomp at all. Once bpf+lsm+cgroup is merged, we can
> > argue about unpriv with cgroups and even unpriv as a whole,
> > since it's not a given. Seccomp integration is also questionable.
> > I'd rather not have seccomp as a gate keeper for this lsm.
> > lsm and seccomp are orthogonal hook points. Syscalls and lsm hooks
> > don't have one to one relationship, so mixing them up is only
> > asking for trouble further down the road.
> > If we really need to carry some information from seccomp to lsm+bpf,
> > it's easier to add eBPF support to seccomp and let bpf side deal
> > with passing whatever information.
> >
> 
>  As an argument for keeping seccomp (or an extended seccomp) as the
>  interface for an unprivileged bpf+lsm: seccomp already checks off most
>  of the boxes for safely letting unprivileged programs sandbox
>  themselves.
> >>>
> >>> you mean the attach part of seccomp syscall that deals with no_new_priv?
> >>> sure, that's reusable.
> >>>
>  Furthermore, to the extent that there are use cases for
>  unprivileged bpf+lsm that *aren't* expressible within the seccomp
>  hierarchy, I suspect that syscall filters have exactly the same
>  problem and that we should fix seccomp to cover it.
> >>>
> >>> not sure what you mean by 'seccomp hierarchy'. The normal process
> >>> hierarchy ?
> >>
> >> Kind of.  I mean the filter layers that are inherited across fork(),
> >> the TSYNC mechanism, etc.
> >>
> >>> imo the main deficiency of seccomp is inability to look into arguments.
> >>> One can argue that it's a blessing, since composite args
> >>> are not yet copied into the kernel memory.
> >>> But in a lot of cases the seccomp arguments are FDs pointing
> >>> to kernel objects and if programs could examine those objects
> >>> the sandboxing scope would be more precise.
> >>> lsm+bpf solves that part and I'd still argue that it's
> >>> orthogonal to seccomp's pass/reject flow.
> >>> I mean if seccomp says 'ok' the syscall should continue executing
> >>> as normal and whatever LSM hooks were triggered by it may have
> >>> their own lsm+bpf verdicts.
> >>
> >> I agree with all of this...
> >>
> >>> Furthermore in the process hierarchy different children
> >>> should be able to set their own lsm+bpf filters that are not
> >>> related to parallel seccomp+bpf hierarchy of programs.
> >>> seccomp syscall can be an interface to attach programs
> >>> to lsm hooks, but nothing more than that.
> >>
> >> I'm not sure what you mean.  I mean that, logically, I think we should
> >> be able to do:
> >>
> >> seccomp(attach a syscall filter);
> >> fork();
> >> child does seccomp(attach some lsm filters);
> >>
> >> I think that they *should* be related to the 

Re: [PATCH 3/4] watchdog: sa11x0/pxa: get rid of get_clock_tick_rate

2016-09-19 Thread Guenter Roeck

On 09/19/2016 03:36 PM, Russell King - ARM Linux wrote:

On Mon, Sep 19, 2016 at 01:08:16PM -0700, Guenter Roeck wrote:

On Mon, Sep 19, 2016 at 09:12:14PM +0200, Robert Jarzmik wrote:

The OS timer rate used for the watchdog can now be fetched from the
standard clock API. This will remove the last user of
get_clock_tick_rate() in both pxa and sa11x0 architectures.

Signed-off-by: Robert Jarzmik 


Did you test this ? Potential problem, if built into the kernel, could be that
the clocks might not be ready by the time the driver is instantiated. Unless
this is converted to a platform driver, it won't be able to handle a
-EPROBE_DEFER from the clock subsystem.


Really not a problem at all.  The OSTIMER0 is required for the system
tick, and if that's not present, the kernel will be without any kind
of time keeping, so a missing watchdog driver is the least of the
problems.

Therefore, both PXA and SA11x0 register their clocks really early to
ensure that OSTIMER0 is available by the time_init() stage, which is
way before driver probe time.



You are right. And, at least in qemu, it actually works.

Thanks,
Guenter



Re: [PATCH 3/4] watchdog: sa11x0/pxa: get rid of get_clock_tick_rate

2016-09-19 Thread Guenter Roeck

On 09/19/2016 03:36 PM, Russell King - ARM Linux wrote:

On Mon, Sep 19, 2016 at 01:08:16PM -0700, Guenter Roeck wrote:

On Mon, Sep 19, 2016 at 09:12:14PM +0200, Robert Jarzmik wrote:

The OS timer rate used for the watchdog can now be fetched from the
standard clock API. This will remove the last user of
get_clock_tick_rate() in both pxa and sa11x0 architectures.

Signed-off-by: Robert Jarzmik 


Did you test this ? Potential problem, if built into the kernel, could be that
the clocks might not be ready by the time the driver is instantiated. Unless
this is converted to a platform driver, it won't be able to handle a
-EPROBE_DEFER from the clock subsystem.


Really not a problem at all.  The OSTIMER0 is required for the system
tick, and if that's not present, the kernel will be without any kind
of time keeping, so a missing watchdog driver is the least of the
problems.

Therefore, both PXA and SA11x0 register their clocks really early to
ensure that OSTIMER0 is available by the time_init() stage, which is
way before driver probe time.



You are right. And, at least in qemu, it actually works.

Thanks,
Guenter



Re: [PATCH 3/4] watchdog: sa11x0/pxa: get rid of get_clock_tick_rate

2016-09-19 Thread Guenter Roeck

On 09/19/2016 12:12 PM, Robert Jarzmik wrote:

The OS timer rate used for the watchdog can now be fetched from the
standard clock API. This will remove the last user of
get_clock_tick_rate() in both pxa and sa11x0 architectures.

Signed-off-by: Robert Jarzmik 


Reviewed-by: Guenter Roeck 
Tested-by: Guenter Roeck 


---
 drivers/watchdog/sa1100_wdt.c | 24 +++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/drivers/watchdog/sa1100_wdt.c b/drivers/watchdog/sa1100_wdt.c
index e1d39a1e9628..8965e3f536c3 100644
--- a/drivers/watchdog/sa1100_wdt.c
+++ b/drivers/watchdog/sa1100_wdt.c
@@ -22,6 +22,7 @@

 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -155,12 +156,27 @@ static struct miscdevice sa1100dog_miscdev = {
 };

 static int margin __initdata = 60; /* (secs) Default is 1 minute */
+static struct clk *clk;

 static int __init sa1100dog_init(void)
 {
int ret;

-   oscr_freq = get_clock_tick_rate();
+   clk = clk_get(NULL, "OSTIMER0");
+   if (IS_ERR(clk)) {
+   pr_err("SA1100/PXA2xx Watchdog Timer: clock not found: %d\n",
+  (int) PTR_ERR(clk));
+   return PTR_ERR(clk);
+   }
+
+   ret = clk_prepare_enable(clk);
+   if (ret) {
+   pr_err("SA1100/PXA2xx Watchdog Timer: clock failed to prepare+enable: %d\n",
+  ret);
+   goto err;
+   }
+
+   oscr_freq = clk_get_rate(clk);

/*
 * Read the reset status, and save it for later.  If
@@ -176,11 +192,17 @@ static int __init sa1100dog_init(void)
pr_info("SA1100/PXA2xx Watchdog Timer: timer margin %d sec\n",
margin);
return ret;
+err:
+   clk_disable_unprepare(clk);
+   clk_put(clk);
+   return ret;
 }

 static void __exit sa1100dog_exit(void)
 {
misc_deregister(&sa1100dog_miscdev);
+   clk_disable_unprepare(clk);
+   clk_put(clk);
 }

 module_init(sa1100dog_init);





Re: [PATCH 3/4] watchdog: sa11x0/pxa: get rid of get_clock_tick_rate

2016-09-19 Thread Guenter Roeck

On 09/19/2016 12:12 PM, Robert Jarzmik wrote:

The OS timer rate used for the watchdog can now be fetched from the
standard clock API. This will remove the last user of
get_clock_tick_rate() in both pxa and sa11x0 architectures.

Signed-off-by: Robert Jarzmik 


Reviewed-by: Guenter Roeck 
Tested-by: Guenter Roeck 


---
 drivers/watchdog/sa1100_wdt.c | 24 +++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/drivers/watchdog/sa1100_wdt.c b/drivers/watchdog/sa1100_wdt.c
index e1d39a1e9628..8965e3f536c3 100644
--- a/drivers/watchdog/sa1100_wdt.c
+++ b/drivers/watchdog/sa1100_wdt.c
@@ -22,6 +22,7 @@

 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -155,12 +156,27 @@ static struct miscdevice sa1100dog_miscdev = {
 };

 static int margin __initdata = 60; /* (secs) Default is 1 minute */
+static struct clk *clk;

 static int __init sa1100dog_init(void)
 {
int ret;

-   oscr_freq = get_clock_tick_rate();
+   clk = clk_get(NULL, "OSTIMER0");
+   if (IS_ERR(clk)) {
+   pr_err("SA1100/PXA2xx Watchdog Timer: clock not found: %d\n",
+  (int) PTR_ERR(clk));
+   return PTR_ERR(clk);
+   }
+
+   ret = clk_prepare_enable(clk);
+   if (ret) {
+   pr_err("SA1100/PXA2xx Watchdog Timer: clock failed to prepare+enable: %d\n",
+  ret);
+   goto err;
+   }
+
+   oscr_freq = clk_get_rate(clk);

/*
 * Read the reset status, and save it for later.  If
@@ -176,11 +192,17 @@ static int __init sa1100dog_init(void)
pr_info("SA1100/PXA2xx Watchdog Timer: timer margin %d sec\n",
margin);
return ret;
+err:
+   clk_disable_unprepare(clk);
+   clk_put(clk);
+   return ret;
 }

 static void __exit sa1100dog_exit(void)
 {
misc_deregister(&sa1100dog_miscdev);
+   clk_disable_unprepare(clk);
+   clk_put(clk);
 }

 module_init(sa1100dog_init);





[patch v4] x86/platform/mellanox: introduce support for Mellanox systems platform

2016-09-19 Thread vadimp
From: Vadim Pasternak 

Enable system support for the Mellanox Technologies platform, which
provides support for the following Mellanox basic systems: "msx6710",
"msx6720", "msb7700", "msn2700", "msx1410", "msn2410", "msb7800",
"msn2740", "msn2100", as well as a number of systems derived from
the above basic types.

The Kconfig currently controlling compilation of this code is:
arch/x86/platform:config MLX_PLATFORM
arch/x86/platform:  tristate "Mellanox Technologies platform support"

Signed-off-by: Vadim Pasternak 
---
v1->v2:
 Comments pointed out by Greg:
  - kick out all PCI related code;
v2->v3
 Comments pointed out by Guenter:
  - remove not directly related depends and selects for Kconfig;
  - use single dimensional array for mlxplat_msn21xx_channels;
  - define mlxplat_mux_data and mlxplat_priv as static;
  - fix line split;
  - remove error messages for memory allocation failure;
  - change module alias to dmi;
v3->v4
 Comments pointed out by Thomas:
  - remove dependencies from Kconfig for DMI and I2C.
---
 MAINTAINERS   |   6 +
 arch/x86/Kconfig  |  12 ++
 arch/x86/platform/Makefile|   1 +
 arch/x86/platform/mellanox/Makefile   |   1 +
 arch/x86/platform/mellanox/mlx-platform.c | 268 ++
 5 files changed, 288 insertions(+)
 create mode 100644 arch/x86/platform/mellanox/Makefile
 create mode 100644 arch/x86/platform/mellanox/mlx-platform.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 4705c94..98ced39 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7664,6 +7664,12 @@ W:   http://www.mellanox.com
 Q: http://patchwork.ozlabs.org/project/netdev/list/
 F: drivers/net/ethernet/mellanox/mlxsw/
 
+MELLANOX PLATFORM DRIVER
+M:  Vadim Pasternak 
+L:  platform-driver-...@vger.kernel.org
+S:  Supported
+F:  arch/x86/platform/mellanox/mlx-platform.c
+
 SOFT-ROCE DRIVER (rxe)
 M: Moni Shoua 
 L: linux-r...@vger.kernel.org
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c580d8c..a6df619 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -550,6 +550,18 @@ config X86_INTEL_QUARK
  Say Y here if you have a Quark based system such as the Arduino
  compatible Intel Galileo.
 
+config MLX_PLATFORM
+   tristate "Mellanox Technologies platform support"
+   depends on X86_64
+   depends on X86_EXTENDED_PLATFORM
+   ---help---
+ This option enables system support for the Mellanox Technologies
+ platform.
+
+ Say Y here if you are building a kernel for Mellanox system.
+
+ Otherwise, say N.
+
 config X86_INTEL_LPSS
bool "Intel Low Power Subsystem Support"
depends on X86 && ACPI
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile
index 184842e..3c3c19e 100644
--- a/arch/x86/platform/Makefile
+++ b/arch/x86/platform/Makefile
@@ -8,6 +8,7 @@ obj-y   += iris/
 obj-y  += intel/
 obj-y  += intel-mid/
 obj-y  += intel-quark/
+obj-y  += mellanox/
 obj-y  += olpc/
 obj-y  += scx200/
 obj-y  += sfi/
diff --git a/arch/x86/platform/mellanox/Makefile 
b/arch/x86/platform/mellanox/Makefile
new file mode 100644
index 000..f43c931
--- /dev/null
+++ b/arch/x86/platform/mellanox/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_MLX_PLATFORM) += mlx-platform.o
diff --git a/arch/x86/platform/mellanox/mlx-platform.c 
b/arch/x86/platform/mellanox/mlx-platform.c
new file mode 100644
index 000..cdc44a1
--- /dev/null
+++ b/arch/x86/platform/mellanox/mlx-platform.c
@@ -0,0 +1,268 @@
+/*
+ * arch/x86/platform/mellanox/mlx-platform.c
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Vadim Pasternak 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in the
+ *documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *contributors may be used to endorse or promote products derived from
+ *this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT 

[patch v4] x86/platform/mellanox: introduce support for Mellanox systems platform

2016-09-19 Thread vadimp
From: Vadim Pasternak 

Enable system support for the Mellanox Technologies platform, which
provides support for the following Mellanox basic systems: "msx6710",
"msx6720", "msb7700", "msn2700", "msx1410", "msn2410", "msb7800",
"msn2740", "msn2100", as well as a number of systems derived from
the above basic types.

The Kconfig currently controlling compilation of this code is:
arch/x86/platform:config MLX_PLATFORM
arch/x86/platform:  tristate "Mellanox Technologies platform support"

Signed-off-by: Vadim Pasternak 
---
v1->v2:
 Comments pointed out by Greg:
  - kick out all PCI related code;
v2->v3
 Comments pointed out by Guenter:
  - remove not directly related depends and selects for Kconfig;
  - use single dimensional array for mlxplat_msn21xx_channels;
  - define mlxplat_mux_data and mlxplat_priv as static;
  - fix line split;
  - remove error messages for memory allocation failure;
  - change module alias to dmi;
v3->v4
 Comments pointed out by Thomas:
  - remove dependencies from Kconfig for DMI and I2C.
---
 MAINTAINERS   |   6 +
 arch/x86/Kconfig  |  12 ++
 arch/x86/platform/Makefile|   1 +
 arch/x86/platform/mellanox/Makefile   |   1 +
 arch/x86/platform/mellanox/mlx-platform.c | 268 ++
 5 files changed, 288 insertions(+)
 create mode 100644 arch/x86/platform/mellanox/Makefile
 create mode 100644 arch/x86/platform/mellanox/mlx-platform.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 4705c94..98ced39 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7664,6 +7664,12 @@ W:   http://www.mellanox.com
 Q: http://patchwork.ozlabs.org/project/netdev/list/
 F: drivers/net/ethernet/mellanox/mlxsw/
 
+MELLANOX PLATFORM DRIVER
+M:  Vadim Pasternak 
+L:  platform-driver-...@vger.kernel.org
+S:  Supported
+F:  arch/x86/platform/mellanox/mlx-platform.c
+
 SOFT-ROCE DRIVER (rxe)
 M: Moni Shoua 
 L: linux-r...@vger.kernel.org
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c580d8c..a6df619 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -550,6 +550,18 @@ config X86_INTEL_QUARK
  Say Y here if you have a Quark based system such as the Arduino
  compatible Intel Galileo.
 
+config MLX_PLATFORM
+   tristate "Mellanox Technologies platform support"
+   depends on X86_64
+   depends on X86_EXTENDED_PLATFORM
+   ---help---
+ This option enables system support for the Mellanox Technologies
+ platform.
+
+ Say Y here if you are building a kernel for Mellanox system.
+
+ Otherwise, say N.
+
 config X86_INTEL_LPSS
bool "Intel Low Power Subsystem Support"
depends on X86 && ACPI
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile
index 184842e..3c3c19e 100644
--- a/arch/x86/platform/Makefile
+++ b/arch/x86/platform/Makefile
@@ -8,6 +8,7 @@ obj-y   += iris/
 obj-y  += intel/
 obj-y  += intel-mid/
 obj-y  += intel-quark/
+obj-y  += mellanox/
 obj-y  += olpc/
 obj-y  += scx200/
 obj-y  += sfi/
diff --git a/arch/x86/platform/mellanox/Makefile 
b/arch/x86/platform/mellanox/Makefile
new file mode 100644
index 000..f43c931
--- /dev/null
+++ b/arch/x86/platform/mellanox/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_MLX_PLATFORM) += mlx-platform.o
diff --git a/arch/x86/platform/mellanox/mlx-platform.c 
b/arch/x86/platform/mellanox/mlx-platform.c
new file mode 100644
index 000..cdc44a1
--- /dev/null
+++ b/arch/x86/platform/mellanox/mlx-platform.c
@@ -0,0 +1,268 @@
+/*
+ * arch/x86/platform/mellanox/mlx-platform.c
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Vadim Pasternak 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in the
+ *documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *contributors may be used to endorse or promote products derived from
+ *this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 

Re: [PATCH] drm/amdgpu: remove unused functions

2016-09-19 Thread Alex Deucher
On Mon, Sep 19, 2016 at 6:02 AM, Christian König
 wrote:
> Am 18.09.2016 um 16:13 schrieb Baoyou Xie:
>>
>> We get 2 warnings when building kernel with W=1:
>> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c:146:5: warning: no previous
>> prototype for 'pool_to_domain' [-Wmissing-prototypes]
>> drivers/gpu/drm/amd/amdgpu/cz_smc.c:104:5: warning: no previous prototype
>> for 'cz_send_msg_to_smc_with_parameter_async' [-Wmissing-prototypes]
>>
>> In fact, both functions are called by no one and not exported,
>> so this patch removes them.
>>
>> Signed-off-by: Baoyou Xie 
>
>
> Reviewed-by: Christian König 
>
>

Applied.  thanks!

Alex

>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 8 
>>   drivers/gpu/drm/amd/amdgpu/cz_smc.c| 7 ---
>>   2 files changed, 15 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> index d080d08..dba8a5b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> @@ -143,14 +143,6 @@ int amdgpu_amdkfd_resume(struct amdgpu_device *rdev)
>> return r;
>>   }
>>   -u32 pool_to_domain(enum kgd_memory_pool p)
>> -{
>> -   switch (p) {
>> -   case KGD_POOL_FRAMEBUFFER: return AMDGPU_GEM_DOMAIN_VRAM;
>> -   default: return AMDGPU_GEM_DOMAIN_GTT;
>> -   }
>> -}
>> -
>>   int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
>> void **mem_obj, uint64_t *gpu_addr,
>> void **cpu_ptr)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/cz_smc.c
>> b/drivers/gpu/drm/amd/amdgpu/cz_smc.c
>> index 69ac373c4..db67e0c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/cz_smc.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/cz_smc.c
>> @@ -101,13 +101,6 @@ int cz_send_msg_to_smc(struct amdgpu_device *adev,
>> u16 msg)
>> return 0;
>>   }
>>   -int cz_send_msg_to_smc_with_parameter_async(struct amdgpu_device *adev,
>> -   u16 msg, u32 parameter)
>> -{
>> -   WREG32(mmSMU_MP1_SRBM2P_ARG_0, parameter);
>> -   return cz_send_msg_to_smc_async(adev, msg);
>> -}
>> -
>>   int cz_send_msg_to_smc_with_parameter(struct amdgpu_device *adev,
>> u16 msg, u32 parameter)
>>   {
>
>
>
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH] drm/amdgpu: remove unused functions

2016-09-19 Thread Alex Deucher
On Mon, Sep 19, 2016 at 6:02 AM, Christian König
 wrote:
> Am 18.09.2016 um 16:13 schrieb Baoyou Xie:
>>
>> We get 2 warnings when building kernel with W=1:
>> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c:146:5: warning: no previous
>> prototype for 'pool_to_domain' [-Wmissing-prototypes]
>> drivers/gpu/drm/amd/amdgpu/cz_smc.c:104:5: warning: no previous prototype
>> for 'cz_send_msg_to_smc_with_parameter_async' [-Wmissing-prototypes]
>>
>> In fact, both functions are called by no one and not exported,
>> so this patch removes them.
>>
>> Signed-off-by: Baoyou Xie 
>
>
> Reviewed-by: Christian König 
>
>

Applied.  thanks!

Alex

>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 8 
>>   drivers/gpu/drm/amd/amdgpu/cz_smc.c| 7 ---
>>   2 files changed, 15 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> index d080d08..dba8a5b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> @@ -143,14 +143,6 @@ int amdgpu_amdkfd_resume(struct amdgpu_device *rdev)
>> return r;
>>   }
>>   -u32 pool_to_domain(enum kgd_memory_pool p)
>> -{
>> -   switch (p) {
>> -   case KGD_POOL_FRAMEBUFFER: return AMDGPU_GEM_DOMAIN_VRAM;
>> -   default: return AMDGPU_GEM_DOMAIN_GTT;
>> -   }
>> -}
>> -
>>   int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
>> void **mem_obj, uint64_t *gpu_addr,
>> void **cpu_ptr)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/cz_smc.c
>> b/drivers/gpu/drm/amd/amdgpu/cz_smc.c
>> index 69ac373c4..db67e0c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/cz_smc.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/cz_smc.c
>> @@ -101,13 +101,6 @@ int cz_send_msg_to_smc(struct amdgpu_device *adev,
>> u16 msg)
>> return 0;
>>   }
>>   -int cz_send_msg_to_smc_with_parameter_async(struct amdgpu_device *adev,
>> -   u16 msg, u32 parameter)
>> -{
>> -   WREG32(mmSMU_MP1_SRBM2P_ARG_0, parameter);
>> -   return cz_send_msg_to_smc_async(adev, msg);
>> -}
>> -
>>   int cz_send_msg_to_smc_with_parameter(struct amdgpu_device *adev,
>> u16 msg, u32 parameter)
>>   {
>
>
>
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH] drm/amdgpu: amend amdgpu_gfx_parse_disable_cu() declaration

2016-09-19 Thread Alex Deucher
On Sun, Sep 18, 2016 at 9:37 AM, Baoyou Xie  wrote:
> In amdgpu_gfx.h, the declaration of amdgpu_gfx_parse_disable_cu()
> is incorrect.
>
> Signed-off-by: Baoyou Xie 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 4 +++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 5 -
>  2 files changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 01a42b6..8575039 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -82,7 +82,9 @@ void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, 
> uint32_t reg)
>   * The bitmask of CUs to be disabled in the shader array determined by se and
>   * sh is stored in mask[se * max_sh + sh].
>   */
> -void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned 
> max_sh)
> +void amdgpu_gfx_parse_disable_cu(unsigned int *mask,
> +   unsigned int max_se,
> +   unsigned int max_sh)

For consistency with the rest of the driver can you keep the other
parameters as just unsigned?

Alex

>  {
> unsigned se, sh, cu;
> const char *p;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 51321e1..0b9ad4f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -27,6 +27,9 @@
>  int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg);
>  void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg);
>
> -unsigned amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, 
> unsigned max_sh);
> +void
> +amdgpu_gfx_parse_disable_cu(unsigned int *mask,
> +  unsigned int max_se,
> +  unsigned int max_sh);
>
>  #endif
> --
> 2.7.4
>
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [PATCH] drm/amdgpu: amend amdgpu_gfx_parse_disable_cu() declaration

2016-09-19 Thread Alex Deucher
On Sun, Sep 18, 2016 at 9:37 AM, Baoyou Xie  wrote:
> In amdgpu_gfx.h, the declaration of amdgpu_gfx_parse_disable_cu()
> is incorrect.
>
> Signed-off-by: Baoyou Xie 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 4 +++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 5 -
>  2 files changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 01a42b6..8575039 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -82,7 +82,9 @@ void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, 
> uint32_t reg)
>   * The bitmask of CUs to be disabled in the shader array determined by se and
>   * sh is stored in mask[se * max_sh + sh].
>   */
> -void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned 
> max_sh)
> +void amdgpu_gfx_parse_disable_cu(unsigned int *mask,
> +   unsigned int max_se,
> +   unsigned int max_sh)

For consistency with the rest of the driver can you keep the other
parameters as just unsigned?

Alex

>  {
> unsigned se, sh, cu;
> const char *p;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> index 51321e1..0b9ad4f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> @@ -27,6 +27,9 @@
>  int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg);
>  void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg);
>
> -unsigned amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, 
> unsigned max_sh);
> +void
> +amdgpu_gfx_parse_disable_cu(unsigned int *mask,
> +  unsigned int max_se,
> +  unsigned int max_sh);
>
>  #endif
> --
> 2.7.4
>
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: [RFC] Arm64 boot fail with numa enable in BIOS

2016-09-19 Thread Yisheng Xie


On 2016/9/20 10:01, Ming Lei wrote:
> On Mon, Sep 19, 2016 at 9:05 PM, Yisheng Xie  wrote:
>> hi all,
>> When I enable NUMA in BIOS for arm64, it failed to boot on 
>> v4.8-rc4-162-g071e31e.
>> For the crash log, it seems caused by error number of cpumask.
>> Any ideas about it?
> 
> When I played v4.7 on ARM64 with NUMA, I saw the same issue[1] too,
> but it can be avoided by reverting e9d867a(sched: Allow per-cpu kernel
> threads to run on online && !active).
> 
> But with v4.8-rc6, looks the issue can't be observed any more, so I guess
> it has been fixed with some recent patch.
> 
> 
> [1] https://lkml.org/lkml/2016/8/8/74
> 
> Thanks,
> 
Hi Ming,
Thanks for this info.
Do you use the same config as me? I have tried 4.8.0-rc6-00331-gb01cf67
and it also has the same problem.

Thanks
Yisheng Xie

>>
>> Thanks.
>>
>> The related config and detail dmesg can be seen in the attachment.
>>
>> --- crash messages ---
>> [1.279155] [ cut here ]
>> [1.537146] WARNING: CPU: 16 PID: 103 at ./include/linux/cpumask.h:121 
>> try_to_wake_up+0x298/0x300
>> [1.546112] Modules linked in:
>> [1.549190]
>> [1.550687] CPU: 16 PID: 103 Comm: cpuhp/16 Tainted: GW   
>> 4.8.0-rc4-00163-g803ea3a #21
>> [1.559741] Hardware name: Hisilicon Hi1616 Evaluation Board (DT)
>> [1.565896] task: 8013e9678000 task.stack: 8013e9674000
>> [1.571874] PC is at try_to_wake_up+0x298/0x300
>> [1.576446] LR is at try_to_wake_up+0x278/0x300
>> [1.581019] pc : [] lr : [] pstate: 
>> 20c5
>> [1.588490] sp : 8013e9677b90
>> [1.591832] x29: 8013e9677b90 x28: 8413eb81a4b0
>> [1.597196] x27: 008c x26: 08d6e840
>> [1.602561] x25: 0004 x24: 8013e96e82e0
>> [1.607925] x23: 0040 x22: 00c0
>> [1.613289] x21: 8013e96e868c x20: 
>> [1.618653] x19: 8013e96e8000 x18: 
>> [1.624018] x17:  x16: 03010066
>> [1.629381] x15: 08ca8000 x14: 0003
>> [1.634745] x13: 0026 x12: 0009
>> [1.640109] x11: 0009 x10: 
>> [1.645472] x9 :  x8 : 0014
>> [1.650837] x7 : 8013e9452e00 x6 : 
>> [1.656200] x5 :  x4 : 
>> [1.661565] x3 :  x2 : 0040
>> [1.666929] x1 : 0001 x0 : 08d63df9
>> [1.672293]
>> [1.673788] ---[ end trace b58e70f3295a8cd8 ]---
>> [1.678448] Call trace:
>> [1.680911] Exception stack(0x8013e96779c0 to 0x8013e9677af0)
>> [1.687417] 79c0: 8013e96e8000 0001 8013e9677b90 
>> 080df66c
>> [1.695329] 79e0:  0808e1f4  
>> 8013e9d30c80
>> [1.703242] 7a00: 8013e9677a20 0882b6f4 8013e9677a60 
>> 080dd384
>> [1.711153] 7a20:  8013e9677b00 08cbaa00 
>> 08d6e000
>> [1.719065] 7a40:   0001 
>> 0080
>> [1.726977] 7a60: 08d63df9 0001 0040 
>> 
>> [1.734889] 7a80:    
>> 8013e9452e00
>> [1.742801] 7aa0: 0014   
>> 0009
>> [1.750713] 7ac0: 0009 0026 0003 
>> 08ca8000
>> [1.758624] 7ae0: 03010066 
>> [1.763548] [] try_to_wake_up+0x298/0x300
>> [1.769175] [] wake_up_process+0x14/0x1c
>> [1.774716] [] create_worker+0x108/0x194
>> [1.780255] [] alloc_unbound_pwq+0x1e4/0x398
>> [1.786146] [] wq_update_unbound_numa+0xdc/0x190
>> [1.792389] [] workqueue_online_cpu+0x254/0x2a8
>> [1.798545] [] cpuhp_up_callbacks+0x54/0x100
>> [1.804436] [] cpuhp_thread_fun+0x12c/0x13c
>> [1.810240] [] smpboot_thread_fn+0x1a8/0x1cc
>> [1.816130] [] kthread+0xd4/0xe8
>> [1.820967] [] ret_from_fork+0x10/0x40
>> [1.826334] Unable to handle kernel paging request at virtual address 
>> fffe841404c71524
>> [1.834333] pgd = 08dae000
>> [1.837762] [fffe841404c71524] *pgd=0413fbfee003, 
>> *pud=
>> [1.844797] Internal error: Oops: 9604 [#1] SMP
>> [1.849720] Modules linked in:
>> [1.852799] CPU: 16 PID: 103 Comm: cpuhp/16 Tainted: GW   
>> 4.8.0-rc4-00163-g803ea3a #21
>> [1.861853] Hardware name: Hisilicon Hi1616 Evaluation Board (DT)
>> [1.868007] task: 8013e9678000 task.stack: 8013e9674000
>> [1.873985] PC is at try_to_wake_up+0x148/0x300
>> [1.878557] LR is at try_to_wake_up+0x11c/0x300
>> [1.883129] pc : [] lr : [] pstate: 
>> 60c5
>> [1.890602] sp : 8013e9677b90
>> [1.893943] x29: 

Re: [RFC] Arm64 boot fail with numa enable in BIOS

2016-09-19 Thread Yisheng Xie


On 2016/9/20 10:01, Ming Lei wrote:
> On Mon, Sep 19, 2016 at 9:05 PM, Yisheng Xie  wrote:
>> hi all,
>> When I enable NUMA in BIOS for arm64, it failed to boot on 
>> v4.8-rc4-162-g071e31e.
>> For the crash log, it seems caused by error number of cpumask.
>> Any ideas about it?
> 
> When I played v4.7 on ARM64 with NUMA, I saw the same issue[1] too,
> but it can be avoided by reverting e9d867a(sched: Allow per-cpu kernel
> threads to run on online && !active).
> 
> But with v4.8-rc6, looks the issue can't be observed any more, so I guess
> it has been fixed with some recent patch.
> 
> 
> [1] https://lkml.org/lkml/2016/8/8/74
> 
> Thanks,
> 
Hi Ming,
Thanks for this info.
Do you use the same config as me? I have tried 4.8.0-rc6-00331-gb01cf67
and it also has the same problem.

Thanks
Yisheng Xie

>>
>> Thanks.
>>
>> The related config and detail dmesg can be seen in the attachment.
>>
>> --- crash messages ---
>> [1.279155] [ cut here ]
>> [1.537146] WARNING: CPU: 16 PID: 103 at ./include/linux/cpumask.h:121 
>> try_to_wake_up+0x298/0x300
>> [1.546112] Modules linked in:
>> [1.549190]
>> [1.550687] CPU: 16 PID: 103 Comm: cpuhp/16 Tainted: GW   
>> 4.8.0-rc4-00163-g803ea3a #21
>> [1.559741] Hardware name: Hisilicon Hi1616 Evaluation Board (DT)
>> [1.565896] task: 8013e9678000 task.stack: 8013e9674000
>> [1.571874] PC is at try_to_wake_up+0x298/0x300
>> [1.576446] LR is at try_to_wake_up+0x278/0x300
>> [1.581019] pc : [] lr : [] pstate: 
>> 20c5
>> [1.588490] sp : 8013e9677b90
>> [1.591832] x29: 8013e9677b90 x28: 8413eb81a4b0
>> [1.597196] x27: 008c x26: 08d6e840
>> [1.602561] x25: 0004 x24: 8013e96e82e0
>> [1.607925] x23: 0040 x22: 00c0
>> [1.613289] x21: 8013e96e868c x20: 
>> [1.618653] x19: 8013e96e8000 x18: 
>> [1.624018] x17:  x16: 03010066
>> [1.629381] x15: 08ca8000 x14: 0003
>> [1.634745] x13: 0026 x12: 0009
>> [1.640109] x11: 0009 x10: 
>> [1.645472] x9 :  x8 : 0014
>> [1.650837] x7 : 8013e9452e00 x6 : 
>> [1.656200] x5 :  x4 : 
>> [1.661565] x3 :  x2 : 0040
>> [1.666929] x1 : 0001 x0 : 08d63df9
>> [1.672293]
>> [1.673788] ---[ end trace b58e70f3295a8cd8 ]---
>> [1.678448] Call trace:
>> [1.680911] Exception stack(0x8013e96779c0 to 0x8013e9677af0)
>> [1.687417] 79c0: 8013e96e8000 0001 8013e9677b90 
>> 080df66c
>> [1.695329] 79e0:  0808e1f4  
>> 8013e9d30c80
>> [1.703242] 7a00: 8013e9677a20 0882b6f4 8013e9677a60 
>> 080dd384
>> [1.711153] 7a20:  8013e9677b00 08cbaa00 
>> 08d6e000
>> [1.719065] 7a40:   0001 
>> 0080
>> [1.726977] 7a60: 08d63df9 0001 0040 
>> 
>> [1.734889] 7a80:    
>> 8013e9452e00
>> [1.742801] 7aa0: 0014   
>> 0009
>> [1.750713] 7ac0: 0009 0026 0003 
>> 08ca8000
>> [1.758624] 7ae0: 03010066 
>> [1.763548] [] try_to_wake_up+0x298/0x300
>> [1.769175] [] wake_up_process+0x14/0x1c
>> [1.774716] [] create_worker+0x108/0x194
>> [1.780255] [] alloc_unbound_pwq+0x1e4/0x398
>> [1.786146] [] wq_update_unbound_numa+0xdc/0x190
>> [1.792389] [] workqueue_online_cpu+0x254/0x2a8
>> [1.798545] [] cpuhp_up_callbacks+0x54/0x100
>> [1.804436] [] cpuhp_thread_fun+0x12c/0x13c
>> [1.810240] [] smpboot_thread_fn+0x1a8/0x1cc
>> [1.816130] [] kthread+0xd4/0xe8
>> [1.820967] [] ret_from_fork+0x10/0x40
>> [1.826334] Unable to handle kernel paging request at virtual address 
>> fffe841404c71524
>> [1.834333] pgd = 08dae000
>> [1.837762] [fffe841404c71524] *pgd=0413fbfee003, 
>> *pud=
>> [1.844797] Internal error: Oops: 9604 [#1] SMP
>> [1.849720] Modules linked in:
>> [1.852799] CPU: 16 PID: 103 Comm: cpuhp/16 Tainted: GW   
>> 4.8.0-rc4-00163-g803ea3a #21
>> [1.861853] Hardware name: Hisilicon Hi1616 Evaluation Board (DT)
>> [1.868007] task: 8013e9678000 task.stack: 8013e9674000
>> [1.873985] PC is at try_to_wake_up+0x148/0x300
>> [1.878557] LR is at try_to_wake_up+0x11c/0x300
>> [1.883129] pc : [] lr : [] pstate: 
>> 60c5
>> [1.890602] sp : 8013e9677b90
>> [1.893943] x29: 8013e9677b90 x28: 

[PATCH] nfs: cover ->migratepage with CONFIG_MIGRATION

2016-09-19 Thread Chao Yu
It is cleaner to use CONFIG_MIGRATION to cover nfs' private
.migratepage in nfs_file_aops, as we do in other parts of the nfs
operations.

Signed-off-by: Chao Yu 
---
 fs/nfs/file.c | 2 ++
 fs/nfs/internal.h | 8 
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 7d62097..6cfb83e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -543,7 +543,9 @@ const struct address_space_operations nfs_file_aops = {
.invalidatepage = nfs_invalidate_page,
.releasepage = nfs_release_page,
.direct_IO = nfs_direct_IO,
+#ifdef CONFIG_MIGRATION
.migratepage = nfs_migrate_page,
+#endif
.launder_page = nfs_launder_page,
.is_dirty_writeback = nfs_check_dirty_writeback,
.error_remove_page = generic_error_remove_page,
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 7ce5e02..0d508f7 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -532,14 +532,6 @@ void nfs_clear_pnfs_ds_commit_verifiers(struct 
pnfs_ds_commit_info *cinfo)
 }
 #endif
 
-
-#ifdef CONFIG_MIGRATION
-extern int nfs_migrate_page(struct address_space *,
-   struct page *, struct page *, enum migrate_mode);
-#else
-#define nfs_migrate_page NULL
-#endif
-
 static inline int
 nfs_write_verifier_cmp(const struct nfs_write_verifier *v1,
const struct nfs_write_verifier *v2)
-- 
2.8.2.311.gee88674



[PATCH] nfs: cover ->migratepage with CONFIG_MIGRATION

2016-09-19 Thread Chao Yu
It is cleaner to use CONFIG_MIGRATION to cover nfs' private
.migratepage in nfs_file_aops, as we do in other parts of the nfs
operations.

Signed-off-by: Chao Yu 
---
 fs/nfs/file.c | 2 ++
 fs/nfs/internal.h | 8 
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 7d62097..6cfb83e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -543,7 +543,9 @@ const struct address_space_operations nfs_file_aops = {
.invalidatepage = nfs_invalidate_page,
.releasepage = nfs_release_page,
.direct_IO = nfs_direct_IO,
+#ifdef CONFIG_MIGRATION
.migratepage = nfs_migrate_page,
+#endif
.launder_page = nfs_launder_page,
.is_dirty_writeback = nfs_check_dirty_writeback,
.error_remove_page = generic_error_remove_page,
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 7ce5e02..0d508f7 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -532,14 +532,6 @@ void nfs_clear_pnfs_ds_commit_verifiers(struct 
pnfs_ds_commit_info *cinfo)
 }
 #endif
 
-
-#ifdef CONFIG_MIGRATION
-extern int nfs_migrate_page(struct address_space *,
-   struct page *, struct page *, enum migrate_mode);
-#else
-#define nfs_migrate_page NULL
-#endif
-
 static inline int
 nfs_write_verifier_cmp(const struct nfs_write_verifier *v1,
const struct nfs_write_verifier *v2)
-- 
2.8.2.311.gee88674



[PATCH v7 8/8] ARM: dts: imx6q-evi: Fix onboard hub reset line

2016-09-19 Thread Peter Chen
From: Joshua Clayton 

Previously the onboard hub was made to work by treating its
reset gpio as a regulator enable.
Get rid of that kludge now that pwseq has added reset gpio support.
Move the pin muxing for the hub reset pin into the usbh1 group.

Signed-off-by: Joshua Clayton 
Signed-off-by: Peter Chen 
---
 arch/arm/boot/dts/imx6q-evi.dts | 25 +++--
 1 file changed, 7 insertions(+), 18 deletions(-)

diff --git a/arch/arm/boot/dts/imx6q-evi.dts b/arch/arm/boot/dts/imx6q-evi.dts
index 4fa5601..49c6f61 100644
--- a/arch/arm/boot/dts/imx6q-evi.dts
+++ b/arch/arm/boot/dts/imx6q-evi.dts
@@ -54,18 +54,6 @@
reg = <0x1000 0x4000>;
};
 
-   reg_usbh1_vbus: regulator-usbhubreset {
-   compatible = "regulator-fixed";
-   regulator-name = "usbh1_vbus";
-   regulator-min-microvolt = <500>;
-   regulator-max-microvolt = <500>;
-   enable-active-high;
-   startup-delay-us = <2>;
-   pinctrl-names = "default";
-   pinctrl-0 = <_usbh1_hubreset>;
-   gpio = < 12 GPIO_ACTIVE_HIGH>;
-   };
-
reg_usb_otg_vbus: regulator-usbotgvbus {
compatible = "regulator-fixed";
regulator-name = "usb_otg_vbus";
@@ -204,12 +192,18 @@
 };
 
  {
-   vbus-supply = <_usbh1_vbus>;
pinctrl-names = "default";
pinctrl-0 = <_usbh1>;
dr_mode = "host";
disable-over-current;
status = "okay";
+
+   usb2415host: hub@1 {
+   compatible = "usb424,2513";
+   reg = <1>;
+   reset-gpios = < 12 GPIO_ACTIVE_LOW>;
+   reset-duration-us = <3000>;
+   };
 };
 
  {
@@ -467,11 +461,6 @@
MX6QDL_PAD_GPIO_3__USB_H1_OC 0x1b0b0
/* usbh1_b OC */
MX6QDL_PAD_GPIO_0__GPIO1_IO00 0x1b0b0
-   >;
-   };
-
-   pinctrl_usbh1_hubreset: usbh1hubresetgrp {
-   fsl,pins = <
MX6QDL_PAD_GPIO_17__GPIO7_IO12 0x1b0b0
>;
};
-- 
2.7.4



[PATCH v7 8/8] ARM: dts: imx6q-evi: Fix onboard hub reset line

2016-09-19 Thread Peter Chen
From: Joshua Clayton 

Previously the onboard hub was made to work by treating its
reset gpio as a regulator enable.
Get rid of that kludge now that pwseq has added reset gpio support.
Move the pin muxing for the hub reset pin into the usbh1 group.

Signed-off-by: Joshua Clayton 
Signed-off-by: Peter Chen 
---
 arch/arm/boot/dts/imx6q-evi.dts | 25 +++--
 1 file changed, 7 insertions(+), 18 deletions(-)

diff --git a/arch/arm/boot/dts/imx6q-evi.dts b/arch/arm/boot/dts/imx6q-evi.dts
index 4fa5601..49c6f61 100644
--- a/arch/arm/boot/dts/imx6q-evi.dts
+++ b/arch/arm/boot/dts/imx6q-evi.dts
@@ -54,18 +54,6 @@
reg = <0x1000 0x4000>;
};
 
-   reg_usbh1_vbus: regulator-usbhubreset {
-   compatible = "regulator-fixed";
-   regulator-name = "usbh1_vbus";
-   regulator-min-microvolt = <500>;
-   regulator-max-microvolt = <500>;
-   enable-active-high;
-   startup-delay-us = <2>;
-   pinctrl-names = "default";
-   pinctrl-0 = <_usbh1_hubreset>;
-   gpio = < 12 GPIO_ACTIVE_HIGH>;
-   };
-
reg_usb_otg_vbus: regulator-usbotgvbus {
compatible = "regulator-fixed";
regulator-name = "usb_otg_vbus";
@@ -204,12 +192,18 @@
 };
 
  {
-   vbus-supply = <_usbh1_vbus>;
pinctrl-names = "default";
pinctrl-0 = <_usbh1>;
dr_mode = "host";
disable-over-current;
status = "okay";
+
+   usb2415host: hub@1 {
+   compatible = "usb424,2513";
+   reg = <1>;
+   reset-gpios = < 12 GPIO_ACTIVE_LOW>;
+   reset-duration-us = <3000>;
+   };
 };
 
  {
@@ -467,11 +461,6 @@
MX6QDL_PAD_GPIO_3__USB_H1_OC 0x1b0b0
/* usbh1_b OC */
MX6QDL_PAD_GPIO_0__GPIO1_IO00 0x1b0b0
-   >;
-   };
-
-   pinctrl_usbh1_hubreset: usbh1hubresetgrp {
-   fsl,pins = <
MX6QDL_PAD_GPIO_17__GPIO7_IO12 0x1b0b0
>;
};
-- 
2.7.4



[PATCH v7 6/8] ARM: dts: imx6qdl: Enable usb node children with

2016-09-19 Thread Peter Chen
From: Joshua Clayton 

Give usb nodes #address and #size attributes, so that a child node
representing a permanently connected device such as an onboard hub may
be addressed with a <reg> attribute.

Signed-off-by: Joshua Clayton 
Signed-off-by: Peter Chen 
---
 arch/arm/boot/dts/imx6qdl.dtsi | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi
index b13b0b2..8fc66e1 100644
--- a/arch/arm/boot/dts/imx6qdl.dtsi
+++ b/arch/arm/boot/dts/imx6qdl.dtsi
@@ -935,6 +935,8 @@
 
usbh1: usb@02184200 {
compatible = "fsl,imx6q-usb", "fsl,imx27-usb";
+   #address-cells = <1>;
+   #size-cells = <0>;
reg = <0x02184200 0x200>;
interrupts = <0 40 IRQ_TYPE_LEVEL_HIGH>;
clocks = < IMX6QDL_CLK_USBOH3>;
@@ -949,6 +951,8 @@
 
usbh2: usb@02184400 {
compatible = "fsl,imx6q-usb", "fsl,imx27-usb";
+   #address-cells = <1>;
+   #size-cells = <0>;
reg = <0x02184400 0x200>;
interrupts = <0 41 IRQ_TYPE_LEVEL_HIGH>;
clocks = < IMX6QDL_CLK_USBOH3>;
@@ -962,6 +966,8 @@
 
usbh3: usb@02184600 {
compatible = "fsl,imx6q-usb", "fsl,imx27-usb";
+   #address-cells = <1>;
+   #size-cells = <0>;
reg = <0x02184600 0x200>;
interrupts = <0 42 IRQ_TYPE_LEVEL_HIGH>;
clocks = < IMX6QDL_CLK_USBOH3>;
-- 
2.7.4



[PATCH v7 6/8] ARM: dts: imx6qdl: Enable usb node children with

2016-09-19 Thread Peter Chen
From: Joshua Clayton 

Give usb nodes #address and #size attributes, so that a child node
representing a permanently connected device such as an onboard hub may
be addressed with a <reg> attribute.

Signed-off-by: Joshua Clayton 
Signed-off-by: Peter Chen 
---
 arch/arm/boot/dts/imx6qdl.dtsi | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi
index b13b0b2..8fc66e1 100644
--- a/arch/arm/boot/dts/imx6qdl.dtsi
+++ b/arch/arm/boot/dts/imx6qdl.dtsi
@@ -935,6 +935,8 @@
 
usbh1: usb@02184200 {
compatible = "fsl,imx6q-usb", "fsl,imx27-usb";
+   #address-cells = <1>;
+   #size-cells = <0>;
reg = <0x02184200 0x200>;
interrupts = <0 40 IRQ_TYPE_LEVEL_HIGH>;
clocks = < IMX6QDL_CLK_USBOH3>;
@@ -949,6 +951,8 @@
 
usbh2: usb@02184400 {
compatible = "fsl,imx6q-usb", "fsl,imx27-usb";
+   #address-cells = <1>;
+   #size-cells = <0>;
reg = <0x02184400 0x200>;
interrupts = <0 41 IRQ_TYPE_LEVEL_HIGH>;
clocks = < IMX6QDL_CLK_USBOH3>;
@@ -962,6 +966,8 @@
 
usbh3: usb@02184600 {
compatible = "fsl,imx6q-usb", "fsl,imx27-usb";
+   #address-cells = <1>;
+   #size-cells = <0>;
reg = <0x02184600 0x200>;
interrupts = <0 42 IRQ_TYPE_LEVEL_HIGH>;
clocks = < IMX6QDL_CLK_USBOH3>;
-- 
2.7.4



[PATCH v7 5/8] usb: chipidea: let chipidea core device of_node equal's glue layer device of_node

2016-09-19 Thread Peter Chen
From: Peter Chen 

In the device tree, there is no device node for the chipidea core;
the glue layer's node is the parent node for the host and udc
devices. But in the related drivers, the parent device is the chipidea
core. So, in order to let the common driver get the parent's node,
we make the core's device node equal to the glue layer's device node.

Signed-off-by: Peter Chen 
Tested-by: Maciej S. Szmigiero 
Tested-by: Joshua Clayton 
---
 drivers/usb/chipidea/core.c | 27 ++-
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c
index 69426e6..6839e19 100644
--- a/drivers/usb/chipidea/core.c
+++ b/drivers/usb/chipidea/core.c
@@ -927,6 +927,16 @@ static int ci_hdrc_probe(struct platform_device *pdev)
return -ENODEV;
}
 
+   /*
+* At device tree, we have no device node for chipidea core,
+* the glue layer's node is the parent node for host and udc
+* device. But in related driver, the parent device is chipidea
+* core. So, in order to let the common driver get parent's node,
+* we let the core's device node equals glue layer's node.
+*/
+   if (dev->parent && dev->parent->of_node)
+   dev->of_node = dev->parent->of_node;
+
if (ci->platdata->phy) {
ci->phy = ci->platdata->phy;
} else if (ci->platdata->usb_phy) {
@@ -937,11 +947,15 @@ static int ci_hdrc_probe(struct platform_device *pdev)
 
/* if both generic PHY and USB PHY layers aren't enabled */
if (PTR_ERR(ci->phy) == -ENOSYS &&
-   PTR_ERR(ci->usb_phy) == -ENXIO)
-   return -ENXIO;
+   PTR_ERR(ci->usb_phy) == -ENXIO) {
+   ret = -ENXIO;
+   goto clear_of_node;
+   }
 
-   if (IS_ERR(ci->phy) && IS_ERR(ci->usb_phy))
-   return -EPROBE_DEFER;
+   if (IS_ERR(ci->phy) && IS_ERR(ci->usb_phy)) {
+   ret = -EPROBE_DEFER;
+   goto clear_of_node;
+   }
 
if (IS_ERR(ci->phy))
ci->phy = NULL;
@@ -952,7 +966,7 @@ static int ci_hdrc_probe(struct platform_device *pdev)
ret = ci_usb_phy_init(ci);
if (ret) {
dev_err(dev, "unable to init phy: %d\n", ret);
-   return ret;
+   goto clear_of_node;
}
 
ci->hw_bank.phys = res->start;
@@ -1058,6 +1072,8 @@ stop:
ci_role_destroy(ci);
 deinit_phy:
ci_usb_phy_exit(ci);
+clear_of_node:
+   dev->of_node = NULL;
 
return ret;
 }
@@ -1076,6 +1092,7 @@ static int ci_hdrc_remove(struct platform_device *pdev)
ci_extcon_unregister(ci);
ci_role_destroy(ci);
ci_hdrc_enter_lpm(ci, true);
+   ci->dev->of_node = NULL;
ci_usb_phy_exit(ci);
 
return 0;
-- 
2.7.4



[PATCH v7 5/8] usb: chipidea: let chipidea core device of_node equal's glue layer device of_node

2016-09-19 Thread Peter Chen
From: Peter Chen 

In the device tree, there is no device node for the chipidea core;
the glue layer's node is the parent node for the host and udc
devices. But in the related drivers, the parent device is the chipidea
core. So, in order to let the common driver get the parent's node,
we make the core's device node equal to the glue layer's device node.

Signed-off-by: Peter Chen 
Tested-by: Maciej S. Szmigiero 
Tested-by: Joshua Clayton 
---
 drivers/usb/chipidea/core.c | 27 ++-
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c
index 69426e6..6839e19 100644
--- a/drivers/usb/chipidea/core.c
+++ b/drivers/usb/chipidea/core.c
@@ -927,6 +927,16 @@ static int ci_hdrc_probe(struct platform_device *pdev)
return -ENODEV;
}
 
+   /*
+* At device tree, we have no device node for chipidea core,
+* the glue layer's node is the parent node for host and udc
+* device. But in related driver, the parent device is chipidea
+* core. So, in order to let the common driver get parent's node,
+* we let the core's device node equals glue layer's node.
+*/
+   if (dev->parent && dev->parent->of_node)
+   dev->of_node = dev->parent->of_node;
+
if (ci->platdata->phy) {
ci->phy = ci->platdata->phy;
} else if (ci->platdata->usb_phy) {
@@ -937,11 +947,15 @@ static int ci_hdrc_probe(struct platform_device *pdev)
 
/* if both generic PHY and USB PHY layers aren't enabled */
if (PTR_ERR(ci->phy) == -ENOSYS &&
-   PTR_ERR(ci->usb_phy) == -ENXIO)
-   return -ENXIO;
+   PTR_ERR(ci->usb_phy) == -ENXIO) {
+   ret = -ENXIO;
+   goto clear_of_node;
+   }
 
-   if (IS_ERR(ci->phy) && IS_ERR(ci->usb_phy))
-   return -EPROBE_DEFER;
+   if (IS_ERR(ci->phy) && IS_ERR(ci->usb_phy)) {
+   ret = -EPROBE_DEFER;
+   goto clear_of_node;
+   }
 
if (IS_ERR(ci->phy))
ci->phy = NULL;
@@ -952,7 +966,7 @@ static int ci_hdrc_probe(struct platform_device *pdev)
ret = ci_usb_phy_init(ci);
if (ret) {
dev_err(dev, "unable to init phy: %d\n", ret);
-   return ret;
+   goto clear_of_node;
}
 
ci->hw_bank.phys = res->start;
@@ -1058,6 +1072,8 @@ stop:
ci_role_destroy(ci);
 deinit_phy:
ci_usb_phy_exit(ci);
+clear_of_node:
+   dev->of_node = NULL;
 
return ret;
 }
@@ -1076,6 +1092,7 @@ static int ci_hdrc_remove(struct platform_device *pdev)
ci_extcon_unregister(ci);
ci_role_destroy(ci);
ci_hdrc_enter_lpm(ci, true);
+   ci->dev->of_node = NULL;
ci_usb_phy_exit(ci);
 
return 0;
-- 
2.7.4



[PATCH] [linux-next] Fix double word "the the" in Doc/filesystems

2016-09-19 Thread Masanari Iida
This patch fixes the "the the" typos found in Documentation/filesystems.

Signed-off-by: Masanari Iida 
---
 Documentation/filesystems/autofs4-mount-control.txt | 10 +-
 Documentation/filesystems/directory-locking |  2 +-
 Documentation/filesystems/overlayfs.txt |  2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/Documentation/filesystems/autofs4-mount-control.txt 
b/Documentation/filesystems/autofs4-mount-control.txt
index 50a3e01a36f8..572f7e6612a6 100644
--- a/Documentation/filesystems/autofs4-mount-control.txt
+++ b/Documentation/filesystems/autofs4-mount-control.txt
@@ -386,8 +386,8 @@ variation uses the path and optionally in.type field of 
struct args_ismountpoint
 set to an autofs mount type. The call returns 1 if this is a mount point
 and sets out.devid field to the device number of the mount and out.magic
 field to the relevant super block magic number (described below) or 0 if
-it isn't a mountpoint. In both cases the the device number (as returned
-by new_encode_dev()) is returned in out.devid field.
+it isn't a mountpoint. In both cases the device number (as returned by
+new_encode_dev()) is returned in out.devid field.
 
 If supplied with a file descriptor we're looking for a specific mount,
 not necessarily at the top of the mounted stack. In this case the path
@@ -400,7 +400,7 @@ is one or 0 if it isn't a mountpoint.
 If a path is supplied (and the ioctlfd field is set to -1) then the path
 is looked up and is checked to see if it is the root of a mount. If a
 type is also given we are looking for a particular autofs mount and if
-a match isn't found a fail is returned. If the the located path is the
-root of a mount 1 is returned along with the super magic of the mount
-or 0 otherwise.
+a match isn't found a fail is returned. If the located path is the root
+of a mount 1 is returned along with the super magic of the mount or 0
+otherwise.
 
diff --git a/Documentation/filesystems/directory-locking 
b/Documentation/filesystems/directory-locking
index 4e32cb961e5b..fe38d83bc3de 100644
--- a/Documentation/filesystems/directory-locking
+++ b/Documentation/filesystems/directory-locking
@@ -23,7 +23,7 @@ RENAME_EXCHANGE in flags argument) lock both.  In any case,
 if the target already exists, lock it.  If the source is a non-directory,
 lock it.  If we need to lock both, lock them in inode pointer order.
 Then call the method.  All locks are exclusive.
-NB: we might get away with locking the the source (and target in exchange
+NB: we might get away with locking the source (and target in exchange
 case) shared.
 
 5) link creation.  Locking rules:
diff --git a/Documentation/filesystems/overlayfs.txt 
b/Documentation/filesystems/overlayfs.txt
index 4de8475e3a04..820203d87f63 100644
--- a/Documentation/filesystems/overlayfs.txt
+++ b/Documentation/filesystems/overlayfs.txt
@@ -163,7 +163,7 @@ rename or unlink will of course be noticed and handled).
 Multiple lower layers
 -
 
-Multiple lower layers can now be given using the the colon (":") as a
+Multiple lower layers can now be given using the colon (":") as a
 separator character between the directory names.  For example:
 
   mount -t overlay overlay -olowerdir=/lower1:/lower2:/lower3 /merged
-- 
2.10.0.177.ge510a86



[PATCH] [linux-next] Fix double word "the the" in Doc/filesystems

2016-09-19 Thread Masanari Iida
This patch fixes the "the the" typos found in Documentation/filesystems.

Signed-off-by: Masanari Iida 
---
 Documentation/filesystems/autofs4-mount-control.txt | 10 +-
 Documentation/filesystems/directory-locking |  2 +-
 Documentation/filesystems/overlayfs.txt |  2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/Documentation/filesystems/autofs4-mount-control.txt 
b/Documentation/filesystems/autofs4-mount-control.txt
index 50a3e01a36f8..572f7e6612a6 100644
--- a/Documentation/filesystems/autofs4-mount-control.txt
+++ b/Documentation/filesystems/autofs4-mount-control.txt
@@ -386,8 +386,8 @@ variation uses the path and optionally in.type field of 
struct args_ismountpoint
 set to an autofs mount type. The call returns 1 if this is a mount point
 and sets out.devid field to the device number of the mount and out.magic
 field to the relevant super block magic number (described below) or 0 if
-it isn't a mountpoint. In both cases the the device number (as returned
-by new_encode_dev()) is returned in out.devid field.
+it isn't a mountpoint. In both cases the device number (as returned by
+new_encode_dev()) is returned in out.devid field.
 
 If supplied with a file descriptor we're looking for a specific mount,
 not necessarily at the top of the mounted stack. In this case the path
@@ -400,7 +400,7 @@ is one or 0 if it isn't a mountpoint.
 If a path is supplied (and the ioctlfd field is set to -1) then the path
 is looked up and is checked to see if it is the root of a mount. If a
 type is also given we are looking for a particular autofs mount and if
-a match isn't found a fail is returned. If the the located path is the
-root of a mount 1 is returned along with the super magic of the mount
-or 0 otherwise.
+a match isn't found a fail is returned. If the located path is the root
+of a mount 1 is returned along with the super magic of the mount or 0
+otherwise.
 
diff --git a/Documentation/filesystems/directory-locking 
b/Documentation/filesystems/directory-locking
index 4e32cb961e5b..fe38d83bc3de 100644
--- a/Documentation/filesystems/directory-locking
+++ b/Documentation/filesystems/directory-locking
@@ -23,7 +23,7 @@ RENAME_EXCHANGE in flags argument) lock both.  In any case,
 if the target already exists, lock it.  If the source is a non-directory,
 lock it.  If we need to lock both, lock them in inode pointer order.
 Then call the method.  All locks are exclusive.
-NB: we might get away with locking the the source (and target in exchange
+NB: we might get away with locking the source (and target in exchange
 case) shared.
 
 5) link creation.  Locking rules:
diff --git a/Documentation/filesystems/overlayfs.txt 
b/Documentation/filesystems/overlayfs.txt
index 4de8475e3a04..820203d87f63 100644
--- a/Documentation/filesystems/overlayfs.txt
+++ b/Documentation/filesystems/overlayfs.txt
@@ -163,7 +163,7 @@ rename or unlink will of course be noticed and handled).
 Multiple lower layers
 -
 
-Multiple lower layers can now be given using the the colon (":") as a
+Multiple lower layers can now be given using the colon (":") as a
 separator character between the directory names.  For example:
 
   mount -t overlay overlay -olowerdir=/lower1:/lower2:/lower3 /merged
-- 
2.10.0.177.ge510a86



[PATCH v2] power: bq27xxx_battery: allow kernel poll_interval parameter runtime update

2016-09-19 Thread Matt Ranostay
Fix an issue where a new poll_interval value was not applied until the
previous interval had expired.

Cc: Tony Lindgren 
Cc: Liam Breck 
Signed-off-by: Matt Ranostay 
---

Changes from v1:
 * Change param_get_ushort to param_get_uint
 * Changed param_lock naming to more precise bq27xxx_list_lock

 drivers/power/supply/bq27xxx_battery.c | 38 +-
 include/linux/power/bq27xxx_battery.h  |  1 +
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/drivers/power/supply/bq27xxx_battery.c 
b/drivers/power/supply/bq27xxx_battery.c
index 3f57dd54803a..3b0dbc689d72 100644
--- a/drivers/power/supply/bq27xxx_battery.c
+++ b/drivers/power/supply/bq27xxx_battery.c
@@ -39,6 +39,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -390,8 +391,35 @@ static struct {
BQ27XXX_PROP(BQ27421, bq27421_battery_props),
 };
 
+static DEFINE_MUTEX(bq27xxx_list_lock);
+static LIST_HEAD(bq27xxx_battery_devices);
+
+static int poll_interval_param_set(const char *val, const struct kernel_param 
*kp)
+{
+   struct bq27xxx_device_info *di;
+   int ret;
+
+   ret = param_set_uint(val, kp);
+   if (ret < 0)
+   return ret;
+
+   mutex_lock(_list_lock);
+   list_for_each_entry(di, _battery_devices, list) {
+   cancel_delayed_work_sync(>work);
+   schedule_delayed_work(>work, 0);
+   }
+   mutex_unlock(_list_lock);
+
+   return ret;
+}
+
+static const struct kernel_param_ops param_ops_poll_interval = {
+   .get = param_get_uint,
+   .set = poll_interval_param_set,
+};
+
 static unsigned int poll_interval = 360;
-module_param(poll_interval, uint, 0644);
+module_param_cb(poll_interval, _ops_poll_interval, _interval, 0644);
 MODULE_PARM_DESC(poll_interval,
 "battery poll interval in seconds - 0 disables polling");
 
@@ -972,6 +1000,10 @@ int bq27xxx_battery_setup(struct bq27xxx_device_info *di)
 
bq27xxx_battery_update(di);
 
+   mutex_lock(_list_lock);
+   list_add(>list, _battery_devices);
+   mutex_unlock(_list_lock);
+
return 0;
 }
 EXPORT_SYMBOL_GPL(bq27xxx_battery_setup);
@@ -990,6 +1022,10 @@ void bq27xxx_battery_teardown(struct bq27xxx_device_info 
*di)
 
power_supply_unregister(di->bat);
 
+   mutex_lock(_list_lock);
+   list_del(>list);
+   mutex_unlock(_list_lock);
+
mutex_destroy(>lock);
 }
 EXPORT_SYMBOL_GPL(bq27xxx_battery_teardown);
diff --git a/include/linux/power/bq27xxx_battery.h 
b/include/linux/power/bq27xxx_battery.h
index b50c0492629d..e30deb046156 100644
--- a/include/linux/power/bq27xxx_battery.h
+++ b/include/linux/power/bq27xxx_battery.h
@@ -58,6 +58,7 @@ struct bq27xxx_device_info {
unsigned long last_update;
struct delayed_work work;
struct power_supply *bat;
+   struct list_head list;
struct mutex lock;
u8 *regs;
 };
-- 
2.7.4



[PATCH v2] power: bq27xxx_battery: allow kernel poll_interval parameter runtime update

2016-09-19 Thread Matt Ranostay
Fix an issue where a new poll_interval value was not applied until the
previous interval had expired.

Cc: Tony Lindgren 
Cc: Liam Breck 
Signed-off-by: Matt Ranostay 
---

Changes from v1:
 * Change param_get_ushort to param_get_uint
 * Changed param_lock naming to more precise bq27xxx_list_lock

 drivers/power/supply/bq27xxx_battery.c | 38 +-
 include/linux/power/bq27xxx_battery.h  |  1 +
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/drivers/power/supply/bq27xxx_battery.c 
b/drivers/power/supply/bq27xxx_battery.c
index 3f57dd54803a..3b0dbc689d72 100644
--- a/drivers/power/supply/bq27xxx_battery.c
+++ b/drivers/power/supply/bq27xxx_battery.c
@@ -39,6 +39,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -390,8 +391,35 @@ static struct {
BQ27XXX_PROP(BQ27421, bq27421_battery_props),
 };
 
+static DEFINE_MUTEX(bq27xxx_list_lock);
+static LIST_HEAD(bq27xxx_battery_devices);
+
+static int poll_interval_param_set(const char *val, const struct kernel_param 
*kp)
+{
+   struct bq27xxx_device_info *di;
+   int ret;
+
+   ret = param_set_uint(val, kp);
+   if (ret < 0)
+   return ret;
+
+   mutex_lock(_list_lock);
+   list_for_each_entry(di, _battery_devices, list) {
+   cancel_delayed_work_sync(>work);
+   schedule_delayed_work(>work, 0);
+   }
+   mutex_unlock(_list_lock);
+
+   return ret;
+}
+
+static const struct kernel_param_ops param_ops_poll_interval = {
+   .get = param_get_uint,
+   .set = poll_interval_param_set,
+};
+
 static unsigned int poll_interval = 360;
-module_param(poll_interval, uint, 0644);
+module_param_cb(poll_interval, _ops_poll_interval, _interval, 0644);
 MODULE_PARM_DESC(poll_interval,
 "battery poll interval in seconds - 0 disables polling");
 
@@ -972,6 +1000,10 @@ int bq27xxx_battery_setup(struct bq27xxx_device_info *di)
 
bq27xxx_battery_update(di);
 
+   mutex_lock(_list_lock);
+   list_add(>list, _battery_devices);
+   mutex_unlock(_list_lock);
+
return 0;
 }
 EXPORT_SYMBOL_GPL(bq27xxx_battery_setup);
@@ -990,6 +1022,10 @@ void bq27xxx_battery_teardown(struct bq27xxx_device_info 
*di)
 
power_supply_unregister(di->bat);
 
+   mutex_lock(_list_lock);
+   list_del(>list);
+   mutex_unlock(_list_lock);
+
mutex_destroy(>lock);
 }
 EXPORT_SYMBOL_GPL(bq27xxx_battery_teardown);
diff --git a/include/linux/power/bq27xxx_battery.h 
b/include/linux/power/bq27xxx_battery.h
index b50c0492629d..e30deb046156 100644
--- a/include/linux/power/bq27xxx_battery.h
+++ b/include/linux/power/bq27xxx_battery.h
@@ -58,6 +58,7 @@ struct bq27xxx_device_info {
unsigned long last_update;
struct delayed_work work;
struct power_supply *bat;
+   struct list_head list;
struct mutex lock;
u8 *regs;
 };
-- 
2.7.4



[PATCH v7 4/8] usb: core: add power sequence handling for USB devices

2016-09-19 Thread Peter Chen
Some hard-wired USB devices need a power sequence before they can work
normally. A typical power sequence is: enable the USB PHY clock, toggle
the reset pin, etc. The current Linux USB driver lacks the code to do
this, which may cause some hard-wired USB devices to work abnormally or
not be recognized by the controller at all.

In this patch, it calls power sequence library APIs to finish
the power sequence events. It will do power on sequence at hub's
probe for all devices under this hub (includes root hub).
At hub_disconnect, it will do power off sequence which is at powered
on list.

Signed-off-by: Peter Chen 
Tested-by: Joshua Clayton 
---
 drivers/usb/core/hub.c | 41 ++---
 drivers/usb/core/hub.h |  1 +
 2 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index b48dc76..f3de1de 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -26,6 +26,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -1695,6 +1696,7 @@ static void hub_disconnect(struct usb_interface *intf)
hub->error = 0;
hub_quiesce(hub, HUB_DISCONNECT);
 
+   of_pwrseq_off_list(>pwrseq_on_list);
mutex_lock(_port_peer_mutex);
 
/* Avoid races with recursively_mark_NOTATTACHED() */
@@ -1722,12 +1724,41 @@ static void hub_disconnect(struct usb_interface *intf)
kref_put(>kref, hub_release);
 }
 
+#ifdef CONFIG_OF
+static int hub_of_pwrseq_on(struct usb_hub *hub)
+{
+   struct device *parent;
+   struct usb_device *hdev = hub->hdev;
+   struct device_node *np;
+   int ret;
+
+   if (hdev->parent)
+   parent = >dev;
+   else
+   parent = bus_to_hcd(hdev->bus)->self.controller;
+
+   for_each_child_of_node(parent->of_node, np) {
+   ret = of_pwrseq_on_list(np, >pwrseq_on_list);
+   if (ret)
+   return ret;
+   }
+
+   return 0;
+}
+#else
+static int hub_of_pwrseq_on(struct usb_hub *hub)
+{
+   return 0;
+}
+#endif
+
 static int hub_probe(struct usb_interface *intf, const struct usb_device_id 
*id)
 {
struct usb_host_interface *desc;
struct usb_endpoint_descriptor *endpoint;
struct usb_device *hdev;
struct usb_hub *hub;
+   int ret = -ENODEV;
 
desc = intf->cur_altsetting;
hdev = interface_to_usbdev(intf);
@@ -1832,6 +1863,7 @@ descriptor_error:
INIT_DELAYED_WORK(>leds, led_work);
INIT_DELAYED_WORK(>init_work, NULL);
INIT_WORK(>events, hub_event);
+   INIT_LIST_HEAD(>pwrseq_on_list);
usb_get_intf(intf);
usb_get_dev(hdev);
 
@@ -1845,11 +1877,14 @@ descriptor_error:
if (id->driver_info & HUB_QUIRK_CHECK_PORT_AUTOSUSPEND)
hub->quirk_check_port_auto_suspend = 1;
 
-   if (hub_configure(hub, endpoint) >= 0)
-   return 0;
+   if (hub_configure(hub, endpoint) >= 0) {
+   ret = hub_of_pwrseq_on(hub);
+   if (!ret)
+   return 0;
+   }
 
hub_disconnect(intf);
-   return -ENODEV;
+   return ret;
 }
 
 static int
diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h
index 34c1a7e..cd86f91 100644
--- a/drivers/usb/core/hub.h
+++ b/drivers/usb/core/hub.h
@@ -78,6 +78,7 @@ struct usb_hub {
struct delayed_work init_work;
struct work_struct  events;
struct usb_port **ports;
+   struct list_headpwrseq_on_list; /* powered pwrseq node list */
 };
 
 /**
-- 
2.7.4



[PATCH v7 7/8] ARM: dts: imx6qdl-udoo.dtsi: fix onboard USB HUB property

2016-09-19 Thread Peter Chen
The current dts describes the USB hub's properties in the USB
controller's node, which is improper. The USB hub should be a child node
under the USB controller, with the power sequence properties under
it. In addition, use the gpio pinctrl setting for USB2415's reset pin.

Signed-off-by: Peter Chen 
Signed-off-by: Joshua Clayton 
---
 arch/arm/boot/dts/imx6qdl-udoo.dtsi | 26 --
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/arch/arm/boot/dts/imx6qdl-udoo.dtsi 
b/arch/arm/boot/dts/imx6qdl-udoo.dtsi
index 3bee2f9..87fe31f 100644
--- a/arch/arm/boot/dts/imx6qdl-udoo.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-udoo.dtsi
@@ -9,6 +9,8 @@
  *
  */
 
+#include 
+
 / {
aliases {
backlight = 
@@ -58,17 +60,6 @@
#address-cells = <1>;
#size-cells = <0>;
 
-   reg_usb_h1_vbus: regulator@0 {
-   compatible = "regulator-fixed";
-   reg = <0>;
-   regulator-name = "usb_h1_vbus";
-   regulator-min-microvolt = <500>;
-   regulator-max-microvolt = <500>;
-   enable-active-high;
-   startup-delay-us = <2>; /* USB2415 requires a POR of 1 
us minimum */
-   gpio = < 12 0>;
-   };
-
reg_panel: regulator@1 {
compatible = "regulator-fixed";
reg = <1>;
@@ -188,7 +179,7 @@
 
pinctrl_usbh: usbhgrp {
fsl,pins = <
-   MX6QDL_PAD_GPIO_17__GPIO7_IO12 0x8000
+   MX6QDL_PAD_GPIO_17__GPIO7_IO12  0x1b0b0
MX6QDL_PAD_NANDF_CS2__CCM_CLKO2 0x130b0
>;
};
@@ -259,9 +250,16 @@
  {
pinctrl-names = "default";
pinctrl-0 = <_usbh>;
-   vbus-supply = <_usb_h1_vbus>;
-   clocks = < IMX6QDL_CLK_CKO>;
status = "okay";
+
+   usb2415: hub@1 {
+   compatible = "usb424,2514";
+   reg = <1>;
+
+   clocks = < IMX6QDL_CLK_CKO>;
+   reset-gpios = < 12 GPIO_ACTIVE_LOW>;
+   reset-duration-us = <3000>;
+   };
 };
 
  {
-- 
2.7.4



[PATCH v7 4/8] usb: core: add power sequence handling for USB devices

2016-09-19 Thread Peter Chen
Some hard-wired USB devices need a power sequence before they can work
normally. A typical power sequence is: enable the USB PHY clock, toggle
the reset pin, etc. The current Linux USB driver lacks the code to do
this, which may cause some hard-wired USB devices to work abnormally or
not be recognized by the controller at all.

In this patch, it calls power sequence library APIs to finish
the power sequence events. It will do power on sequence at hub's
probe for all devices under this hub (includes root hub).
At hub_disconnect, it will do power off sequence which is at powered
on list.

Signed-off-by: Peter Chen 
Tested-by: Joshua Clayton 
---
 drivers/usb/core/hub.c | 41 ++---
 drivers/usb/core/hub.h |  1 +
 2 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index b48dc76..f3de1de 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -26,6 +26,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -1695,6 +1696,7 @@ static void hub_disconnect(struct usb_interface *intf)
hub->error = 0;
hub_quiesce(hub, HUB_DISCONNECT);
 
+   of_pwrseq_off_list(>pwrseq_on_list);
mutex_lock(_port_peer_mutex);
 
/* Avoid races with recursively_mark_NOTATTACHED() */
@@ -1722,12 +1724,41 @@ static void hub_disconnect(struct usb_interface *intf)
kref_put(>kref, hub_release);
 }
 
+#ifdef CONFIG_OF
+static int hub_of_pwrseq_on(struct usb_hub *hub)
+{
+   struct device *parent;
+   struct usb_device *hdev = hub->hdev;
+   struct device_node *np;
+   int ret;
+
+   if (hdev->parent)
+   parent = >dev;
+   else
+   parent = bus_to_hcd(hdev->bus)->self.controller;
+
+   for_each_child_of_node(parent->of_node, np) {
+   ret = of_pwrseq_on_list(np, >pwrseq_on_list);
+   if (ret)
+   return ret;
+   }
+
+   return 0;
+}
+#else
+static int hub_of_pwrseq_on(struct usb_hub *hub)
+{
+   return 0;
+}
+#endif
+
 static int hub_probe(struct usb_interface *intf, const struct usb_device_id 
*id)
 {
struct usb_host_interface *desc;
struct usb_endpoint_descriptor *endpoint;
struct usb_device *hdev;
struct usb_hub *hub;
+   int ret = -ENODEV;
 
desc = intf->cur_altsetting;
hdev = interface_to_usbdev(intf);
@@ -1832,6 +1863,7 @@ descriptor_error:
INIT_DELAYED_WORK(>leds, led_work);
INIT_DELAYED_WORK(>init_work, NULL);
INIT_WORK(>events, hub_event);
+   INIT_LIST_HEAD(>pwrseq_on_list);
usb_get_intf(intf);
usb_get_dev(hdev);
 
@@ -1845,11 +1877,14 @@ descriptor_error:
if (id->driver_info & HUB_QUIRK_CHECK_PORT_AUTOSUSPEND)
hub->quirk_check_port_auto_suspend = 1;
 
-   if (hub_configure(hub, endpoint) >= 0)
-   return 0;
+   if (hub_configure(hub, endpoint) >= 0) {
+   ret = hub_of_pwrseq_on(hub);
+   if (!ret)
+   return 0;
+   }
 
hub_disconnect(intf);
-   return -ENODEV;
+   return ret;
 }
 
 static int
diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h
index 34c1a7e..cd86f91 100644
--- a/drivers/usb/core/hub.h
+++ b/drivers/usb/core/hub.h
@@ -78,6 +78,7 @@ struct usb_hub {
struct delayed_work init_work;
struct work_struct  events;
struct usb_port **ports;
+   struct list_headpwrseq_on_list; /* powered pwrseq node list */
 };
 
 /**
-- 
2.7.4



[PATCH v7 7/8] ARM: dts: imx6qdl-udoo.dtsi: fix onboard USB HUB property

2016-09-19 Thread Peter Chen
The current dts describes the USB hub's properties in the USB
controller's node, which is improper. The USB hub should be a child node
under the USB controller, with the power sequence properties under
it. In addition, use the gpio pinctrl setting for USB2415's reset pin.

Signed-off-by: Peter Chen 
Signed-off-by: Joshua Clayton 
---
 arch/arm/boot/dts/imx6qdl-udoo.dtsi | 26 --
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/arch/arm/boot/dts/imx6qdl-udoo.dtsi 
b/arch/arm/boot/dts/imx6qdl-udoo.dtsi
index 3bee2f9..87fe31f 100644
--- a/arch/arm/boot/dts/imx6qdl-udoo.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-udoo.dtsi
@@ -9,6 +9,8 @@
  *
  */
 
+#include 
+
 / {
aliases {
backlight = 
@@ -58,17 +60,6 @@
#address-cells = <1>;
#size-cells = <0>;
 
-   reg_usb_h1_vbus: regulator@0 {
-   compatible = "regulator-fixed";
-   reg = <0>;
-   regulator-name = "usb_h1_vbus";
-   regulator-min-microvolt = <500>;
-   regulator-max-microvolt = <500>;
-   enable-active-high;
-   startup-delay-us = <2>; /* USB2415 requires a POR of 1 
us minimum */
-   gpio = < 12 0>;
-   };
-
reg_panel: regulator@1 {
compatible = "regulator-fixed";
reg = <1>;
@@ -188,7 +179,7 @@
 
pinctrl_usbh: usbhgrp {
fsl,pins = <
-   MX6QDL_PAD_GPIO_17__GPIO7_IO12 0x8000
+   MX6QDL_PAD_GPIO_17__GPIO7_IO12  0x1b0b0
MX6QDL_PAD_NANDF_CS2__CCM_CLKO2 0x130b0
>;
};
@@ -259,9 +250,16 @@
  {
pinctrl-names = "default";
pinctrl-0 = <_usbh>;
-   vbus-supply = <_usb_h1_vbus>;
-   clocks = < IMX6QDL_CLK_CKO>;
status = "okay";
+
+   usb2415: hub@1 {
+   compatible = "usb424,2514";
+   reg = <1>;
+
+   clocks = < IMX6QDL_CLK_CKO>;
+   reset-gpios = < 12 GPIO_ACTIVE_LOW>;
+   reset-duration-us = <3000>;
+   };
 };
 
  {
-- 
2.7.4



[PATCH v7 2/8] power: add power sequence library

2016-09-19 Thread Peter Chen
We have a well-known problem: some devices need a power sequence
before they can be recognized by the related host. Typical examples
are hard-wired mmc devices and usb devices.

This power sequence is hard to be described at device tree and handled by
related host driver, so we have created a common power sequence
library to cover this requirement. The core code has supplied
some common helpers for host driver, and individual power sequence
libraries handle kinds of power sequence for devices.

pwrseq_generic is intended for general purpose of power sequence, which
handles gpios and clocks currently, and can cover regulator and pinctrl
in future. The host driver just needs to call of_pwrseq_on/of_pwrseq_off
if only one power sequence is needed, else call of_pwrseq_on_list
/of_pwrseq_off_list instead (eg, USB hub driver).

Signed-off-by: Peter Chen 
Tested-by: Joshua Clayton 
Reviewed-by: Matthias Kaehlcke 
Tested-by: Matthias Kaehlcke 
---
 MAINTAINERS |   9 ++
 drivers/power/Kconfig   |   1 +
 drivers/power/Makefile  |   1 +
 drivers/power/pwrseq/Kconfig|  45 ++
 drivers/power/pwrseq/Makefile   |   3 +
 drivers/power/pwrseq/core.c | 190 
 drivers/power/pwrseq/pwrseq_compatible_sample.c | 178 ++
 drivers/power/pwrseq/pwrseq_generic.c   | 177 ++
 include/linux/power/pwrseq.h|  73 +
 9 files changed, 677 insertions(+)
 create mode 100644 drivers/power/pwrseq/Kconfig
 create mode 100644 drivers/power/pwrseq/Makefile
 create mode 100644 drivers/power/pwrseq/core.c
 create mode 100644 drivers/power/pwrseq/pwrseq_compatible_sample.c
 create mode 100644 drivers/power/pwrseq/pwrseq_generic.c
 create mode 100644 include/linux/power/pwrseq.h

diff --git a/MAINTAINERS b/MAINTAINERS
index b3e9395..b353769 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9343,6 +9343,15 @@ F:   include/linux/pm_*
 F: include/linux/powercap.h
 F: drivers/powercap/
 
+POWER SEQUENCE LIBRARY
+M: Peter Chen 
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/peter.chen/usb.git
+L: linux...@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/power/pwrseq/
+F: drivers/power/pwrseq/
+F: include/linux/power/pwrseq.h/
+
 POWER SUPPLY CLASS/SUBSYSTEM and DRIVERS
 M: Sebastian Reichel 
 M: Dmitry Eremin-Solenikov 
diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index acd4a15..f6aa4fd 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -515,3 +515,4 @@ endif # POWER_SUPPLY
 
 source "drivers/power/reset/Kconfig"
 source "drivers/power/avs/Kconfig"
+source "drivers/power/pwrseq/Kconfig"
diff --git a/drivers/power/Makefile b/drivers/power/Makefile
index e46b75d..4ed2e12 100644
--- a/drivers/power/Makefile
+++ b/drivers/power/Makefile
@@ -74,3 +74,4 @@ obj-$(CONFIG_CHARGER_TPS65217)+= tps65217_charger.o
 obj-$(CONFIG_POWER_RESET)  += reset/
 obj-$(CONFIG_AXP288_FUEL_GAUGE) += axp288_fuel_gauge.o
 obj-$(CONFIG_AXP288_CHARGER)   += axp288_charger.o
+obj-$(CONFIG_POWER_SEQUENCE)   += pwrseq/
diff --git a/drivers/power/pwrseq/Kconfig b/drivers/power/pwrseq/Kconfig
new file mode 100644
index 000..dff5e35
--- /dev/null
+++ b/drivers/power/pwrseq/Kconfig
@@ -0,0 +1,45 @@
+#
+# Power Sequence library
+#
+
+config POWER_SEQUENCE
+   bool
+
+menu "Power Sequence Support"
+
+config PWRSEQ_GENERIC
+   bool "Generic power sequence control"
+   depends on OF
+   select POWER_SEQUENCE
+   help
+  It is used for drivers which needs to do power sequence
+  (eg, turn on clock, toggle reset gpio) before the related
+  devices can be found by hardware. This generic one can be
+  used for common power sequence control.
+
+config PWRSEQ_GENERIC_INSTANCE_NUMBER
+   int "Number of Generic Power Sequence Instance"
+   depends on PWRSEQ_GENERIC
+   range 1 10
+   default 2
+   help
+  Usually, there are not so many devices needs power sequence, we set 
two
+  as default value.
+
+config PWRSEQ_SAMPLE
+   bool "sample power sequence control using compatible string"
+   depends on OF
+   select POWER_SEQUENCE
+   help
+  It is a sample library which implements power sequence for device id,
+  it is an example purpose.
+
+config PWRSEQ_SAMPLE_INSTANCE_NUMBER
+   int "Number of Sample Power Sequence Instance"
+   depends on PWRSEQ_SAMPLE
+   range 1 5
+   default 1
+   help
+  Usually, this file is special for certain device, so the default for 
this number
+  is 1.
+endmenu
diff --git a/drivers/power/pwrseq/Makefile 

[PATCH v7 3/8] binding-doc: usb: usb-device: add optional properties for power sequence

2016-09-19 Thread Peter Chen
Add optional properties for power sequence.

Signed-off-by: Peter Chen 
Acked-by: Rob Herring 
---
 Documentation/devicetree/bindings/usb/usb-device.txt | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/usb/usb-device.txt 
b/Documentation/devicetree/bindings/usb/usb-device.txt
index 1c35e7b..3661dd2 100644
--- a/Documentation/devicetree/bindings/usb/usb-device.txt
+++ b/Documentation/devicetree/bindings/usb/usb-device.txt
@@ -13,6 +13,10 @@ Required properties:
 - reg: the port number which this device is connecting to, the range
   is 1-31.
 
+Optional properties:
+power sequence properties, see
+Documentation/devicetree/bindings/power/pwrseq/pwrseq-generic.txt for detail
+
 Example:
 
  {
@@ -21,8 +25,12 @@ Example:
#address-cells = <1>;
#size-cells = <0>;
 
-   hub: genesys@1 {
+   genesys: hub@1 {
compatible = "usb5e3,608";
reg = <1>;
+
+   clocks = < IMX6SX_CLK_CKO>;
+   reset-gpios = < 5 GPIO_ACTIVE_LOW>; /* hub reset pin */
+   reset-duration-us = <10>;
};
 }
-- 
2.7.4



[PATCH v7 2/8] power: add power sequence library

2016-09-19 Thread Peter Chen
We have a well-known problem: some devices need a power sequence
before they can be recognized by the related host. Typical examples
are hard-wired mmc devices and usb devices.

This power sequence is hard to be described at device tree and handled by
related host driver, so we have created a common power sequence
library to cover this requirement. The core code has supplied
some common helpers for host driver, and individual power sequence
libraries handle kinds of power sequence for devices.

pwrseq_generic is intended for general purpose of power sequence, which
handles gpios and clocks currently, and can cover regulator and pinctrl
in future. The host driver just needs to call of_pwrseq_on/of_pwrseq_off
if only one power sequence is needed, else call of_pwrseq_on_list
/of_pwrseq_off_list instead (eg, USB hub driver).

Signed-off-by: Peter Chen 
Tested-by: Joshua Clayton 
Reviewed-by: Matthias Kaehlcke 
Tested-by: Matthias Kaehlcke 
---
 MAINTAINERS |   9 ++
 drivers/power/Kconfig   |   1 +
 drivers/power/Makefile  |   1 +
 drivers/power/pwrseq/Kconfig|  45 ++
 drivers/power/pwrseq/Makefile   |   3 +
 drivers/power/pwrseq/core.c | 190 
 drivers/power/pwrseq/pwrseq_compatible_sample.c | 178 ++
 drivers/power/pwrseq/pwrseq_generic.c   | 177 ++
 include/linux/power/pwrseq.h|  73 +
 9 files changed, 677 insertions(+)
 create mode 100644 drivers/power/pwrseq/Kconfig
 create mode 100644 drivers/power/pwrseq/Makefile
 create mode 100644 drivers/power/pwrseq/core.c
 create mode 100644 drivers/power/pwrseq/pwrseq_compatible_sample.c
 create mode 100644 drivers/power/pwrseq/pwrseq_generic.c
 create mode 100644 include/linux/power/pwrseq.h

diff --git a/MAINTAINERS b/MAINTAINERS
index b3e9395..b353769 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9343,6 +9343,15 @@ F:   include/linux/pm_*
 F: include/linux/powercap.h
 F: drivers/powercap/
 
+POWER SEQUENCE LIBRARY
+M: Peter Chen 
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/peter.chen/usb.git
+L: linux...@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/power/pwrseq/
+F: drivers/power/pwrseq/
+F: include/linux/power/pwrseq.h/
+
 POWER SUPPLY CLASS/SUBSYSTEM and DRIVERS
 M: Sebastian Reichel 
 M: Dmitry Eremin-Solenikov 
diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index acd4a15..f6aa4fd 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -515,3 +515,4 @@ endif # POWER_SUPPLY
 
 source "drivers/power/reset/Kconfig"
 source "drivers/power/avs/Kconfig"
+source "drivers/power/pwrseq/Kconfig"
diff --git a/drivers/power/Makefile b/drivers/power/Makefile
index e46b75d..4ed2e12 100644
--- a/drivers/power/Makefile
+++ b/drivers/power/Makefile
@@ -74,3 +74,4 @@ obj-$(CONFIG_CHARGER_TPS65217)+= tps65217_charger.o
 obj-$(CONFIG_POWER_RESET)  += reset/
 obj-$(CONFIG_AXP288_FUEL_GAUGE) += axp288_fuel_gauge.o
 obj-$(CONFIG_AXP288_CHARGER)   += axp288_charger.o
+obj-$(CONFIG_POWER_SEQUENCE)   += pwrseq/
diff --git a/drivers/power/pwrseq/Kconfig b/drivers/power/pwrseq/Kconfig
new file mode 100644
index 000..dff5e35
--- /dev/null
+++ b/drivers/power/pwrseq/Kconfig
@@ -0,0 +1,45 @@
+#
+# Power Sequence library
+#
+
+config POWER_SEQUENCE
+   bool
+
+menu "Power Sequence Support"
+
+config PWRSEQ_GENERIC
+   bool "Generic power sequence control"
+   depends on OF
+   select POWER_SEQUENCE
+   help
+  It is used for drivers which needs to do power sequence
+  (eg, turn on clock, toggle reset gpio) before the related
+  devices can be found by hardware. This generic one can be
+  used for common power sequence control.
+
+config PWRSEQ_GENERIC_INSTANCE_NUMBER
+   int "Number of Generic Power Sequence Instance"
+   depends on PWRSEQ_GENERIC
+   range 1 10
+   default 2
+   help
+  Usually, there are not so many devices needs power sequence, we set 
two
+  as default value.
+
+config PWRSEQ_SAMPLE
+   bool "sample power sequence control using compatible string"
+   depends on OF
+   select POWER_SEQUENCE
+   help
+  It is a sample library which implements power sequence for device id,
+  it is an example purpose.
+
+config PWRSEQ_SAMPLE_INSTANCE_NUMBER
+   int "Number of Sample Power Sequence Instance"
+   depends on PWRSEQ_SAMPLE
+   range 1 5
+   default 1
+   help
+  Usually, this file is special for certain device, so the default for 
this number
+  is 1.
+endmenu
diff --git a/drivers/power/pwrseq/Makefile b/drivers/power/pwrseq/Makefile
new file mode 100644
index 000..62f3cbf
--- /dev/null
+++ b/drivers/power/pwrseq/Makefile
@@ -0,0 +1,3 @@

[PATCH v7 3/8] binding-doc: usb: usb-device: add optional properties for power sequence

2016-09-19 Thread Peter Chen
Add optional properties for power sequence.

Signed-off-by: Peter Chen 
Acked-by: Rob Herring 
---
 Documentation/devicetree/bindings/usb/usb-device.txt | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/usb/usb-device.txt 
b/Documentation/devicetree/bindings/usb/usb-device.txt
index 1c35e7b..3661dd2 100644
--- a/Documentation/devicetree/bindings/usb/usb-device.txt
+++ b/Documentation/devicetree/bindings/usb/usb-device.txt
@@ -13,6 +13,10 @@ Required properties:
 - reg: the port number which this device is connecting to, the range
   is 1-31.
 
+Optional properties:
+power sequence properties, see
+Documentation/devicetree/bindings/power/pwrseq/pwrseq-generic.txt for detail
+
 Example:
 
  {
@@ -21,8 +25,12 @@ Example:
#address-cells = <1>;
#size-cells = <0>;
 
-   hub: genesys@1 {
+   genesys: hub@1 {
compatible = "usb5e3,608";
reg = <1>;
+
+   clocks = < IMX6SX_CLK_CKO>;
+   reset-gpios = < 5 GPIO_ACTIVE_LOW>; /* hub reset pin */
+   reset-duration-us = <10>;
};
 }
-- 
2.7.4



[PATCH v7 1/8] binding-doc: power: pwrseq-generic: add binding doc for generic power sequence library

2016-09-19 Thread Peter Chen
Add binding doc for generic power sequence library.

Signed-off-by: Peter Chen 
Acked-by: Philipp Zabel 
Acked-by: Rob Herring 
---
 .../bindings/power/pwrseq/pwrseq-generic.txt   | 48 ++
 1 file changed, 48 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/power/pwrseq/pwrseq-generic.txt

diff --git a/Documentation/devicetree/bindings/power/pwrseq/pwrseq-generic.txt 
b/Documentation/devicetree/bindings/power/pwrseq/pwrseq-generic.txt
new file mode 100644
index 000..ebf0d47
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/pwrseq/pwrseq-generic.txt
@@ -0,0 +1,48 @@
+The generic power sequence library
+
+Some hard-wired devices (eg USB/MMC) need to do power sequence before
+the device can be enumerated on the bus, the typical power sequence
+like: enable USB PHY clock, toggle reset pin, etc. But current
+Linux device driver lacks of such code to do it, it may cause some
+hard-wired devices works abnormal or can't be recognized by
+controller at all. The power sequence will be done before this device
+can be found at the bus.
+
+The power sequence properties is under the device node.
+
+Optional properties:
+- clocks: the input clocks for device.
+- reset-gpios: Should specify the GPIO for reset.
+- reset-duration-us: the duration in microsecond for assert reset signal.
+
+Below is the example of USB power sequence properties on USB device
+nodes which have two level USB hubs.
+
+ {
+   vbus-supply = <_usb_otg1_vbus>;
+   pinctrl-names = "default";
+   pinctrl-0 = <_usb_otg1_id>;
+   status = "okay";
+
+   #address-cells = <1>;
+   #size-cells = <0>;
+   genesys: hub@1 {
+   compatible = "usb5e3,608";
+   reg = <1>;
+
+   clocks = < IMX6SX_CLK_CKO>;
+   reset-gpios = < 5 GPIO_ACTIVE_LOW>; /* hub reset pin */
+   reset-duration-us = <10>;
+
+   #address-cells = <1>;
+   #size-cells = <0>;
+   asix: ethernet@1 {
+   compatible = "usbb95,1708";
+   reg = <1>;
+
+   clocks = < IMX6SX_CLK_IPG>;
+   reset-gpios = < 6 GPIO_ACTIVE_LOW>; /* 
ethernet_rst */
+   reset-duration-us = <15>;
+   };
+   };
+};
-- 
2.7.4



[PATCH v7 0/8] power: add power sequence library

2016-09-19 Thread Peter Chen
Hi all,

This is a follow-up to my last power sequence framework patch set [1].
Following Rob Herring's and Ulf Hansson's comments [2], the various kinds
of power sequence instances are added at postcore_initcall. Matching is
done on the compatible string first; if the compatible string in the dts
does not match any library, the generic power sequence is tried.
 
The host driver just needs to call of_pwrseq_on/of_pwrseq_off
if only one power sequence instance is needed, for more power sequences
are used, using of_pwrseq_on_list/of_pwrseq_off_list instead (eg, USB hub 
driver).

In future, if there are special power sequence requirements, the special
power sequence library can be created.

This patch set is tested on i.mx6 sabresx evk using a dts change, I use
two hot-plug devices to simulate this use case, the related binding
change is updated at patch [1/6], The udoo board changes were tested
using my last power sequence patch set.[3]

Except for hard-wired MMC and USB devices, I find the USB ULPI PHY also
need to power on itself before it can be found by ULPI bus.

[1] http://www.spinics.net/lists/linux-usb/msg142755.html
[2] http://www.spinics.net/lists/linux-usb/msg143106.html
[3] http://www.spinics.net/lists/linux-usb/msg142815.html

Changes for v7:
- Create the various kinds of power sequence instances at postcore_initcall,
  and match the instance with the node using the compatible string. The
  benefit of this is that the host driver doesn't need to consider which
  pwrseq instance to use, since the pwrseq core will match it; however, it
  wastes some memory if fewer power sequence instances are used. [Patch 2/8]
- Add pwrseq_compatible_sample.c to test match pwrseq using device_id. [Patch 
2/8]
- Fix the comments Vaibhav Hiremath adds for error path for clock and do not
  use device_node for parameters at pwrseq_on. [Patch 2/8]
- Simplify the caller's use of the power sequence, following Alan's comments [Patch 4/8]
- Tested three pwrseq instances together using both specific compatible string 
and
  generic libraries.

Changes for v6:
- Add Matthias Kaehlcke's Reviewed-by and Tested-by. (patch [2/6])
- Change chipidea core of_node assignment for coming user. (patch [5/6])
- Applies Joshua Clayton's three dts changes for two boards,
  the USB device's reg has only #address-cells, but without #size-cells.

Changes for v5:
- Delete pwrseq_register/pwrseq_unregister, which is useless currently
- Fix the linker error when the pwrseq user is compiled as module

Changes for v4:
- Create the patch on next-20160722 
- Fix the of_node not being NULL after the chipidea driver is unbound [Patch 5/6]
- Using more friendly wait method for reset gpio [Patch 2/6]
- Support multiple input clocks [Patch 2/6]
- Add Rob Herring's ack for DT changes
- Add Joshua Clayton's Tested-by

Changes for v3:
- Delete "power-sequence" property at binding-doc, and change related code
  at both library and user code.
- Change binding-doc example node name with Rob's comments
- of_get_named_gpio_flags only gets the gpio, but without setting gpio flags,
  add additional code request gpio with proper gpio flags
- Add Philipp Zabel's Ack and MAINTAINER's entry

Changes for v2:
- Delete "pwrseq" prefix and clock-names for properties at dt binding
- Should use the structure, not its pointer, for kzalloc
- Since chipidea core has no of_node, let core's of_node equals glue
  layer's at core's probe

Joshua Clayton (2):
  ARM: dts: imx6qdl: Enable usb node children with <reg>
  ARM: dts: imx6q-evi: Fix onboard hub reset line

Peter Chen (6):
  binding-doc: power: pwrseq-generic: add binding doc for generic power
sequence library
  power: add power sequence library
  binding-doc: usb: usb-device: add optional properties for power
sequence
  usb: core: add power sequence handling for USB devices
  usb: chipidea: let chipidea core device of_node equal's glue layer
device of_node
  ARM: dts: imx6qdl-udoo.dtsi: fix onboard USB HUB property

 .../bindings/power/pwrseq/pwrseq-generic.txt   |  48 ++
 .../devicetree/bindings/usb/usb-device.txt |  10 +-
 MAINTAINERS|   9 +
 arch/arm/boot/dts/imx6q-evi.dts|  25 +--
 arch/arm/boot/dts/imx6qdl-udoo.dtsi|  26 ++-
 arch/arm/boot/dts/imx6qdl.dtsi |   6 +
 drivers/power/Kconfig  |   1 +
 drivers/power/Makefile |   1 +
 drivers/power/pwrseq/Kconfig   |  45 +
 drivers/power/pwrseq/Makefile  |   3 +
 drivers/power/pwrseq/core.c| 190 +
 drivers/power/pwrseq/pwrseq_compatible_sample.c| 178 +++
 drivers/power/pwrseq/pwrseq_generic.c  | 177 +++
 drivers/usb/chipidea/core.c|  27 ++-
 drivers/usb/core/hub.c |  41 -
 drivers/usb/core/hub.h |   1 +
 

[PATCH v7 1/8] binding-doc: power: pwrseq-generic: add binding doc for generic power sequence library

2016-09-19 Thread Peter Chen
Add binding doc for generic power sequence library.

Signed-off-by: Peter Chen 
Acked-by: Philipp Zabel 
Acked-by: Rob Herring 
---
 .../bindings/power/pwrseq/pwrseq-generic.txt   | 48 ++
 1 file changed, 48 insertions(+)
 create mode 100644 
Documentation/devicetree/bindings/power/pwrseq/pwrseq-generic.txt

diff --git a/Documentation/devicetree/bindings/power/pwrseq/pwrseq-generic.txt 
b/Documentation/devicetree/bindings/power/pwrseq/pwrseq-generic.txt
new file mode 100644
index 000..ebf0d47
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/pwrseq/pwrseq-generic.txt
@@ -0,0 +1,48 @@
+The generic power sequence library
+
+Some hard-wired devices (eg USB/MMC) need to do power sequence before
+the device can be enumerated on the bus, the typical power sequence
+like: enable USB PHY clock, toggle reset pin, etc. But current
+Linux device driver lacks of such code to do it, it may cause some
+hard-wired devices works abnormal or can't be recognized by
+controller at all. The power sequence will be done before this device
+can be found at the bus.
+
+The power sequence properties is under the device node.
+
+Optional properties:
+- clocks: the input clocks for device.
+- reset-gpios: Should specify the GPIO for reset.
+- reset-duration-us: the duration in microsecond for assert reset signal.
+
+Below is the example of USB power sequence properties on USB device
+nodes which have two level USB hubs.
+
+ {
+   vbus-supply = <_usb_otg1_vbus>;
+   pinctrl-names = "default";
+   pinctrl-0 = <_usb_otg1_id>;
+   status = "okay";
+
+   #address-cells = <1>;
+   #size-cells = <0>;
+   genesys: hub@1 {
+   compatible = "usb5e3,608";
+   reg = <1>;
+
+   clocks = < IMX6SX_CLK_CKO>;
+   reset-gpios = < 5 GPIO_ACTIVE_LOW>; /* hub reset pin */
+   reset-duration-us = <10>;
+
+   #address-cells = <1>;
+   #size-cells = <0>;
+   asix: ethernet@1 {
+   compatible = "usbb95,1708";
+   reg = <1>;
+
+   clocks = < IMX6SX_CLK_IPG>;
+   reset-gpios = < 6 GPIO_ACTIVE_LOW>; /* 
ethernet_rst */
+   reset-duration-us = <15>;
+   };
+   };
+};
-- 
2.7.4



[PATCH v7 0/8] power: add power sequence library

2016-09-19 Thread Peter Chen
Hi all,

This is a follow-up to my last power sequence framework patch set [1].
Following Rob Herring's and Ulf Hansson's comments [2], the various kinds
of power sequence instances are added at postcore_initcall. Matching is
done on the compatible string first; if the compatible string in the dts
does not match any library, the generic power sequence is tried.
 
The host driver just needs to call of_pwrseq_on/of_pwrseq_off
if only one power sequence instance is needed, for more power sequences
are used, using of_pwrseq_on_list/of_pwrseq_off_list instead (eg, USB hub 
driver).

In future, if there are special power sequence requirements, the special
power sequence library can be created.

This patch set was tested on i.mx6 sabresx evk using a dts change; I used
two hot-plug devices to simulate this use case. The related binding
change is updated in patch [1/6]. The udoo board changes were tested
using my last power sequence patch set [3].

Besides hard-wired MMC and USB devices, I find the USB ULPI PHY also
needs to be powered on before it can be found by the ULPI bus.

[1] http://www.spinics.net/lists/linux-usb/msg142755.html
[2] http://www.spinics.net/lists/linux-usb/msg143106.html
[3] http://www.spinics.net/lists/linux-usb/msg142815.html

Changes for v7:
- Create the various power sequence instances at postcore_initcall, and match
  an instance with a node using the compatible string. The benefit is that
  the host driver doesn't need to consider which pwrseq instance needs
  to be used, since the pwrseq core will match it; however, it wastes some
  memory if fewer power sequence instances are used. [Patch 2/8]
- Add pwrseq_compatible_sample.c to test match pwrseq using device_id. [Patch 
2/8]
- Fix the comments Vaibhav Hiremath adds for error path for clock and do not
  use device_node for parameters at pwrseq_on. [Patch 2/8]
- Simplify the caller's use of the power sequence, following Alan's comments [Patch 4/8]
- Tested three pwrseq instances together using both specific compatible string 
and
  generic libraries.

Changes for v6:
- Add Matthias Kaehlcke's Reviewed-by and Tested-by. (patch [2/6])
- Change chipidea core of_node assignment for coming user. (patch [5/6])
- Applies Joshua Clayton's three dts changes for two boards,
  the USB device's reg has only #address-cells, but without #size-cells.

Changes for v5:
- Delete pwrseq_register/pwrseq_unregister, which is useless currently
- Fix the linker error when the pwrseq user is compiled as module

Changes for v4:
- Create the patch on next-20160722 
- Fix the of_node is not NULL after chipidea driver is unbinded [Patch 5/6]
- Using more friendly wait method for reset gpio [Patch 2/6]
- Support multiple input clocks [Patch 2/6]
- Add Rob Herring's ack for DT changes
- Add Joshua Clayton's Tested-by

Changes for v3:
- Delete "power-sequence" property at binding-doc, and change related code
  at both library and user code.
- Change binding-doc example node name with Rob's comments
- of_get_named_gpio_flags only gets the gpio, but without setting gpio flags,
  add additional code request gpio with proper gpio flags
- Add Philipp Zabel's Ack and MAINTAINER's entry

Changes for v2:
- Delete "pwrseq" prefix and clock-names for properties at dt binding
- Use the structure, not its pointer, for kzalloc
- Since chipidea core has no of_node, let core's of_node equals glue
  layer's at core's probe

Joshua Clayton (2):
  ARM: dts: imx6qdl: Enable usb node children with 
  ARM: dts: imx6q-evi: Fix onboard hub reset line

Peter Chen (6):
  binding-doc: power: pwrseq-generic: add binding doc for generic power
sequence library
  power: add power sequence library
  binding-doc: usb: usb-device: add optional properties for power
sequence
  usb: core: add power sequence handling for USB devices
  usb: chipidea: let chipidea core device of_node equal's glue layer
device of_node
  ARM: dts: imx6qdl-udoo.dtsi: fix onboard USB HUB property

 .../bindings/power/pwrseq/pwrseq-generic.txt   |  48 ++
 .../devicetree/bindings/usb/usb-device.txt |  10 +-
 MAINTAINERS|   9 +
 arch/arm/boot/dts/imx6q-evi.dts|  25 +--
 arch/arm/boot/dts/imx6qdl-udoo.dtsi|  26 ++-
 arch/arm/boot/dts/imx6qdl.dtsi |   6 +
 drivers/power/Kconfig  |   1 +
 drivers/power/Makefile |   1 +
 drivers/power/pwrseq/Kconfig   |  45 +
 drivers/power/pwrseq/Makefile  |   3 +
 drivers/power/pwrseq/core.c| 190 +
 drivers/power/pwrseq/pwrseq_compatible_sample.c| 178 +++
 drivers/power/pwrseq/pwrseq_generic.c  | 177 +++
 drivers/usb/chipidea/core.c|  27 ++-
 drivers/usb/core/hub.c |  41 -
 drivers/usb/core/hub.h |   1 +
 

Re: [RFC] Arm64 boot fail with numa enable in BIOS

2016-09-19 Thread Yisheng Xie


On 2016/9/19 22:07, Mark Rutland wrote:
> [adding LAKML, arm64 maintainers]
> 
> On Mon, Sep 19, 2016 at 09:05:26PM +0800, Yisheng Xie wrote:
>> hi all,
> 
> Can you modify the warning in cpumask.h to dump the bad CPU number? That
> would make it fairly clear if that's the case.
> 
hi Mark,
I dumped the bad CPU number; it is 64,
and the cpumask obtained from the task is ,.

[3.873044] select_task_rq: allowed 0, allow_cpumask ,
[3.879727] cpumask_check: cpu 64, nr_cpumask_bits:64, nr_cpu_ids= 64
[3.895989] [ cut here ]
[3.900652] WARNING: CPU: 16 PID: 103 at ./include/linux/cpumask.h:122 
try_to_wake_up+0x410/0x4ac

Thanks.
Yisheng Xie

> Thanks,
> Mark.
> 
>> [0.297337] Detected PIPT I-cache on CPU1
>> [0.297347] GICv3: CPU1: found redistributor 10001 region 
>> 1:0x4d14
>> [0.297356] CPU1: Booted secondary processor [410fd082]
>> [0.297375] [ cut here ]
>> [0.320390] WARNING: CPU: 1 PID: 0 at ./include/linux/cpumask.h:121 
>> gic_raise_softirq+0x128/0x17c
>> [0.329356] Modules linked in:
>> [0.332434] 
>> [0.333932] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 
>> 4.8.0-rc4-00163-g803ea3a #21
>> [0.341581] Hardware name: Hisilicon Hi1616 Evaluation Board (DT)
>> [0.347735] task: 8013e9dd task.stack: 8013e9dcc000
>> [0.353714] PC is at gic_raise_softirq+0x128/0x17c
>> [0.358550] LR is at gic_raise_softirq+0xa0/0x17c
>> [0.363298] pc : [] lr : [] pstate: 
>> 21c5
>> [0.370770] sp : 8013e9dcfde0
>> [0.374112] x29: 8013e9dcfde0 x28:  
>> [0.379476] x27: 0083207c x26: 08ca5d70 
>> [0.384841] x25: 00010001 x24: 08d63ff3 
>> [0.390205] x23:  x22: 08cb 
>> [0.395569] x21: 0884edb0 x20: 0001 
>> [0.400933] x19: 0001 x18:  
>> [0.406298] x17:  x16: 03010066 
>> [0.411661] x15: 08ca8000 x14: 0013 
>> [0.417025] x13:  x12: 0013 
>> [0.422389] x11: 0013 x10: 02e92aa7 
>> [0.427754] x9 :  x8 : 8413eb6ca668 
>> [0.433118] x7 : 8413eb6ca690 x6 :  
>> [0.438482] x5 : fffe x4 :  
>> [0.443845] x3 : 0040 x2 : 0041 
>> [0.449209] x1 :  x0 : 0001 
>> [0.454573] 
>> [0.456069] ---[ end trace b58e70f3295a8cd7 ]---
>> [0.460730] Call trace:
>> [0.463193] Exception stack(0x8013e9dcfc10 to 0x8013e9dcfd40)
>> [0.469699] fc00:   0001 
>> 0001
>> [0.477611] fc20: 8013e9dcfde0 0838c124 08d72228 
>> 8013e9dcff70
>> [0.485524] fc40: 08d72608 08ab02a4  
>> 
>> [0.493436] fc60:  3464313430303030  
>> 
>> [0.501348] fc80: 8013e9dcfc90 0836e678 8013e9dcfca0 
>> 0836e910
>> [0.509259] fca0: 8013e9dcfd30 0836ec10 0001 
>> 
>> [0.517171] fcc0: 0041 0040  
>> fffe
>> [0.525083] fce0:  8413eb6ca690 8413eb6ca668 
>> 
>> [0.532995] fd00: 02e92aa7 0013 0013 
>> 
>> [0.540907] fd20: 0013 08ca8000 03010066 
>> 
>> [0.548819] [] gic_raise_softirq+0x128/0x17c
>> [0.554713] [] smp_send_reschedule+0x34/0x3c
>> [0.560605] [] resched_curr+0x40/0x5c
>> [0.565881] [] check_preempt_curr+0x58/0xa0
>> [0.571685] [] ttwu_do_wakeup+0x18/0x80
>> [0.577136] [] ttwu_do_activate+0x78/0x88
>> [0.582763] [] try_to_wake_up+0x1f8/0x300
>> [0.588390] [] default_wake_function+0x10/0x18
>> [0.594458] [] __wake_up_common+0x5c/0x9c
>> [0.600085] [] __wake_up_locked+0x14/0x1c
>> [0.605712] [] complete+0x40/0x5c
>> [0.610635] [] secondary_start_kernel+0x148/0x1a8
>> [0.616965] [<000831a8>] 0x831a8
> 
> .
> 



Re: [RFC] Arm64 boot fail with numa enable in BIOS

2016-09-19 Thread Yisheng Xie


On 2016/9/19 22:07, Mark Rutland wrote:
> [adding LAKML, arm64 maintainers]
> 
> On Mon, Sep 19, 2016 at 09:05:26PM +0800, Yisheng Xie wrote:
>> hi all,
> 
> Can you modify the warning in cpumask.h to dump the bad CPU number? That
> would make it fairly clear if that's the case.
> 
hi Mark,
I dumped the bad CPU number; it is 64,
and the cpumask obtained from the task is ,.

[3.873044] select_task_rq: allowed 0, allow_cpumask ,
[3.879727] cpumask_check: cpu 64, nr_cpumask_bits:64, nr_cpu_ids= 64
[3.895989] [ cut here ]
[3.900652] WARNING: CPU: 16 PID: 103 at ./include/linux/cpumask.h:122 
try_to_wake_up+0x410/0x4ac

Thanks.
Yisheng Xie

> Thanks,
> Mark.
> 
>> [0.297337] Detected PIPT I-cache on CPU1
>> [0.297347] GICv3: CPU1: found redistributor 10001 region 
>> 1:0x4d14
>> [0.297356] CPU1: Booted secondary processor [410fd082]
>> [0.297375] [ cut here ]
>> [0.320390] WARNING: CPU: 1 PID: 0 at ./include/linux/cpumask.h:121 
>> gic_raise_softirq+0x128/0x17c
>> [0.329356] Modules linked in:
>> [0.332434] 
>> [0.333932] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 
>> 4.8.0-rc4-00163-g803ea3a #21
>> [0.341581] Hardware name: Hisilicon Hi1616 Evaluation Board (DT)
>> [0.347735] task: 8013e9dd task.stack: 8013e9dcc000
>> [0.353714] PC is at gic_raise_softirq+0x128/0x17c
>> [0.358550] LR is at gic_raise_softirq+0xa0/0x17c
>> [0.363298] pc : [] lr : [] pstate: 
>> 21c5
>> [0.370770] sp : 8013e9dcfde0
>> [0.374112] x29: 8013e9dcfde0 x28:  
>> [0.379476] x27: 0083207c x26: 08ca5d70 
>> [0.384841] x25: 00010001 x24: 08d63ff3 
>> [0.390205] x23:  x22: 08cb 
>> [0.395569] x21: 0884edb0 x20: 0001 
>> [0.400933] x19: 0001 x18:  
>> [0.406298] x17:  x16: 03010066 
>> [0.411661] x15: 08ca8000 x14: 0013 
>> [0.417025] x13:  x12: 0013 
>> [0.422389] x11: 0013 x10: 02e92aa7 
>> [0.427754] x9 :  x8 : 8413eb6ca668 
>> [0.433118] x7 : 8413eb6ca690 x6 :  
>> [0.438482] x5 : fffe x4 :  
>> [0.443845] x3 : 0040 x2 : 0041 
>> [0.449209] x1 :  x0 : 0001 
>> [0.454573] 
>> [0.456069] ---[ end trace b58e70f3295a8cd7 ]---
>> [0.460730] Call trace:
>> [0.463193] Exception stack(0x8013e9dcfc10 to 0x8013e9dcfd40)
>> [0.469699] fc00:   0001 
>> 0001
>> [0.477611] fc20: 8013e9dcfde0 0838c124 08d72228 
>> 8013e9dcff70
>> [0.485524] fc40: 08d72608 08ab02a4  
>> 
>> [0.493436] fc60:  3464313430303030  
>> 
>> [0.501348] fc80: 8013e9dcfc90 0836e678 8013e9dcfca0 
>> 0836e910
>> [0.509259] fca0: 8013e9dcfd30 0836ec10 0001 
>> 
>> [0.517171] fcc0: 0041 0040  
>> fffe
>> [0.525083] fce0:  8413eb6ca690 8413eb6ca668 
>> 
>> [0.532995] fd00: 02e92aa7 0013 0013 
>> 
>> [0.540907] fd20: 0013 08ca8000 03010066 
>> 
>> [0.548819] [] gic_raise_softirq+0x128/0x17c
>> [0.554713] [] smp_send_reschedule+0x34/0x3c
>> [0.560605] [] resched_curr+0x40/0x5c
>> [0.565881] [] check_preempt_curr+0x58/0xa0
>> [0.571685] [] ttwu_do_wakeup+0x18/0x80
>> [0.577136] [] ttwu_do_activate+0x78/0x88
>> [0.582763] [] try_to_wake_up+0x1f8/0x300
>> [0.588390] [] default_wake_function+0x10/0x18
>> [0.594458] [] __wake_up_common+0x5c/0x9c
>> [0.600085] [] __wake_up_locked+0x14/0x1c
>> [0.605712] [] complete+0x40/0x5c
>> [0.610635] [] secondary_start_kernel+0x148/0x1a8
>> [0.616965] [<000831a8>] 0x831a8
> 
> .
> 



Re: [PATCH 1/2] power: bq27xxx_battery: add configurable poll_interval by sysfs

2016-09-19 Thread Matt Ranostay
On Mon, Sep 19, 2016 at 12:46 PM, Sebastian Reichel  wrote:
> Hi,
>
> On Fri, Sep 16, 2016 at 08:42:54PM -0700, Matt Ranostay wrote:
>> Allow the poll_interval to be runtime configurable via an sysfs
>> entry.  This is needed for udev control of the poll interval.
>>
>> Signed-off-by: Matt Ranostay 
>> ---
>>  drivers/power/supply/bq27xxx_battery.c | 48 
>> +-
>>  1 file changed, 47 insertions(+), 1 deletion(-)
>
> New sysfs attributes should be documented in Documentation/ABI.

Yeah I should know better :)

>
> Also I'm not too keen to add this, as there is already the sysfs
> entry for the module parameter. I don't see any reason why udev
> should not be able to change that value, so fix udev instead of
> duplicating functionality in the kernel.

Yeah duplication is bad.  We are wondering if having a
POWER_SUPPLY_PROP_UPDATE_INTERVAL would be a more acceptable
solution. Of course this would need to be made generic and not a
per-driver solution as it is now.

Thanks,

Matt

>
> -- Sebastian


Re: [PATCH 1/2] power: bq27xxx_battery: add configurable poll_interval by sysfs

2016-09-19 Thread Matt Ranostay
On Mon, Sep 19, 2016 at 12:46 PM, Sebastian Reichel  wrote:
> Hi,
>
> On Fri, Sep 16, 2016 at 08:42:54PM -0700, Matt Ranostay wrote:
>> Allow the poll_interval to be runtime configurable via an sysfs
>> entry.  This is needed for udev control of the poll interval.
>>
>> Signed-off-by: Matt Ranostay 
>> ---
>>  drivers/power/supply/bq27xxx_battery.c | 48 
>> +-
>>  1 file changed, 47 insertions(+), 1 deletion(-)
>
> New sysfs attributes should be documented in Documentation/ABI.

Yeah I should know better :)

>
> Also I'm not too keen to add this, as there is already the sysfs
> entry for the module parameter. I don't see any reason why udev
> should not be able to change that value, so fix udev instead of
> duplicating functionality in the kernel.

Yeah duplication is bad.  We are wondering if having a
POWER_SUPPLY_PROP_UPDATE_INTERVAL would be a more acceptable
solution. Of course this would need to be made generic and not a
per-driver solution as it is now.

Thanks,

Matt

>
> -- Sebastian


Re: [PATCH] f2fs: fix to avoid slowing down background gc

2016-09-19 Thread Chao Yu
On 2016/9/20 10:54, Jaegeuk Kim wrote:
> On Tue, Sep 20, 2016 at 10:22:22AM +0800, Chao Yu wrote:
>> Hi Jaegeuk,
>>
>> On 2016/9/20 6:12, Jaegeuk Kim wrote:
>>> Hi Chao,
>>>
>>> On Sun, Sep 18, 2016 at 07:52:27PM +0800, Chao Yu wrote:
 Previously, we will choose to speed up background gc when the below
 conditions are both satisfied:
 a. There are a number of invalid blocks
 b. There is not enough free space

 But, when space utilization is high (utilization > 60%), there will be
 not enough invalid blocks, result in slowing down background gc, after
 then there are more opportunities that triggering foreground gc due to
 high fragmented free space in fs.

 Remove condition a) in order to avoid slow down background gc speed in
 a high utilization fs.
>>>
>>> There exists a trade-off here: wear-out vs. eager gc for future speed-up.
>>> How about using a kind of f2fs's dirty level (e.g., BDF)?
>>
>> Yep, I think that f2fs can implement a mechanism which can provide more
>> dynamically adjustable GC speed in the specified scenario of user, by this, 
>> user
>> can choose the strategy which is more beneficial to aspect
>> (wear-out/performance) they care. Let me think a while, anyway I agree that 
>> BDF
>> is a good reference value here.
>>
>> And Before we can provide above ability, how about treat this patch as a 
>> fixing
>> patch, since it fixes to not adjust speed of GC according to utilization 
>> watermark?
> 
> Well, this is not a bug fix, but a very conservative policy. So, please let's
> make a better policy, if possible.

Alright, let's think about this.

Thanks,

> 
> Thanks,
> 
>>
>> Thanks,
>>
>>>
>>> Thanks,
>>>

 Signed-off-by: Chao Yu 
 ---
  fs/f2fs/gc.h | 18 +++---
  1 file changed, 3 insertions(+), 15 deletions(-)

 diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
 index a993967..5d0a19c 100644
 --- a/fs/f2fs/gc.h
 +++ b/fs/f2fs/gc.h
 @@ -16,7 +16,6 @@
  #define DEF_GC_THREAD_MIN_SLEEP_TIME  3   /* milliseconds */
  #define DEF_GC_THREAD_MAX_SLEEP_TIME  6
  #define DEF_GC_THREAD_NOGC_SLEEP_TIME 30  /* wait 5 min */
 -#define LIMIT_INVALID_BLOCK   40 /* percentage over total user space 
 */
  #define LIMIT_FREE_BLOCK  40 /* percentage over invalid + free space */
  
  /* Search max. number of dirty segments to select a victim segment */
 @@ -52,11 +51,6 @@ static inline block_t free_user_blocks(struct 
 f2fs_sb_info *sbi)
<< sbi->log_blocks_per_seg;
  }
  
 -static inline block_t limit_invalid_user_blocks(struct f2fs_sb_info *sbi)
 -{
 -  return (long)(sbi->user_block_count * LIMIT_INVALID_BLOCK) / 100;
 -}
 -
  static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi)
  {
block_t reclaimable_user_blocks = sbi->user_block_count -
 @@ -88,15 +82,9 @@ static inline void decrease_sleep_time(struct 
 f2fs_gc_kthread *gc_th,
  
  static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi)
  {
 -  block_t invalid_user_blocks = sbi->user_block_count -
 -  written_block_count(sbi);
/*
 -   * Background GC is triggered with the following conditions.
 -   * 1. There are a number of invalid blocks.
 -   * 2. There is not enough free space.
 +   * Background GC should speed up when there is not enough free blocks
 +   * in total unused (free + invalid) blocks.
 */
 -  if (invalid_user_blocks > limit_invalid_user_blocks(sbi) &&
 -  free_user_blocks(sbi) < limit_free_user_blocks(sbi))
 -  return true;
 -  return false;
 +  return free_user_blocks(sbi) < limit_free_user_blocks(sbi);
  }
 -- 
 2.8.2.311.gee88674
>>>
>>> .
>>>
> 
> .
> 



Re: [PATCH] f2fs: fix to avoid slowing down background gc

2016-09-19 Thread Chao Yu
On 2016/9/20 10:54, Jaegeuk Kim wrote:
> On Tue, Sep 20, 2016 at 10:22:22AM +0800, Chao Yu wrote:
>> Hi Jaegeuk,
>>
>> On 2016/9/20 6:12, Jaegeuk Kim wrote:
>>> Hi Chao,
>>>
>>> On Sun, Sep 18, 2016 at 07:52:27PM +0800, Chao Yu wrote:
 Previously, we will choose to speed up background gc when the below
 conditions are both satisfied:
 a. There are a number of invalid blocks
 b. There is not enough free space

 But, when space utilization is high (utilization > 60%), there will be
 not enough invalid blocks, result in slowing down background gc, after
 then there are more opportunities that triggering foreground gc due to
 high fragmented free space in fs.

 Remove condition a) in order to avoid slow down background gc speed in
 a high utilization fs.
>>>
>>> There exists a trade-off here: wear-out vs. eager gc for future speed-up.
>>> How about using a kind of f2fs's dirty level (e.g., BDF)?
>>
>> Yep, I think that f2fs can implement a mechanism which can provide more
>> dynamically adjustable GC speed in the specified scenario of user, by this, 
>> user
>> can choose the strategy which is more beneficial to aspect
>> (wear-out/performance) they care. Let me think a while, anyway I agree that 
>> BDF
>> is a good reference value here.
>>
>> And Before we can provide above ability, how about treat this patch as a 
>> fixing
>> patch, since it fixes to not adjust speed of GC according to utilization 
>> watermark?
> 
> Well, this is not a bug fix, but a very conservative policy. So, please let's
> make a better policy, if possible.

Alright, let's think about this.

Thanks,

> 
> Thanks,
> 
>>
>> Thanks,
>>
>>>
>>> Thanks,
>>>

 Signed-off-by: Chao Yu 
 ---
  fs/f2fs/gc.h | 18 +++---
  1 file changed, 3 insertions(+), 15 deletions(-)

 diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
 index a993967..5d0a19c 100644
 --- a/fs/f2fs/gc.h
 +++ b/fs/f2fs/gc.h
 @@ -16,7 +16,6 @@
  #define DEF_GC_THREAD_MIN_SLEEP_TIME  3   /* milliseconds */
  #define DEF_GC_THREAD_MAX_SLEEP_TIME  6
  #define DEF_GC_THREAD_NOGC_SLEEP_TIME 30  /* wait 5 min */
 -#define LIMIT_INVALID_BLOCK   40 /* percentage over total user space 
 */
  #define LIMIT_FREE_BLOCK  40 /* percentage over invalid + free space */
  
  /* Search max. number of dirty segments to select a victim segment */
 @@ -52,11 +51,6 @@ static inline block_t free_user_blocks(struct 
 f2fs_sb_info *sbi)
<< sbi->log_blocks_per_seg;
  }
  
 -static inline block_t limit_invalid_user_blocks(struct f2fs_sb_info *sbi)
 -{
 -  return (long)(sbi->user_block_count * LIMIT_INVALID_BLOCK) / 100;
 -}
 -
  static inline block_t limit_free_user_blocks(struct f2fs_sb_info *sbi)
  {
block_t reclaimable_user_blocks = sbi->user_block_count -
 @@ -88,15 +82,9 @@ static inline void decrease_sleep_time(struct 
 f2fs_gc_kthread *gc_th,
  
  static inline bool has_enough_invalid_blocks(struct f2fs_sb_info *sbi)
  {
 -  block_t invalid_user_blocks = sbi->user_block_count -
 -  written_block_count(sbi);
/*
 -   * Background GC is triggered with the following conditions.
 -   * 1. There are a number of invalid blocks.
 -   * 2. There is not enough free space.
 +   * Background GC should speed up when there is not enough free blocks
 +   * in total unused (free + invalid) blocks.
 */
 -  if (invalid_user_blocks > limit_invalid_user_blocks(sbi) &&
 -  free_user_blocks(sbi) < limit_free_user_blocks(sbi))
 -  return true;
 -  return false;
 +  return free_user_blocks(sbi) < limit_free_user_blocks(sbi);
  }
 -- 
 2.8.2.311.gee88674
>>>
>>> .
>>>
> 
> .
> 



[PATCH v2 4/6] f2fs: introduce cp_lock to protect updating of ckpt_flags

2016-09-19 Thread Chao Yu
This patch introduces a spinlock to protect updates of the ckpt_flags
field in struct f2fs_checkpoint; this avoids incorrect updates under
race conditions.

Signed-off-by: Chao Yu 
---
 fs/f2fs/checkpoint.c | 28 
 fs/f2fs/f2fs.h   | 37 +
 fs/f2fs/recovery.c   |  2 +-
 fs/f2fs/segment.c|  4 ++--
 fs/f2fs/super.c  |  5 +++--
 5 files changed, 51 insertions(+), 25 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index a366521..bc93afd 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -28,7 +28,7 @@ struct kmem_cache *inode_entry_slab;
 
 void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
 {
-   set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
+   set_ckpt_flags(sbi, CP_ERROR_FLAG);
sbi->sb->s_flags |= MS_RDONLY;
if (!end_io)
f2fs_flush_merged_bios(sbi);
@@ -574,7 +574,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
block_t start_blk, orphan_blocks, i, j;
int err;
 
-   if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
+   if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
return 0;
 
start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
@@ -598,7 +598,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
f2fs_put_page(page, 1);
}
/* clear Orphan Flag */
-   clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
+   clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG);
return 0;
 }
 
@@ -1056,10 +1056,12 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, 
struct cp_control *cpc)
 
/* 2 cp  + n data seg summary + orphan inode blocks */
data_sum_blocks = npages_for_summary_flush(sbi, false);
+   spin_lock(>cp_lock);
if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
-   set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
+   __set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
else
-   clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
+   __clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
+   spin_unlock(>cp_lock);
 
orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num);
ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
@@ -1074,23 +1076,25 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, 
struct cp_control *cpc)
cp_payload_blks + data_sum_blocks +
orphan_blocks);
 
+   spin_lock(>cp_lock);
if (cpc->reason == CP_UMOUNT)
-   set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
+   __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
else
-   clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
+   __clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
 
if (cpc->reason == CP_FASTBOOT)
-   set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
+   __set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
else
-   clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
+   __clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
 
if (orphan_num)
-   set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
+   __set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
else
-   clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
+   __clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
 
if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
-   set_ckpt_flags(ckpt, CP_FSCK_FLAG);
+   __set_ckpt_flags(ckpt, CP_FSCK_FLAG);
+   spin_unlock(>cp_lock);
 
/* update SIT/NAT bitmap */
get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 53da455..7803808 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -817,6 +817,7 @@ struct f2fs_sb_info {
 
/* for checkpoint */
struct f2fs_checkpoint *ckpt;   /* raw checkpoint pointer */
+   spinlock_t cp_lock; /* for flag in ckpt */
struct inode *meta_inode;   /* cache meta blocks */
struct mutex cp_mutex;  /* checkpoint procedure lock */
struct rw_semaphore cp_rwsem;   /* blocking FS operations */
@@ -1084,26 +1085,46 @@ static inline unsigned long long cur_cp_version(struct 
f2fs_checkpoint *cp)
return le64_to_cpu(cp->checkpoint_ver);
 }
 
-static inline bool is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int 
f)
+static inline bool is_set_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
 {
+   struct f2fs_checkpoint *cp = F2FS_CKPT(sbi);
unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
+
return ckpt_flags & f;
 }
 
-static inline void set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
+static inline void __set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
 {
-   unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
+   

[PATCH v2 4/6] f2fs: introduce cp_lock to protect updating of ckpt_flags

2016-09-19 Thread Chao Yu
This patch introduces a spinlock to protect updates of the ckpt_flags
field in struct f2fs_checkpoint; this avoids incorrect updates under
race conditions.

Signed-off-by: Chao Yu 
---
 fs/f2fs/checkpoint.c | 28 
 fs/f2fs/f2fs.h   | 37 +
 fs/f2fs/recovery.c   |  2 +-
 fs/f2fs/segment.c|  4 ++--
 fs/f2fs/super.c  |  5 +++--
 5 files changed, 51 insertions(+), 25 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index a366521..bc93afd 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -28,7 +28,7 @@ struct kmem_cache *inode_entry_slab;
 
 void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
 {
-   set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
+   set_ckpt_flags(sbi, CP_ERROR_FLAG);
sbi->sb->s_flags |= MS_RDONLY;
if (!end_io)
f2fs_flush_merged_bios(sbi);
@@ -574,7 +574,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
block_t start_blk, orphan_blocks, i, j;
int err;
 
-   if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
+   if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
return 0;
 
start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
@@ -598,7 +598,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi)
f2fs_put_page(page, 1);
}
/* clear Orphan Flag */
-   clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
+   clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG);
return 0;
 }
 
@@ -1056,10 +1056,12 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, 
struct cp_control *cpc)
 
/* 2 cp  + n data seg summary + orphan inode blocks */
data_sum_blocks = npages_for_summary_flush(sbi, false);
+   spin_lock(>cp_lock);
if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
-   set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
+   __set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
else
-   clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
+   __clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
+   spin_unlock(>cp_lock);
 
orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num);
ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
@@ -1074,23 +1076,25 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, 
struct cp_control *cpc)
cp_payload_blks + data_sum_blocks +
orphan_blocks);
 
+   spin_lock(>cp_lock);
if (cpc->reason == CP_UMOUNT)
-   set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
+   __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
else
-   clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
+   __clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
 
if (cpc->reason == CP_FASTBOOT)
-   set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
+   __set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
else
-   clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
+   __clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
 
if (orphan_num)
-   set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
+   __set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
else
-   clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
+   __clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
 
if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
-   set_ckpt_flags(ckpt, CP_FSCK_FLAG);
+   __set_ckpt_flags(ckpt, CP_FSCK_FLAG);
+   spin_unlock(>cp_lock);
 
/* update SIT/NAT bitmap */
get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 53da455..7803808 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -817,6 +817,7 @@ struct f2fs_sb_info {
 
/* for checkpoint */
struct f2fs_checkpoint *ckpt;   /* raw checkpoint pointer */
+   spinlock_t cp_lock; /* for flag in ckpt */
struct inode *meta_inode;   /* cache meta blocks */
struct mutex cp_mutex;  /* checkpoint procedure lock */
struct rw_semaphore cp_rwsem;   /* blocking FS operations */
@@ -1084,26 +1085,46 @@ static inline unsigned long long cur_cp_version(struct 
f2fs_checkpoint *cp)
return le64_to_cpu(cp->checkpoint_ver);
 }
 
-static inline bool is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int 
f)
+static inline bool is_set_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f)
 {
+   struct f2fs_checkpoint *cp = F2FS_CKPT(sbi);
unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
+
return ckpt_flags & f;
 }
 
-static inline void set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
+static inline void __set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f)
 {
-   unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags);
+   unsigned int ckpt_flags;
+

[PATCH 0/3] iMX7 PWM polarity control

2016-09-19 Thread Bhuvanchandra DV
This patchset depends on this patchset[1] and this patch[2] which adds support
for polarity control in imx-pwm driver.

- Use pwm polarity control on iMX7 based modules.
- Enable polarity control on Toradex Colibri iMX7D/S module.
- Add BL_ON GPIO control for Toradex Colibri iMX7D/S module.

[1] 
http://lists.infradead.org/pipermail/linux-arm-kernel/2014-October/294027.html
[2] https://www.spinics.net/lists/arm-kernel/msg530818.html

Bhuvanchandra DV (3):
  arm: dts: imx7: Update #pwm-cells for PWM polarity control
  arm: dts: imx7-colibri: Use pwm polarity control
  arm: dts: imx7-colibri: Use enable-gpios for BL_ON

 Documentation/devicetree/bindings/pwm/imx-pwm.txt |  6 +++---
 arch/arm/boot/dts/imx7-colibri.dtsi   | 12 ++--
 arch/arm/boot/dts/imx7s.dtsi  |  8 
 3 files changed, 17 insertions(+), 9 deletions(-)

-- 
2.9.2



[PATCH 0/3] iMX7 PWM polarity control

2016-09-19 Thread Bhuvanchandra DV
This patchset depends on this patchset[1] and this patch[2] which adds support
for polarity control in imx-pwm driver.

- Use pwm polarity control on iMX7 based modules.
- Enable polarity control on Toradex Colibri iMX7D/S module.
- Add BL_ON GPIO control for Toradex Colibri iMX7D/S module.

[1] 
http://lists.infradead.org/pipermail/linux-arm-kernel/2014-October/294027.html
[2] https://www.spinics.net/lists/arm-kernel/msg530818.html

Bhuvanchandra DV (3):
  arm: dts: imx7: Update #pwm-cells for PWM polarity control
  arm: dts: imx7-colibri: Use pwm polarity control
  arm: dts: imx7-colibri: Use enable-gpios for BL_ON

 Documentation/devicetree/bindings/pwm/imx-pwm.txt |  6 +++---
 arch/arm/boot/dts/imx7-colibri.dtsi   | 12 ++--
 arch/arm/boot/dts/imx7s.dtsi  |  8 
 3 files changed, 17 insertions(+), 9 deletions(-)

-- 
2.9.2



[PATCH 2/2] f2fs: put directory inodes before checkpoint in roll-forward recovery

2016-09-19 Thread Jaegeuk Kim
Before checkpoint, we'd better drop any inodes.

Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/recovery.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 2b8a56d..509273a 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -630,6 +630,9 @@ out:
set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
mutex_unlock(>cp_mutex);
 
+   /* let's drop all the directory inodes for clean checkpoint */
+   destroy_fsync_dnodes(_list);
+
if (!err && need_writecp) {
struct cp_control cpc = {
.reason = CP_RECOVERY,
@@ -637,7 +640,6 @@ out:
err = write_checkpoint(sbi, );
}
 
-   destroy_fsync_dnodes(_list);
kmem_cache_destroy(fsync_entry_slab);
return ret ? ret: err;
 }
-- 
2.8.3



Re: [PATCH RESEND] drm/ast: free correct pointer in astfb_create() error paths

2016-09-19 Thread Andrew Donnellan

On 20/09/16 11:56, Andrew Donnellan wrote:

In the err_free_vram and err_release_fbi error paths in astfb_create(), we
attempt to free afbdev->sysram. The only jumps to these error paths occur
before we assign afbdev->sysram = sysram. Free sysram instead.

Signed-off-by: Andrew Donnellan 

---

Found by Coverity Scan. Compile tested only.

Resending as it looks like this didn't hit dri-devel.


It did show up after all :)

--
Andrew Donnellan  OzLabs, ADL Canberra
andrew.donnel...@au1.ibm.com  IBM Australia Limited



[PATCH 2/2] f2fs: put directory inodes before checkpoint in roll-forward recovery

2016-09-19 Thread Jaegeuk Kim
Before checkpoint, we'd better drop any inodes.

Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/recovery.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 2b8a56d..509273a 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -630,6 +630,9 @@ out:
set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
mutex_unlock(>cp_mutex);
 
+   /* let's drop all the directory inodes for clean checkpoint */
+   destroy_fsync_dnodes(_list);
+
if (!err && need_writecp) {
struct cp_control cpc = {
.reason = CP_RECOVERY,
@@ -637,7 +640,6 @@ out:
err = write_checkpoint(sbi, );
}
 
-   destroy_fsync_dnodes(_list);
kmem_cache_destroy(fsync_entry_slab);
return ret ? ret: err;
 }
-- 
2.8.3



Re: [PATCH RESEND] drm/ast: free correct pointer in astfb_create() error paths

2016-09-19 Thread Andrew Donnellan

On 20/09/16 11:56, Andrew Donnellan wrote:

In the err_free_vram and err_release_fbi error paths in astfb_create(), we
attempt to free afbdev->sysram. The only jumps to these error paths occur
before we assign afbdev->sysram = sysram. Free sysram instead.

Signed-off-by: Andrew Donnellan 

---

Found by Coverity Scan. Compile tested only.

Resending as it looks like this didn't hit dri-devel.


It did show up after all :)

--
Andrew Donnellan  OzLabs, ADL Canberra
andrew.donnel...@au1.ibm.com  IBM Australia Limited



  1   2   3   4   5   6   7   8   9   10   >