date:20161124

Re: [PATCH] auxdisplay: ht16k33: select CONFIG_FB_SYS_FOPS

2016-11-24 Thread Robin van der Gracht

Hi Arnd,

On Wed, 23 Nov 2016 14:06:49 +0100
Arnd Bergmann  wrote:

> The new driver caused a rare randconfig failure:
> 
> drivers/auxdisplay/ht16k33.o:(.data.ht16k33_fb_ops+0xc): undefined reference 
> to `fb_sys_read'
> drivers/auxdisplay/ht16k33.o:(.data.ht16k33_fb_ops+0x10): undefined reference 
> to `fb_sys_write'
> 
> This selects the respective helper module, like all other
> such drivers do.

Thanks for reporting this. You are right about the missing helper.
However, the fb_ops struct uses several helpers which are all missing.

static struct fb_ops ht16k33_fb_ops = {
.owner = THIS_MODULE,
.fb_read = fb_sys_read,
.fb_write = fb_sys_write,
.fb_fillrect = sys_fillrect,
.fb_copyarea = sys_copyarea,
.fb_imageblit = sys_imageblit,
.fb_mmap = ht16k33_mmap,
};

HT16K33 should also select:
FB_SYS_FILLRECT
FB_SYS_COPYAREA
FB_SYS_IMAGEBLIT

> 
> Fixes: 8992da44c680 ("auxdisplay: ht16k33: Driver for LED controller")
> Signed-off-by: Arnd Bergmann 
> ---
>  drivers/auxdisplay/Kconfig | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig
> index a230ea797b92..b8bbfc64a1d1 100644
> --- a/drivers/auxdisplay/Kconfig
> +++ b/drivers/auxdisplay/Kconfig
> @@ -131,6 +131,7 @@ config IMG_ASCII_LCD
>  config HT16K33
>   tristate "Holtek Ht16K33 LED controller with keyscan"
>   depends on FB && OF && I2C && INPUT
> + select FB_SYS_FOPS
>   select INPUT_MATRIXKMAP
>   select FB_BACKLIGHT
>   help

Regards,
Robin

Re: [PATCH] auxdisplay: ht16k33: select CONFIG_FB_SYS_FOPS

2016-11-24 Thread Robin van der Gracht

Hi Arnd,

On Wed, 23 Nov 2016 14:06:49 +0100
Arnd Bergmann  wrote:

> The new driver caused a rare randconfig failure:
> 
> drivers/auxdisplay/ht16k33.o:(.data.ht16k33_fb_ops+0xc): undefined reference 
> to `fb_sys_read'
> drivers/auxdisplay/ht16k33.o:(.data.ht16k33_fb_ops+0x10): undefined reference 
> to `fb_sys_write'
> 
> This selects the respective helper module, like all other
> such drivers do.

Thanks for reporting this. You are right about the missing helper.
However, the fb_ops struct uses several helpers which are all missing.

static struct fb_ops ht16k33_fb_ops = {
.owner = THIS_MODULE,
.fb_read = fb_sys_read,
.fb_write = fb_sys_write,
.fb_fillrect = sys_fillrect,
.fb_copyarea = sys_copyarea,
.fb_imageblit = sys_imageblit,
.fb_mmap = ht16k33_mmap,
};

HT16K33 should also select:
FB_SYS_FILLRECT
FB_SYS_COPYAREA
FB_SYS_IMAGEBLIT

> 
> Fixes: 8992da44c680 ("auxdisplay: ht16k33: Driver for LED controller")
> Signed-off-by: Arnd Bergmann 
> ---
>  drivers/auxdisplay/Kconfig | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig
> index a230ea797b92..b8bbfc64a1d1 100644
> --- a/drivers/auxdisplay/Kconfig
> +++ b/drivers/auxdisplay/Kconfig
> @@ -131,6 +131,7 @@ config IMG_ASCII_LCD
>  config HT16K33
>   tristate "Holtek Ht16K33 LED controller with keyscan"
>   depends on FB && OF && I2C && INPUT
> + select FB_SYS_FOPS
>   select INPUT_MATRIXKMAP
>   select FB_BACKLIGHT
>   help

Regards,
Robin

[PATCH] regulator: twl: drop of_match_ptr

2016-11-24 Thread Nicolae Rosia

We can safely drop of_match_ptr since the driver
depends on CONFIG_OF symbol

Signed-off-by: Nicolae Rosia 
---
Depends on "regulator: twl6030: add dependency on OF".
After sending that one I realised that twl-regulator can also
have that change. You can squash these two if you like.

 drivers/regulator/twl-regulator.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/twl-regulator.c 
b/drivers/regulator/twl-regulator.c
index 6c9ec84..48c060f 100644
--- a/drivers/regulator/twl-regulator.c
+++ b/drivers/regulator/twl-regulator.c
@@ -654,7 +654,7 @@ static struct platform_driver twlreg_driver = {
 */
.driver  = {
.name  = "twl4030_reg",
-   .of_match_table = of_match_ptr(twl_of_match),
+   .of_match_table = twl_of_match,
},
 };
 
-- 
2.9.3

[PATCH] regulator: twl: drop of_match_ptr

2016-11-24 Thread Nicolae Rosia

We can safely drop of_match_ptr since the driver
depends on CONFIG_OF symbol

Signed-off-by: Nicolae Rosia 
---
Depends on "regulator: twl6030: add dependency on OF".
After sending that one I realised that twl-regulator can also
have that change. You can squash these two if you like.

 drivers/regulator/twl-regulator.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/regulator/twl-regulator.c 
b/drivers/regulator/twl-regulator.c
index 6c9ec84..48c060f 100644
--- a/drivers/regulator/twl-regulator.c
+++ b/drivers/regulator/twl-regulator.c
@@ -654,7 +654,7 @@ static struct platform_driver twlreg_driver = {
 */
.driver  = {
.name  = "twl4030_reg",
-   .of_match_table = of_match_ptr(twl_of_match),
+   .of_match_table = twl_of_match,
},
 };
 
-- 
2.9.3

Re: Multiple problems with the Linux kernel on an AMD desktop

2016-11-24 Thread Borislav Petkov

On Thu, Nov 24, 2016 at 09:39:57PM -0200, Rogério Brito wrote:
> Before I go on describing the problems that I have, I want to say that I can
> bisect the kernel, apply patches and give feedback for the problems that I
> am seeing.

Good. We're going to need them.

Please check out the latest Linus kernel:

http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git

build it, boot it on your machine, catch dmesg and send it to me.

Thanks!

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

Re: Multiple problems with the Linux kernel on an AMD desktop

2016-11-24 Thread Borislav Petkov

On Thu, Nov 24, 2016 at 09:39:57PM -0200, Rogério Brito wrote:
> Before I go on describing the problems that I have, I want to say that I can
> bisect the kernel, apply patches and give feedback for the problems that I
> am seeing.

Good. We're going to need them.

Please check out the latest Linus kernel:

http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git

build it, boot it on your machine, catch dmesg and send it to me.

Thanks!

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

[PATCH] regulator: twl6030: add dependency on OF

2016-11-24 Thread Nicolae Rosia

This driver was converted to device tree only,
add dependency on OF symbol and drop of_match_ptr

Signed-off-by: Nicolae Rosia 
---
 drivers/regulator/Kconfig | 1 +
 drivers/regulator/twl6030-regulator.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 936f7cc..ef70672 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -839,6 +839,7 @@ config REGULATOR_TPS80031
 config REGULATOR_TWL4030
tristate "TI TWL4030/TWL5030/TWL6030/TPS659x0 PMIC"
depends on TWL4030_CORE
+   depends on OF
help
  This driver supports the voltage regulators provided by
  this family of companion chips.
diff --git a/drivers/regulator/twl6030-regulator.c 
b/drivers/regulator/twl6030-regulator.c
index 4864b9d..1f327d7 100644
--- a/drivers/regulator/twl6030-regulator.c
+++ b/drivers/regulator/twl6030-regulator.c
@@ -773,7 +773,7 @@ static struct platform_driver twlreg_driver = {
 */
.driver  = {
.name  = "twl6030_reg",
-   .of_match_table = of_match_ptr(twl_of_match),
+   .of_match_table = twl_of_match,
},
 };
 
-- 
2.9.3

[PATCH] regulator: twl6030: add dependency on OF

2016-11-24 Thread Nicolae Rosia

This driver was converted to device tree only,
add dependency on OF symbol and drop of_match_ptr

Signed-off-by: Nicolae Rosia 
---
 drivers/regulator/Kconfig | 1 +
 drivers/regulator/twl6030-regulator.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig
index 936f7cc..ef70672 100644
--- a/drivers/regulator/Kconfig
+++ b/drivers/regulator/Kconfig
@@ -839,6 +839,7 @@ config REGULATOR_TPS80031
 config REGULATOR_TWL4030
tristate "TI TWL4030/TWL5030/TWL6030/TPS659x0 PMIC"
depends on TWL4030_CORE
+   depends on OF
help
  This driver supports the voltage regulators provided by
  this family of companion chips.
diff --git a/drivers/regulator/twl6030-regulator.c 
b/drivers/regulator/twl6030-regulator.c
index 4864b9d..1f327d7 100644
--- a/drivers/regulator/twl6030-regulator.c
+++ b/drivers/regulator/twl6030-regulator.c
@@ -773,7 +773,7 @@ static struct platform_driver twlreg_driver = {
 */
.driver  = {
.name  = "twl6030_reg",
-   .of_match_table = of_match_ptr(twl_of_match),
+   .of_match_table = twl_of_match,
},
 };
 
-- 
2.9.3

Re: [PATCH] z3fold: use %z modifier for format string

2016-11-24 Thread Vitaly Wool

Hi Joe,

On Thu, Nov 24, 2016 at 6:08 PM, Joe Perches  wrote:
> On Thu, 2016-11-24 at 17:31 +0100, Arnd Bergmann wrote:
>> Printing a size_t requires the %zd format rather than %d:
>>
>> mm/z3fold.c: In function ‘init_z3fold’:
>> include/linux/kern_levels.h:4:18: error: format ‘%d’ expects argument of 
>> type ‘int’, but argument 2 has type ‘long unsigned int’ [-Werror=format=]
>>
>> Fixes: 50a50d2676c4 ("z3fold: don't fail kernel build if z3fold_header is 
>> too big")
>> Signed-off-by: Arnd Bergmann 
>> ---
>>  mm/z3fold.c | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/mm/z3fold.c b/mm/z3fold.c
>> index e282ba073e77..66ac7a7dc934 100644
>> --- a/mm/z3fold.c
>> +++ b/mm/z3fold.c
>> @@ -884,7 +884,7 @@ static int __init init_z3fold(void)
>>  {
>>   /* Fail the initialization if z3fold header won't fit in one chunk */
>>   if (sizeof(struct z3fold_header) > ZHDR_SIZE_ALIGNED) {
>> - pr_err("z3fold: z3fold_header size (%d) is bigger than "
>> + pr_err("z3fold: z3fold_header size (%zd) is bigger than "
>>   "the chunk size (%d), can't proceed\n",
>>   sizeof(struct z3fold_header) , ZHDR_SIZE_ALIGNED);
>>   return -E2BIG;
>
> The embedded "z3fold: " prefix here should be removed
> as there's a pr_fmt that also adds it.
>
> The test looks like it should be a BUILD_BUG_ON rather
> than any runtime test too.

It used to be BUILD_BUG_ON but we deliberately changed that because
sizeof(spinlock_t) gets bloated in debug builds, so it just won't
build with default CHUNK_SIZE.

~vitaly

Re: [PATCH] z3fold: use %z modifier for format string

2016-11-24 Thread Vitaly Wool

Hi Joe,

On Thu, Nov 24, 2016 at 6:08 PM, Joe Perches  wrote:
> On Thu, 2016-11-24 at 17:31 +0100, Arnd Bergmann wrote:
>> Printing a size_t requires the %zd format rather than %d:
>>
>> mm/z3fold.c: In function ‘init_z3fold’:
>> include/linux/kern_levels.h:4:18: error: format ‘%d’ expects argument of 
>> type ‘int’, but argument 2 has type ‘long unsigned int’ [-Werror=format=]
>>
>> Fixes: 50a50d2676c4 ("z3fold: don't fail kernel build if z3fold_header is 
>> too big")
>> Signed-off-by: Arnd Bergmann 
>> ---
>>  mm/z3fold.c | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/mm/z3fold.c b/mm/z3fold.c
>> index e282ba073e77..66ac7a7dc934 100644
>> --- a/mm/z3fold.c
>> +++ b/mm/z3fold.c
>> @@ -884,7 +884,7 @@ static int __init init_z3fold(void)
>>  {
>>   /* Fail the initialization if z3fold header won't fit in one chunk */
>>   if (sizeof(struct z3fold_header) > ZHDR_SIZE_ALIGNED) {
>> - pr_err("z3fold: z3fold_header size (%d) is bigger than "
>> + pr_err("z3fold: z3fold_header size (%zd) is bigger than "
>>   "the chunk size (%d), can't proceed\n",
>>   sizeof(struct z3fold_header) , ZHDR_SIZE_ALIGNED);
>>   return -E2BIG;
>
> The embedded "z3fold: " prefix here should be removed
> as there's a pr_fmt that also adds it.
>
> The test looks like it should be a BUILD_BUG_ON rather
> than any runtime test too.

It used to be BUILD_BUG_ON but we deliberately changed that because
sizeof(spinlock_t) gets bloated in debug builds, so it just won't
build with default CHUNK_SIZE.

~vitaly

Re: [PATCH 3/6] dax: add tracepoint infrastructure, PMD tracing

2016-11-24 Thread Al Viro

On Fri, Nov 25, 2016 at 06:06:42PM +1100, Dave Chinner wrote:

> > Tell that to Linus.  You had been in the room, IIRC, when that had been
> > brought up this year in Santa Fe.
> 
> No, I wasn't at KS or plumbers, so this is all news to me.

Sorry, thought you had been at KS ;-/  My apologies...

[snip bloody good points I fully agree with]

> I understand why there is a desire for stable tracepoints, and
> that's why I suggested that there should be an in-kernel API to
> declare stable tracepoints. That way we can have the best of both
> worlds - tracepoints that applications need to be stable can be
> declared, reviewed and explicitly marked as stable in full knowledge
> of what that implies. The rest of the vast body of tracepoints can
> be left as mutable with no stability or existence guarantees so that
> developers can continue to treat them in a way that best suits
> problem diagnosis without compromising the future development of the
> code being traced. If userspace finds some of those tracepoints
> useful, then they can be taken through the process of making them
> into a maintainable stable form and being marked as such.

My impression is that nobody (at least kernel-side) wants them to be
a stable ABI, so long as nobody in userland screams about their code
being broken, everything is fine.  As usual, if nobody notices an ABI
change, it hasn't happened.  The question is what happens when somebody
does.

> We already have distros mounting the tracing subsystem on
> /sys/kernel/tracing. Expose all the stable tracepoints there, and
> leave all the other tracepoints under /sys/kernel/debug/tracing.
> Simple, clear separation between stable and mutable diagnostic
> tracepoints for users, combined with a simple, clear in-kernel API
> and process for making tracepoints stable

Yep.  That kind of separation would be my preference as well - ideally,
with review for stable ones being a lot less casual than for unstable;
AFAICS what happens now is that we have no mechanisms for marking them as
stable or unstable and everything keeps going on hope that nobody will
cause a mess by creating such a userland dependency.  So far it's been mostly
working, but as the set of tracepoints (and their use) gets wider and wider,
IMO it's only a matter of time until we get seriously screwed that way.

Basically, we are gambling on the next one to be cast in stone by userland
dependency being sane enough to make it possible to maintain it indefinitely
and I don't like the odds.

Re: [PATCH 3/6] dax: add tracepoint infrastructure, PMD tracing

2016-11-24 Thread Al Viro

On Fri, Nov 25, 2016 at 06:06:42PM +1100, Dave Chinner wrote:

> > Tell that to Linus.  You had been in the room, IIRC, when that had been
> > brought up this year in Santa Fe.
> 
> No, I wasn't at KS or plumbers, so this is all news to me.

Sorry, thought you had been at KS ;-/  My apologies...

[snip bloody good points I fully agree with]

> I understand why there is a desire for stable tracepoints, and
> that's why I suggested that there should be an in-kernel API to
> declare stable tracepoints. That way we can have the best of both
> worlds - tracepoints that applications need to be stable can be
> declared, reviewed and explicitly marked as stable in full knowledge
> of what that implies. The rest of the vast body of tracepoints can
> be left as mutable with no stability or existence guarantees so that
> developers can continue to treat them in a way that best suits
> problem diagnosis without compromising the future development of the
> code being traced. If userspace finds some of those tracepoints
> useful, then they can be taken through the process of making them
> into a maintainable stable form and being marked as such.

My impression is that nobody (at least kernel-side) wants them to be
a stable ABI, so long as nobody in userland screams about their code
being broken, everything is fine.  As usual, if nobody notices an ABI
change, it hasn't happened.  The question is what happens when somebody
does.

> We already have distros mounting the tracing subsystem on
> /sys/kernel/tracing. Expose all the stable tracepoints there, and
> leave all the other tracepoints under /sys/kernel/debug/tracing.
> Simple, clear separation between stable and mutable diagnostic
> tracepoints for users, combined with a simple, clear in-kernel API
> and process for making tracepoints stable

Yep.  That kind of separation would be my preference as well - ideally,
with review for stable ones being a lot less casual than for unstable;
AFAICS what happens now is that we have no mechanisms for marking them as
stable or unstable and everything keeps going on hope that nobody will
cause a mess by creating such a userland dependency.  So far it's been mostly
working, but as the set of tracepoints (and their use) gets wider and wider,
IMO it's only a matter of time until we get seriously screwed that way.

Basically, we are gambling on the next one to be cast in stone by userland
dependency being sane enough to make it possible to maintain it indefinitely
and I don't like the odds.

Re: [PATCH] z3fold: use %z modifier for format string

2016-11-24 Thread Vitaly Wool

Hi Arnd,

On Thu, Nov 24, 2016 at 5:31 PM, Arnd Bergmann  wrote:
> Printing a size_t requires the %zd format rather than %d:
>
> mm/z3fold.c: In function ‘init_z3fold’:
> include/linux/kern_levels.h:4:18: error: format ‘%d’ expects argument of type 
> ‘int’, but argument 2 has type ‘long unsigned int’ [-Werror=format=]
>
> Fixes: 50a50d2676c4 ("z3fold: don't fail kernel build if z3fold_header is too 
> big")
> Signed-off-by: Arnd Bergmann 

Acked-by: Vitaly Wool 

And thanks :)

~vitaly

> ---
>  mm/z3fold.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/mm/z3fold.c b/mm/z3fold.c
> index e282ba073e77..66ac7a7dc934 100644
> --- a/mm/z3fold.c
> +++ b/mm/z3fold.c
> @@ -884,7 +884,7 @@ static int __init init_z3fold(void)
>  {
> /* Fail the initialization if z3fold header won't fit in one chunk */
> if (sizeof(struct z3fold_header) > ZHDR_SIZE_ALIGNED) {
> -   pr_err("z3fold: z3fold_header size (%d) is bigger than "
> +   pr_err("z3fold: z3fold_header size (%zd) is bigger than "
> "the chunk size (%d), can't proceed\n",
> sizeof(struct z3fold_header) , ZHDR_SIZE_ALIGNED);
> return -E2BIG;
> --
> 2.9.0
>

Re: [PATCH] z3fold: use %z modifier for format string

2016-11-24 Thread Vitaly Wool

Hi Arnd,

On Thu, Nov 24, 2016 at 5:31 PM, Arnd Bergmann  wrote:
> Printing a size_t requires the %zd format rather than %d:
>
> mm/z3fold.c: In function ‘init_z3fold’:
> include/linux/kern_levels.h:4:18: error: format ‘%d’ expects argument of type 
> ‘int’, but argument 2 has type ‘long unsigned int’ [-Werror=format=]
>
> Fixes: 50a50d2676c4 ("z3fold: don't fail kernel build if z3fold_header is too 
> big")
> Signed-off-by: Arnd Bergmann 

Acked-by: Vitaly Wool 

And thanks :)

~vitaly

> ---
>  mm/z3fold.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/mm/z3fold.c b/mm/z3fold.c
> index e282ba073e77..66ac7a7dc934 100644
> --- a/mm/z3fold.c
> +++ b/mm/z3fold.c
> @@ -884,7 +884,7 @@ static int __init init_z3fold(void)
>  {
> /* Fail the initialization if z3fold header won't fit in one chunk */
> if (sizeof(struct z3fold_header) > ZHDR_SIZE_ALIGNED) {
> -   pr_err("z3fold: z3fold_header size (%d) is bigger than "
> +   pr_err("z3fold: z3fold_header size (%zd) is bigger than "
> "the chunk size (%d), can't proceed\n",
> sizeof(struct z3fold_header) , ZHDR_SIZE_ALIGNED);
> return -E2BIG;
> --
> 2.9.0
>

Re: [PATCH] PM / OPP: fix CPU device to be removed from OPP table in wrong order

2016-11-24 Thread Joonyoung Shim

Hi Viresh,

On 11/25/2016 03:57 PM, Viresh Kumar wrote:
> On 25-11-16, 10:54, Joonyoung Shim wrote:
>> I found this problem during system suspend/resume of Odroid-XU3 board.
>>
>> # rtcwake -m mem -s 3
>> wakeup from "mem" at Wed Apr  4 05:54:44 2001
>> [   15.965996] PM: Syncing filesystems ... done.
>> [   15.976333] Freezing user space processes ... (elapsed 0.002 seconds) 
>> done.
>> [   15.983287] Freezing remaining freezable tasks ... (elapsed 0.002 
>> seconds) done.
>> [   16.006951] wake enabled for irq 135
>> [   16.008782] smsc95xx 1-1.1:1.0 eth0: entering SUSPEND2 mode
>> [   16.094110] PM: suspend of devices complete after 95.038 msecs
>> [   16.105648] PM: late suspend of devices complete after 6.903 msecs
>> [   16.116356] PM: noirq suspend of devices complete after 5.912 msecs
>> [   16.121213] Disabling non-boot CPUs ...
>> [   16.154140] IRQ51 no longer affine to CPU1
>> [   16.154709] CPU1: shutdown
>> [   16.214148] IRQ52 no longer affine to CPU2
>> [   16.214646] CPU2: shutdown
>> [   16.273980] IRQ53 no longer affine to CPU3
>> [   16.274458] CPU3: shutdown
>> [   16.335093] IRQ54 no longer affine to CPU4
>> [   16.336033] CPU4: shutdown
>> [   16.389979] IRQ55 no longer affine to CPU5
>> [   16.390823] CPU5: shutdown
>> [   16.444829] IRQ56 no longer affine to CPU6
>> [   16.445621] CPU6: shutdown
>> [   16.509229] cpu cpu4: Failed to find opp_table: -19
>> [   16.514008] IRQ57 no longer affine to CPU7
>> [   16.514824] CPU7: shutdown
> 
> Hi,
> 
> Yes you have found a real bug it seems. I think that can be reproduced
> by a simple rmmod of cpufreq-dt.ko module as well.
> 
> Though the solution you provided isn't good enough.
> 
> Consider for example a case where you do this:
> - offline CPU 4
> - suspend the system
> 
> You are going to get stuck in the exact same problem again.
> 
> I have sent a separate patch and cc'd you. Can you please verify that
> it works for you ?
> 

Thanks for the patch, I tested it and it is working well.

Thanks.

Re: [PATCH] PM / OPP: fix CPU device to be removed from OPP table in wrong order

2016-11-24 Thread Joonyoung Shim

Hi Viresh,

On 11/25/2016 03:57 PM, Viresh Kumar wrote:
> On 25-11-16, 10:54, Joonyoung Shim wrote:
>> I found this problem during system suspend/resume of Odroid-XU3 board.
>>
>> # rtcwake -m mem -s 3
>> wakeup from "mem" at Wed Apr  4 05:54:44 2001
>> [   15.965996] PM: Syncing filesystems ... done.
>> [   15.976333] Freezing user space processes ... (elapsed 0.002 seconds) 
>> done.
>> [   15.983287] Freezing remaining freezable tasks ... (elapsed 0.002 
>> seconds) done.
>> [   16.006951] wake enabled for irq 135
>> [   16.008782] smsc95xx 1-1.1:1.0 eth0: entering SUSPEND2 mode
>> [   16.094110] PM: suspend of devices complete after 95.038 msecs
>> [   16.105648] PM: late suspend of devices complete after 6.903 msecs
>> [   16.116356] PM: noirq suspend of devices complete after 5.912 msecs
>> [   16.121213] Disabling non-boot CPUs ...
>> [   16.154140] IRQ51 no longer affine to CPU1
>> [   16.154709] CPU1: shutdown
>> [   16.214148] IRQ52 no longer affine to CPU2
>> [   16.214646] CPU2: shutdown
>> [   16.273980] IRQ53 no longer affine to CPU3
>> [   16.274458] CPU3: shutdown
>> [   16.335093] IRQ54 no longer affine to CPU4
>> [   16.336033] CPU4: shutdown
>> [   16.389979] IRQ55 no longer affine to CPU5
>> [   16.390823] CPU5: shutdown
>> [   16.444829] IRQ56 no longer affine to CPU6
>> [   16.445621] CPU6: shutdown
>> [   16.509229] cpu cpu4: Failed to find opp_table: -19
>> [   16.514008] IRQ57 no longer affine to CPU7
>> [   16.514824] CPU7: shutdown
> 
> Hi,
> 
> Yes you have found a real bug it seems. I think that can be reproduced
> by a simple rmmod of cpufreq-dt.ko module as well.
> 
> Though the solution you provided isn't good enough.
> 
> Consider for example a case where you do this:
> - offline CPU 4
> - suspend the system
> 
> You are going to get stuck in the exact same problem again.
> 
> I have sent a separate patch and cc'd you. Can you please verify that
> it works for you ?
> 

Thanks for the patch, I tested it and it is working well.

Thanks.

Re: [PATCH] PM / OPP: Allow inactive opp_device to be present in dev list

2016-11-24 Thread Joonyoung Shim

On 11/25/2016 03:53 PM, Viresh Kumar wrote:
> Joonyoung Shim reported an interesting problem on his ARM octa-core
> Odroid-XU3 platform. During system suspend, dev_pm_opp_put_regulator()
> was failing for a struct device for which dev_pm_opp_set_regulator() is
> called earlier.
> 
> This happened because an earlier call to
> dev_pm_opp_of_cpumask_remove_table() function (from cpufreq-dt.c file)
> removed all the entries from opp_table->dev_list apart from the last CPU
> device in the cpumask of CPUs sharing the OPP.
> 
> But both dev_pm_opp_set_regulator() and dev_pm_opp_put_regulator()
> routines get CPU device for the first CPU in the cpumask. And so the OPP
> core failed to find the OPP table for the struct device.
> 
> This patch attempts to fix this problem by adding another field in the
> struct opp_device: inactive.
> 
> Instead of removing the entries from the list during
> dev_pm_opp_of_cpumask_remove_table() function call, we mark them as
> inactive. Such inactive devices will not be used by the core in most of
> the cases, like before, but will be used only at special places which
> need to take inactive devices into account.
> 
> All the devices are removed from the list together now and that happens
> only when the opp_table gets destroyed.
> 
> This patch is tested on Dual A15, Exynos5250 platform by compiling the
> cpufreq-dt driver as a module. The module is inserted/removed multiple
> times with combinations of CPU offline/online steps.
> 
> Signed-off-by: Viresh Kumar 

It's working well during system suspend/resume on my Odroid-XU3 board.

Tested-by: Joonyoung Shim 

Thanks.

Re: [PATCH] PM / OPP: Allow inactive opp_device to be present in dev list

2016-11-24 Thread Joonyoung Shim

On 11/25/2016 03:53 PM, Viresh Kumar wrote:
> Joonyoung Shim reported an interesting problem on his ARM octa-core
> Odroid-XU3 platform. During system suspend, dev_pm_opp_put_regulator()
> was failing for a struct device for which dev_pm_opp_set_regulator() is
> called earlier.
> 
> This happened because an earlier call to
> dev_pm_opp_of_cpumask_remove_table() function (from cpufreq-dt.c file)
> removed all the entries from opp_table->dev_list apart from the last CPU
> device in the cpumask of CPUs sharing the OPP.
> 
> But both dev_pm_opp_set_regulator() and dev_pm_opp_put_regulator()
> routines get CPU device for the first CPU in the cpumask. And so the OPP
> core failed to find the OPP table for the struct device.
> 
> This patch attempts to fix this problem by adding another field in the
> struct opp_device: inactive.
> 
> Instead of removing the entries from the list during
> dev_pm_opp_of_cpumask_remove_table() function call, we mark them as
> inactive. Such inactive devices will not be used by the core in most of
> the cases, like before, but will be used only at special places which
> need to take inactive devices into account.
> 
> All the devices are removed from the list together now and that happens
> only when the opp_table gets destroyed.
> 
> This patch is tested on Dual A15, Exynos5250 platform by compiling the
> cpufreq-dt driver as a module. The module is inserted/removed multiple
> times with combinations of CPU offline/online steps.
> 
> Signed-off-by: Viresh Kumar 

It's working well during system suspend/resume on my Odroid-XU3 board.

Tested-by: Joonyoung Shim 

Thanks.

Re: automatic IRQ affinity for virtio

2016-11-24 Thread Christoph Hellwig

Btw, what's the best way to get any response to this series?
But this and the predecessor seem to have completely fallen on deaf
ears.

Re: automatic IRQ affinity for virtio

2016-11-24 Thread Christoph Hellwig

Btw, what's the best way to get any response to this series?
But this and the predecessor seem to have completely fallen on deaf
ears.

Re: [PATCH 3/6] dax: add tracepoint infrastructure, PMD tracing

2016-11-24 Thread Dave Chinner

On Fri, Nov 25, 2016 at 04:14:19AM +0000, Al Viro wrote:
> [Linus Cc'd]
> 
> On Fri, Nov 25, 2016 at 01:49:18PM +1100, Dave Chinner wrote:
> > > they have become parts of stable userland ABI and are to be maintained
> > > indefinitely.  Don't expect "tracepoints are special case" to prevent 
> > > that.
> > 
> > I call bullshit just like I always do when someone spouts this
> > "tracepoints are stable ABI" garbage.
> 
> > Quite frankly, anyone that wants to stop us from
> > adding/removing/changing tracepoints or the code that they are
> > reporting information about "because ABI" can go take a long walk
> > off a short cliff.  Diagnostic tracepoints are not part of the
> > stable ABI. End of story.
> 
> Tell that to Linus.  You had been in the room, IIRC, when that had been
> brought up this year in Santa Fe.

No, I wasn't at KS or plumbers, so this is all news to me. Believe
me, if I was in the room when this discussion was in progress, you'd
remember it /very clearly/.

> "End of story" is not going to be
> yours (or mine, for that matter) to declare - Linus is the only one who
> can do that.  If he says "if userland code relies upon it, so that
> userland code needs to be fixed" - I'm very happy (and everyone involved
> can count upon quite a few free drinks from me at the next summit).  If
> it's "that userland code really shouldn't have relied upon it, and it's
> real unfortunate that it does, but we still get to keep it working" -
> too bad, "because ABI" is the reality and we will be the ones to take
> that long walk.

When the tracepoint infrastructure was added it was considered a
debugging tool and not stable - it was even exposed through
/sys/kernel/debug! We connected up the ~280 /debug/ tracepoints we
had in XFS at the time with the understanding it was a /diagnostic
tool/. We exposed all sorts of internal details we'd previously been
exposing with tracing through lcrash and kdb (and Irix before that)
so we could diagnose problems quickly on a running kernel.

The scope of tracepoints may have grown since then, but it does not
change the fact that many of the tracepoints that were added years
ago were done under the understanding that it was a mutable
interface and nobody could rely on any specific tracepoint detail
remaining unchanged.

We're still treating them as mutable diagnostic and debugging aids
across the kernel. In XFS, we've now got over *500* unique trace
events and *650* tracepoints; ignoring comments, *4%* of the entire
XFS kernel code base is tracing code.  We expose structure contents,
transaction states, locking algorithms, object life cycles, journal
operations, etc. All the new reverse mapping and shared data extent
code that has been merged in 4.8 and 4.9 has been extensively
exposed by tracepoints - these changes also modified a significant
number of existing tracepoints.

Put simply: every major and most minor pieces of functionality in
XFS are exposed via tracepoints.

Hence if the stable ABI tracepoint rules you've just described are
going to be enforced, it will mean we will not be able to change
anything significant in XFS because almost everything significant we
do involves changing tracepoints in some way. This leaves us with
three unacceptable choices:

1. stop developing XFS so we can maintain the stable
tracepoint ABI;

2. ignore the ABI rules and hope that Linus keeps pulling
code that obviously ignores the ABI rules; or

3. screw over our upstream/vanilla kernel users by removing
the tracepoints from Linus' tree and suck up the pain of
maintaining an out of tree patch for XFS developers and
distros so kernel tracepoint ABI rules can be ignored.

Nobody wins if these are the only choices we are being given.

I understand why there is a desire for stable tracepoints, and
that's why I suggested that there should be an in-kernel API to
declare stable tracepoints. That way we can have the best of both
worlds - tracepoints that applications need to be stable can be
declared, reviewed and explicitly marked as stable in full knowledge
of what that implies. The rest of the vast body of tracepoints can
be left as mutable with no stability or existence guarantees so that
developers can continue to treat them in a way that best suits
problem diagnosis without compromising the future development of the
code being traced. If userspace finds some of those tracepoints
useful, then they can be taken through the process of making them
into a maintainable stable form and being marked as such.

We already have distros mounting the tracing subsystem on
/sys/kernel/tracing. Expose all the stable tracepoints there, and
leave all the other tracepoints under /sys/kernel/debug/tracing.
Simple, clear separation between stable and mutable diagnostic
tracepoints for users, combined with a simple, clear in-kernel API
and process for making tracepoints stable.

Cheers,

Dave.
-- 
Dave Chinner
da...@fromorbit.com

Re: [PATCH 3/6] dax: add tracepoint infrastructure, PMD tracing

2016-11-24 Thread Dave Chinner

On Fri, Nov 25, 2016 at 04:14:19AM +, Al Viro wrote:
> [Linus Cc'd]
> 
> On Fri, Nov 25, 2016 at 01:49:18PM +1100, Dave Chinner wrote:
> > > they have become parts of stable userland ABI and are to be maintained
> > > indefinitely.  Don't expect "tracepoints are special case" to prevent 
> > > that.
> > 
> > I call bullshit just like I always do when someone spouts this
> > "tracepoints are stable ABI" garbage.
> 
> > Quite frankly, anyone that wants to stop us from
> > adding/removing/changing tracepoints or the code that they are
> > reporting information about "because ABI" can go take a long walk
> > off a short cliff.  Diagnostic tracepoints are not part of the
> > stable ABI. End of story.
> 
> Tell that to Linus.  You had been in the room, IIRC, when that had been
> brought up this year in Santa Fe.

No, I wasn't at KS or plumbers, so this is all news to me. Believe
me, if I was in the room when this discussion was in progress, you'd
remember it /very clearly/.

> "End of story" is not going to be
> yours (or mine, for that matter) to declare - Linus is the only one who
> can do that.  If he says "if userland code relies upon it, so that
> userland code needs to be fixed" - I'm very happy (and everyone involved
> can count upon quite a few free drinks from me at the next summit).  If
> it's "that userland code really shouldn't have relied upon it, and it's
> real unfortunate that it does, but we still get to keep it working" -
> too bad, "because ABI" is the reality and we will be the ones to take
> that long walk.

When the tracepoint infrastructure was added it was considered a
debugging tool and not stable - it was even exposed through
/sys/kernel/debug! We connected up the ~280 /debug/ tracepoints we
had in XFS at the time with the understanding it was a /diagnostic
tool/. We exposed all sorts of internal details we'd previously been
exposing with tracing through lcrash and kdb (and Irix before that)
so we could diagnose problems quickly on a running kernel.

The scope of tracepoints may have grown since then, but it does not
change the fact that many of the tracepoints that were added years
ago were done under the understanding that it was a mutable
interface and nobody could rely on any specific tracepoint detail
remaining unchanged.

We're still treating them as mutable diagnostic and debugging aids
across the kernel. In XFS, we've now got over *500* unique trace
events and *650* tracepoints; ignoring comments, *4%* of the entire
XFS kernel code base is tracing code.  We expose structure contents,
transaction states, locking algorithms, object life cycles, journal
operations, etc. All the new reverse mapping and shared data extent
code that has been merged in 4.8 and 4.9 has been extensively
exposed by tracepoints - these changes also modified a significant
number of existing tracepoints.

Put simply: every major and most minor pieces of functionality in
XFS are exposed via tracepoints.

Hence if the stable ABI tracepoint rules you've just described are
going to be enforced, it will mean we will not be able to change
anything significant in XFS because almost everything significant we
do involves changing tracepoints in some way. This leaves us with
three unacceptable choices:

1. stop developing XFS so we can maintain the stable
tracepoint ABI;

2. ignore the ABI rules and hope that Linus keeps pulling
code that obviously ignores the ABI rules; or

3. screw over our upstream/vanilla kernel users by removing
the tracepoints from Linus' tree and suck up the pain of
maintaining an out of tree patch for XFS developers and
distros so kernel tracepoint ABI rules can be ignored.

Nobody wins if these are the only choices we are being given.

I understand why there is a desire for stable tracepoints, and
that's why I suggested that there should be an in-kernel API to
declare stable tracepoints. That way we can have the best of both
worlds - tracepoints that applications need to be stable can be
declared, reviewed and explicitly marked as stable in full knowledge
of what that implies. The rest of the vast body of tracepoints can
be left as mutable with no stability or existence guarantees so that
developers can continue to treat them in a way that best suits
problem diagnosis without compromising the future development of the
code being traced. If userspace finds some of those tracepoints
useful, then they can be taken through the process of making them
into a maintainable stable form and being marked as such.

We already have distros mounting the tracing subsystem on
/sys/kernel/tracing. Expose all the stable tracepoints there, and
leave all the other tracepoints under /sys/kernel/debug/tracing.
Simple, clear separation between stable and mutable diagnostic
tracepoints for users, combined with a simple, clear in-kernel API
and process for making tracepoints stable.

Cheers,

Dave.
-- 
Dave Chinner
da...@fromorbit.com

Re: [PATCH] PM / OPP: fix CPU device to be removed from OPP table in wrong order

2016-11-24 Thread Viresh Kumar

On 25-11-16, 10:54, Joonyoung Shim wrote:
> I found this problem during system suspend/resume of Odroid-XU3 board.
> 
> # rtcwake -m mem -s 3
> wakeup from "mem" at Wed Apr  4 05:54:44 2001
> [   15.965996] PM: Syncing filesystems ... done.
> [   15.976333] Freezing user space processes ... (elapsed 0.002 seconds) done.
> [   15.983287] Freezing remaining freezable tasks ... (elapsed 0.002 seconds) 
> done.
> [   16.006951] wake enabled for irq 135
> [   16.008782] smsc95xx 1-1.1:1.0 eth0: entering SUSPEND2 mode
> [   16.094110] PM: suspend of devices complete after 95.038 msecs
> [   16.105648] PM: late suspend of devices complete after 6.903 msecs
> [   16.116356] PM: noirq suspend of devices complete after 5.912 msecs
> [   16.121213] Disabling non-boot CPUs ...
> [   16.154140] IRQ51 no longer affine to CPU1
> [   16.154709] CPU1: shutdown
> [   16.214148] IRQ52 no longer affine to CPU2
> [   16.214646] CPU2: shutdown
> [   16.273980] IRQ53 no longer affine to CPU3
> [   16.274458] CPU3: shutdown
> [   16.335093] IRQ54 no longer affine to CPU4
> [   16.336033] CPU4: shutdown
> [   16.389979] IRQ55 no longer affine to CPU5
> [   16.390823] CPU5: shutdown
> [   16.444829] IRQ56 no longer affine to CPU6
> [   16.445621] CPU6: shutdown
> [   16.509229] cpu cpu4: Failed to find opp_table: -19
> [   16.514008] IRQ57 no longer affine to CPU7
> [   16.514824] CPU7: shutdown

Hi,

Yes you have found a real bug it seems. I think that can be reproduced
by a simple rmmod of cpufreq-dt.ko module as well.

Though the solution you provided isn't good enough.

Consider for example a case where you do this:
- offline CPU 4
- suspend the system

You are going to get stuck in the exact same problem again.

I have sent a separate patch and cc'd you. Can you please verify that
it works for you ?

-- 
viresh

Re: [PATCH] PM / OPP: fix CPU device to be removed from OPP table in wrong order

2016-11-24 Thread Viresh Kumar

On 25-11-16, 10:54, Joonyoung Shim wrote:
> I found this problem during system suspend/resume of Odroid-XU3 board.
> 
> # rtcwake -m mem -s 3
> wakeup from "mem" at Wed Apr  4 05:54:44 2001
> [   15.965996] PM: Syncing filesystems ... done.
> [   15.976333] Freezing user space processes ... (elapsed 0.002 seconds) done.
> [   15.983287] Freezing remaining freezable tasks ... (elapsed 0.002 seconds) 
> done.
> [   16.006951] wake enabled for irq 135
> [   16.008782] smsc95xx 1-1.1:1.0 eth0: entering SUSPEND2 mode
> [   16.094110] PM: suspend of devices complete after 95.038 msecs
> [   16.105648] PM: late suspend of devices complete after 6.903 msecs
> [   16.116356] PM: noirq suspend of devices complete after 5.912 msecs
> [   16.121213] Disabling non-boot CPUs ...
> [   16.154140] IRQ51 no longer affine to CPU1
> [   16.154709] CPU1: shutdown
> [   16.214148] IRQ52 no longer affine to CPU2
> [   16.214646] CPU2: shutdown
> [   16.273980] IRQ53 no longer affine to CPU3
> [   16.274458] CPU3: shutdown
> [   16.335093] IRQ54 no longer affine to CPU4
> [   16.336033] CPU4: shutdown
> [   16.389979] IRQ55 no longer affine to CPU5
> [   16.390823] CPU5: shutdown
> [   16.444829] IRQ56 no longer affine to CPU6
> [   16.445621] CPU6: shutdown
> [   16.509229] cpu cpu4: Failed to find opp_table: -19
> [   16.514008] IRQ57 no longer affine to CPU7
> [   16.514824] CPU7: shutdown

Hi,

Yes you have found a real bug it seems. I think that can be reproduced
by a simple rmmod of cpufreq-dt.ko module as well.

Though the solution you provided isn't good enough.

Consider for example a case where you do this:
- offline CPU 4
- suspend the system

You are going to get stuck in the exact same problem again.

I have sent a separate patch and cc'd you. Can you please verify that
it works for you ?

-- 
viresh

Re: [PATCH] PM / OPP: Allow inactive opp_device to be present in dev list

2016-11-24 Thread Viresh Kumar

On 25-11-16, 12:23, Viresh Kumar wrote:
> Joonyoung Shim reported an interesting problem on his ARM octa-core
> Odroid-XU3 platform. During system suspend, dev_pm_opp_put_regulator()
> was failing for a struct device for which dev_pm_opp_set_regulator() is
> called earlier.
> 
> This happened because an earlier call to
> dev_pm_opp_of_cpumask_remove_table() function (from cpufreq-dt.c file)
> removed all the entries from opp_table->dev_list apart from the last CPU
> device in the cpumask of CPUs sharing the OPP.
> 
> But both dev_pm_opp_set_regulator() and dev_pm_opp_put_regulator()
> routines get CPU device for the first CPU in the cpumask. And so the OPP
> core failed to find the OPP table for the struct device.
> 
> This patch attempts to fix this problem by adding another field in the
> struct opp_device: inactive.
> 
> Instead of removing the entries from the list during
> dev_pm_opp_of_cpumask_remove_table() function call, we mark them as
> inactive. Such inactive devices will not be used by the core in most of
> the cases, like before, but will be used only at special places which
> need to take inactive devices into account.
> 
> All the devices are removed from the list together now and that happens
> only when the opp_table gets destroyed.
> 
> This patch is tested on Dual A15, Exynos5250 platform by compiling the
> cpufreq-dt driver as a module. The module is inserted/removed multiple
> times with combinations of CPU offline/online steps.
> 
> Signed-off-by: Viresh Kumar 

@Rafael: Can you please add following while applying the patch ?

Cc:  # v4.4+

Somehow git send-email wasn't working properly for me as it was trying
to cc sta...@vger.kernel.org#v4.4+ and that was failing. I tried lots
of options including suppress-cc but nothing worked :(

-- 
viresh

Re: [PATCH] PM / OPP: Allow inactive opp_device to be present in dev list

2016-11-24 Thread Viresh Kumar

On 25-11-16, 12:23, Viresh Kumar wrote:
> Joonyoung Shim reported an interesting problem on his ARM octa-core
> Odroid-XU3 platform. During system suspend, dev_pm_opp_put_regulator()
> was failing for a struct device for which dev_pm_opp_set_regulator() is
> called earlier.
> 
> This happened because an earlier call to
> dev_pm_opp_of_cpumask_remove_table() function (from cpufreq-dt.c file)
> removed all the entries from opp_table->dev_list apart from the last CPU
> device in the cpumask of CPUs sharing the OPP.
> 
> But both dev_pm_opp_set_regulator() and dev_pm_opp_put_regulator()
> routines get CPU device for the first CPU in the cpumask. And so the OPP
> core failed to find the OPP table for the struct device.
> 
> This patch attempts to fix this problem by adding another field in the
> struct opp_device: inactive.
> 
> Instead of removing the entries from the list during
> dev_pm_opp_of_cpumask_remove_table() function call, we mark them as
> inactive. Such inactive devices will not be used by the core in most of
> the cases, like before, but will be used only at special places which
> need to take inactive devices into account.
> 
> All the devices are removed from the list together now and that happens
> only when the opp_table gets destroyed.
> 
> This patch is tested on Dual A15, Exynos5250 platform by compiling the
> cpufreq-dt driver as a module. The module is inserted/removed multiple
> times with combinations of CPU offline/online steps.
> 
> Signed-off-by: Viresh Kumar 

@Rafael: Can you please add following while applying the patch ?

Cc:  # v4.4+

Somehow git send-email wasn't working properly for me as it was trying
to cc sta...@vger.kernel.org#v4.4+ and that was failing. I tried lots
of options including suppress-cc but nothing worked :(

-- 
viresh

[PATCH] PM / OPP: Allow inactive opp_device to be present in dev list

2016-11-24 Thread Viresh Kumar

Joonyoung Shim reported an interesting problem on his ARM octa-core
Odroid-XU3 platform. During system suspend, dev_pm_opp_put_regulator()
was failing for a struct device for which dev_pm_opp_set_regulator() is
called earlier.

This happened because an earlier call to
dev_pm_opp_of_cpumask_remove_table() function (from cpufreq-dt.c file)
removed all the entries from opp_table->dev_list apart from the last CPU
device in the cpumask of CPUs sharing the OPP.

But both dev_pm_opp_set_regulator() and dev_pm_opp_put_regulator()
routines get CPU device for the first CPU in the cpumask. And so the OPP
core failed to find the OPP table for the struct device.

This patch attempts to fix this problem by adding another field in the
struct opp_device: inactive.

Instead of removing the entries from the list during
dev_pm_opp_of_cpumask_remove_table() function call, we mark them as
inactive. Such inactive devices will not be used by the core in most of
the cases, like before, but will be used only at special places which
need to take inactive devices into account.

All the devices are removed from the list together now and that happens
only when the opp_table gets destroyed.

This patch is tested on Dual A15, Exynos5250 platform by compiling the
cpufreq-dt driver as a module. The module is inserted/removed multiple
times with combinations of CPU offline/online steps.

Signed-off-by: Viresh Kumar 
---
 drivers/base/power/opp/core.c| 156 ++-
 drivers/base/power/opp/cpu.c |   4 +-
 drivers/base/power/opp/debugfs.c |   4 +-
 drivers/base/power/opp/of.c  |   2 +-
 drivers/base/power/opp/opp.h |   6 +-
 5 files changed, 116 insertions(+), 56 deletions(-)

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index 4c7c6da7a989..df3c8b3a62ea 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -40,14 +40,30 @@ do {
\
 "opp_table_lock protection");  \
 } while (0)
 
+/**
+ * _find_opp_dev - Returns existing opp_dev for opp table
+ *
+ * @dev: Device for which opp_dev needs to be found
+ * @opp_table: OPP table.
+ * @active_only: If true: find only for active entries. If false, find for both
+ * active and inactive entries.
+ */
 static struct opp_device *_find_opp_dev(const struct device *dev,
-   struct opp_table *opp_table)
+   struct opp_table *opp_table,
+   bool active_only)
 {
struct opp_device *opp_dev;
 
-   list_for_each_entry(opp_dev, _table->dev_list, node)
-   if (opp_dev->dev == dev)
-   return opp_dev;
+   list_for_each_entry(opp_dev, _table->dev_list, node) {
+   if (opp_dev->dev != dev)
+   continue;
+
+   /* Only return active entries ? */
+   if (active_only && opp_dev->inactive)
+   return NULL;
+
+   return opp_dev;
+   }
 
return NULL;
 }
@@ -55,6 +71,8 @@ static struct opp_device *_find_opp_dev(const struct device 
*dev,
 /**
  * _find_opp_table() - find opp_table struct using device pointer
  * @dev:   device pointer used to lookup OPP table
+ * @active_only: If true: find only for active entries. If false, find for both
+ * active and inactive entries.
  *
  * Search OPP table for one containing matching device. Does a RCU reader
  * operation to grab the pointer needed.
@@ -68,7 +86,7 @@ static struct opp_device *_find_opp_dev(const struct device 
*dev,
  *
  * For Writers, this function must be called with opp_table_lock held.
  */
-struct opp_table *_find_opp_table(struct device *dev)
+struct opp_table *_find_opp_table(struct device *dev, bool active_only)
 {
struct opp_table *opp_table;
 
@@ -80,7 +98,7 @@ struct opp_table *_find_opp_table(struct device *dev)
}
 
list_for_each_entry_rcu(opp_table, _tables, node)
-   if (_find_opp_dev(dev, opp_table))
+   if (_find_opp_dev(dev, opp_table, active_only))
return opp_table;
 
return ERR_PTR(-ENODEV);
@@ -199,7 +217,7 @@ unsigned long dev_pm_opp_get_max_clock_latency(struct 
device *dev)
 
rcu_read_lock();
 
-   opp_table = _find_opp_table(dev);
+   opp_table = _find_opp_table(dev, true);
if (IS_ERR(opp_table))
clock_latency_ns = 0;
else
@@ -229,7 +247,7 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device 
*dev)
 
rcu_read_lock();
 
-   opp_table = _find_opp_table(dev);
+   opp_table = _find_opp_table(dev, true);
if (IS_ERR(opp_table)) {
rcu_read_unlock();
return 0;
@@ -302,7 +320,7 @@ struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device 
*dev)

[PATCH] PM / OPP: Allow inactive opp_device to be present in dev list

2016-11-24 Thread Viresh Kumar

Joonyoung Shim reported an interesting problem on his ARM octa-core
Odroid-XU3 platform. During system suspend, dev_pm_opp_put_regulator()
was failing for a struct device for which dev_pm_opp_set_regulator() is
called earlier.

This happened because an earlier call to
dev_pm_opp_of_cpumask_remove_table() function (from cpufreq-dt.c file)
removed all the entries from opp_table->dev_list apart from the last CPU
device in the cpumask of CPUs sharing the OPP.

But both dev_pm_opp_set_regulator() and dev_pm_opp_put_regulator()
routines get CPU device for the first CPU in the cpumask. And so the OPP
core failed to find the OPP table for the struct device.

This patch attempts to fix this problem by adding another field in the
struct opp_device: inactive.

Instead of removing the entries from the list during
dev_pm_opp_of_cpumask_remove_table() function call, we mark them as
inactive. Such inactive devices will not be used by the core in most of
the cases, like before, but will be used only at special places which
need to take inactive devices into account.

All the devices are removed from the list together now and that happens
only when the opp_table gets destroyed.

This patch is tested on Dual A15, Exynos5250 platform by compiling the
cpufreq-dt driver as a module. The module is inserted/removed multiple
times with combinations of CPU offline/online steps.

Signed-off-by: Viresh Kumar 
---
 drivers/base/power/opp/core.c| 156 ++-
 drivers/base/power/opp/cpu.c |   4 +-
 drivers/base/power/opp/debugfs.c |   4 +-
 drivers/base/power/opp/of.c  |   2 +-
 drivers/base/power/opp/opp.h |   6 +-
 5 files changed, 116 insertions(+), 56 deletions(-)

diff --git a/drivers/base/power/opp/core.c b/drivers/base/power/opp/core.c
index 4c7c6da7a989..df3c8b3a62ea 100644
--- a/drivers/base/power/opp/core.c
+++ b/drivers/base/power/opp/core.c
@@ -40,14 +40,30 @@ do {
\
 "opp_table_lock protection");  \
 } while (0)
 
+/**
+ * _find_opp_dev - Returns existing opp_dev for opp table
+ *
+ * @dev: Device for which opp_dev needs to be found
+ * @opp_table: OPP table.
+ * @active_only: If true: find only for active entries. If false, find for both
+ * active and inactive entries.
+ */
 static struct opp_device *_find_opp_dev(const struct device *dev,
-   struct opp_table *opp_table)
+   struct opp_table *opp_table,
+   bool active_only)
 {
struct opp_device *opp_dev;
 
-   list_for_each_entry(opp_dev, _table->dev_list, node)
-   if (opp_dev->dev == dev)
-   return opp_dev;
+   list_for_each_entry(opp_dev, _table->dev_list, node) {
+   if (opp_dev->dev != dev)
+   continue;
+
+   /* Only return active entries ? */
+   if (active_only && opp_dev->inactive)
+   return NULL;
+
+   return opp_dev;
+   }
 
return NULL;
 }
@@ -55,6 +71,8 @@ static struct opp_device *_find_opp_dev(const struct device 
*dev,
 /**
  * _find_opp_table() - find opp_table struct using device pointer
  * @dev:   device pointer used to lookup OPP table
+ * @active_only: If true: find only for active entries. If false, find for both
+ * active and inactive entries.
  *
  * Search OPP table for one containing matching device. Does a RCU reader
  * operation to grab the pointer needed.
@@ -68,7 +86,7 @@ static struct opp_device *_find_opp_dev(const struct device 
*dev,
  *
  * For Writers, this function must be called with opp_table_lock held.
  */
-struct opp_table *_find_opp_table(struct device *dev)
+struct opp_table *_find_opp_table(struct device *dev, bool active_only)
 {
struct opp_table *opp_table;
 
@@ -80,7 +98,7 @@ struct opp_table *_find_opp_table(struct device *dev)
}
 
list_for_each_entry_rcu(opp_table, _tables, node)
-   if (_find_opp_dev(dev, opp_table))
+   if (_find_opp_dev(dev, opp_table, active_only))
return opp_table;
 
return ERR_PTR(-ENODEV);
@@ -199,7 +217,7 @@ unsigned long dev_pm_opp_get_max_clock_latency(struct 
device *dev)
 
rcu_read_lock();
 
-   opp_table = _find_opp_table(dev);
+   opp_table = _find_opp_table(dev, true);
if (IS_ERR(opp_table))
clock_latency_ns = 0;
else
@@ -229,7 +247,7 @@ unsigned long dev_pm_opp_get_max_volt_latency(struct device 
*dev)
 
rcu_read_lock();
 
-   opp_table = _find_opp_table(dev);
+   opp_table = _find_opp_table(dev, true);
if (IS_ERR(opp_table)) {
rcu_read_unlock();
return 0;
@@ -302,7 +320,7 @@ struct dev_pm_opp *dev_pm_opp_get_suspend_opp(struct device 
*dev)
 
opp_rcu_lockdep_assert();
 
-

RE: [PATCH net 1/2] r8152: fix the sw rx checksum is unavailable

2016-11-24 Thread Hayes Wang

> Mark Lord [mailto:ml...@pobox.com]
> > Sent: Friday, November 25, 2016 12:44 AM
> [...]
> > The bad data in this case is ASCII:
> >
> >  "SRC=m3400:/ TARGET=/m340"
> >
> > This data is what is seen in /run/mount/utab, a file that is read/written 
> > over NFS
> on
> > each boot.
> >
> >  "SRC=m3400:/ TARGET=/m3400 ROOT=/
> > ATTRS=nolock,addr=192.168.8.1\n"
> >
> > But how does this ASCII data end up at offset zero of the rx buffer??
> > Not possible -- this isn't even stale data, because only an rx_desc could
> > be at that offset in that buffer.
> >
> > So even if this were a platform memory coherency issue, one should still
> > never see ASCII data at the beginning of an rx buffer.  The driver NEVER
> > writes anything to the rx buffers.  Only the USB hardware ever does.
> >
> > And only the r8152 dongle/driver exhibits this issue.
> > Other USB dongles do not.  They *might* still have such issues,
> > but because they use software checksums, the bad packets are 
> > caught/rejected.
> 
> Do you test it by rebooting? Maybe you could try a patch
> commit 93fe9b183840 ("r8152: reset the bmu"). However, it should
> only occur for the first urb buffer after rx is reset. I don't
> think you would reset the rx frequently, so the situation seems
> to be different.

Forgive me. I provided wrong information. This is about RTL8153,
not RTL8152.

Best Regards,
Hayes

RE: [PATCH net 1/2] r8152: fix the sw rx checksum is unavailable

2016-11-24 Thread Hayes Wang

> Mark Lord [mailto:ml...@pobox.com]
> > Sent: Friday, November 25, 2016 12:44 AM
> [...]
> > The bad data in this case is ASCII:
> >
> >  "SRC=m3400:/ TARGET=/m340"
> >
> > This data is what is seen in /run/mount/utab, a file that is read/written 
> > over NFS
> on
> > each boot.
> >
> >  "SRC=m3400:/ TARGET=/m3400 ROOT=/
> > ATTRS=nolock,addr=192.168.8.1\n"
> >
> > But how does this ASCII data end up at offset zero of the rx buffer??
> > Not possible -- this isn't even stale data, because only an rx_desc could
> > be at that offset in that buffer.
> >
> > So even if this were a platform memory coherency issue, one should still
> > never see ASCII data at the beginning of an rx buffer.  The driver NEVER
> > writes anything to the rx buffers.  Only the USB hardware ever does.
> >
> > And only the r8152 dongle/driver exhibits this issue.
> > Other USB dongles do not.  They *might* still have such issues,
> > but because they use software checksums, the bad packets are 
> > caught/rejected.
> 
> Do you test it by rebooting? Maybe you could try a patch
> commit 93fe9b183840 ("r8152: reset the bmu"). However, it should
> only occur for the first urb buffer after rx is reset. I don't
> think you would reset the rx frequently, so the situation seems
> to be different.

Forgive me. I provided wrong information. This is about RTL8153,
not RTL8152.

Best Regards,
Hayes

[tip:efi/core] efi/libstub: Make efi_random_alloc() allocate below 4 GB on 32-bit

2016-11-24 Thread tip-bot for Ard Biesheuvel

Commit-ID:  018edcfac4c3b140366ad51b0907f3becb5bb624
Gitweb: http://git.kernel.org/tip/018edcfac4c3b140366ad51b0907f3becb5bb624
Author: Ard Biesheuvel 
AuthorDate: Thu, 24 Nov 2016 18:02:23 +
Committer:  Ingo Molnar 
CommitDate: Fri, 25 Nov 2016 07:15:23 +0100

efi/libstub: Make efi_random_alloc() allocate below 4 GB on 32-bit

The UEFI stub executes in the context of the firmware, which identity
maps the available system RAM, which implies that only memory below
4 GB can be used for allocations on 32-bit architectures, even on [L]PAE
capable hardware.

So ignore any reported memory above 4 GB in efi_random_alloc(). This
also fixes a reported build problem on ARM under -Os, where the 64-bit
logical shift relies on a software routine that the ARM decompressor does
not provide.

A second [minor] issue is also fixed, where the '+ 1' is moved out of
the shift, where it belongs: the reason for its presence is that a
memory region where start == end should count as a single slot, given
that 'end' takes the desired size and alignment of the allocation into
account.

To clarify the code in this regard, rename start/end to 'first_slot' and
'last_slot', respectively, and introduce 'region_end' to describe the
last usable address of the current region.

Reported-by: Arnd Bergmann 
Signed-off-by: Ard Biesheuvel 
Cc: Linus Torvalds 
Cc: Matt Fleming 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: linux-...@vger.kernel.org
Link: 
http://lkml.kernel.org/r/1480010543-25709-2-git-send-email-ard.biesheu...@linaro.org
Signed-off-by: Ingo Molnar 
---
 drivers/firmware/efi/libstub/random.c | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/firmware/efi/libstub/random.c 
b/drivers/firmware/efi/libstub/random.c
index 3a3feac..7e72954 100644
--- a/drivers/firmware/efi/libstub/random.c
+++ b/drivers/firmware/efi/libstub/random.c
@@ -45,19 +45,20 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t 
*md,
 unsigned long align_shift)
 {
unsigned long align = 1UL << align_shift;
-   u64 start, end;
+   u64 first_slot, last_slot, region_end;
 
if (md->type != EFI_CONVENTIONAL_MEMORY)
return 0;
 
-   start = round_up(md->phys_addr, align);
-   end = round_down(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - size,
-align);
+   region_end = min((u64)ULONG_MAX, md->phys_addr + 
md->num_pages*EFI_PAGE_SIZE - 1);
 
-   if (start > end)
+   first_slot = round_up(md->phys_addr, align);
+   last_slot = round_down(region_end - size + 1, align);
+
+   if (first_slot > last_slot)
return 0;
 
-   return (end - start + 1) >> align_shift;
+   return ((unsigned long)(last_slot - first_slot) >> align_shift) + 1;
 }
 
 /*

[tip:x86/asm] x86/boot/64: Use defines for page size

2016-11-24 Thread tip-bot for Borislav Petkov

Commit-ID:  9b032d21f6482ee305dcdec418c15153614b1dcc
Gitweb: http://git.kernel.org/tip/9b032d21f6482ee305dcdec418c15153614b1dcc
Author: Borislav Petkov 
AuthorDate: Thu, 24 Nov 2016 22:05:50 +0100
Committer:  Ingo Molnar 
CommitDate: Fri, 25 Nov 2016 07:11:29 +0100

x86/boot/64: Use defines for page size

... instead of naked numbers like the rest of the asm does in this file.

No code changed:

  # arch/x86/kernel/head_64.o:

   textdata bss dec hex filename
   1124  2908644096  296084   48494 head_64.o.before
   1124  2908644096  296084   48494 head_64.o.after

md5:
   87086e202588939296f66e892414ffe2  head_64.o.before.asm
   87086e202588939296f66e892414ffe2  head_64.o.after.asm

Signed-off-by: Borislav Petkov 
Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: Brian Gerst 
Cc: Denys Vlasenko 
Cc: H. Peter Anvin 
Cc: Josh Poimboeuf 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Link: http://lkml.kernel.org/r/20161124210550.15025-1...@alien8.de
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/head_64.S | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index b07cd27..a15d381 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -112,20 +112,20 @@ startup_64:
movq%rdi, %rax
shrq$PGDIR_SHIFT, %rax
 
-   leaq(4096 + _KERNPG_TABLE)(%rbx), %rdx
+   leaq(PAGE_SIZE + _KERNPG_TABLE)(%rbx), %rdx
movq%rdx, 0(%rbx,%rax,8)
movq%rdx, 8(%rbx,%rax,8)
 
-   addq$4096, %rdx
+   addq$PAGE_SIZE, %rdx
movq%rdi, %rax
shrq$PUD_SHIFT, %rax
andl$(PTRS_PER_PUD-1), %eax
-   movq%rdx, 4096(%rbx,%rax,8)
+   movq%rdx, PAGE_SIZE(%rbx,%rax,8)
incl%eax
andl$(PTRS_PER_PUD-1), %eax
-   movq%rdx, 4096(%rbx,%rax,8)
+   movq%rdx, PAGE_SIZE(%rbx,%rax,8)
 
-   addq$8192, %rbx
+   addq$PAGE_SIZE * 2, %rbx
movq%rdi, %rax
shrq$PMD_SHIFT, %rdi
addq$(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL), %rax

[tip:efi/core] efi/libstub: Make efi_random_alloc() allocate below 4 GB on 32-bit

2016-11-24 Thread tip-bot for Ard Biesheuvel

Commit-ID:  018edcfac4c3b140366ad51b0907f3becb5bb624
Gitweb: http://git.kernel.org/tip/018edcfac4c3b140366ad51b0907f3becb5bb624
Author: Ard Biesheuvel 
AuthorDate: Thu, 24 Nov 2016 18:02:23 +
Committer:  Ingo Molnar 
CommitDate: Fri, 25 Nov 2016 07:15:23 +0100

efi/libstub: Make efi_random_alloc() allocate below 4 GB on 32-bit

The UEFI stub executes in the context of the firmware, which identity
maps the available system RAM, which implies that only memory below
4 GB can be used for allocations on 32-bit architectures, even on [L]PAE
capable hardware.

So ignore any reported memory above 4 GB in efi_random_alloc(). This
also fixes a reported build problem on ARM under -Os, where the 64-bit
logical shift relies on a software routine that the ARM decompressor does
not provide.

A second [minor] issue is also fixed, where the '+ 1' is moved out of
the shift, where it belongs: the reason for its presence is that a
memory region where start == end should count as a single slot, given
that 'end' takes the desired size and alignment of the allocation into
account.

To clarify the code in this regard, rename start/end to 'first_slot' and
'last_slot', respectively, and introduce 'region_end' to describe the
last usable address of the current region.

Reported-by: Arnd Bergmann 
Signed-off-by: Ard Biesheuvel 
Cc: Linus Torvalds 
Cc: Matt Fleming 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: linux-...@vger.kernel.org
Link: 
http://lkml.kernel.org/r/1480010543-25709-2-git-send-email-ard.biesheu...@linaro.org
Signed-off-by: Ingo Molnar 
---
 drivers/firmware/efi/libstub/random.c | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/firmware/efi/libstub/random.c 
b/drivers/firmware/efi/libstub/random.c
index 3a3feac..7e72954 100644
--- a/drivers/firmware/efi/libstub/random.c
+++ b/drivers/firmware/efi/libstub/random.c
@@ -45,19 +45,20 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t 
*md,
 unsigned long align_shift)
 {
unsigned long align = 1UL << align_shift;
-   u64 start, end;
+   u64 first_slot, last_slot, region_end;
 
if (md->type != EFI_CONVENTIONAL_MEMORY)
return 0;
 
-   start = round_up(md->phys_addr, align);
-   end = round_down(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - size,
-align);
+   region_end = min((u64)ULONG_MAX, md->phys_addr + 
md->num_pages*EFI_PAGE_SIZE - 1);
 
-   if (start > end)
+   first_slot = round_up(md->phys_addr, align);
+   last_slot = round_down(region_end - size + 1, align);
+
+   if (first_slot > last_slot)
return 0;
 
-   return (end - start + 1) >> align_shift;
+   return ((unsigned long)(last_slot - first_slot) >> align_shift) + 1;
 }
 
 /*

[tip:x86/asm] x86/boot/64: Use defines for page size

2016-11-24 Thread tip-bot for Borislav Petkov

Commit-ID:  9b032d21f6482ee305dcdec418c15153614b1dcc
Gitweb: http://git.kernel.org/tip/9b032d21f6482ee305dcdec418c15153614b1dcc
Author: Borislav Petkov 
AuthorDate: Thu, 24 Nov 2016 22:05:50 +0100
Committer:  Ingo Molnar 
CommitDate: Fri, 25 Nov 2016 07:11:29 +0100

x86/boot/64: Use defines for page size

... instead of naked numbers like the rest of the asm does in this file.

No code changed:

  # arch/x86/kernel/head_64.o:

     text    data     bss     dec     hex filename
     1124  290864    4096  296084   48494 head_64.o.before
     1124  290864    4096  296084   48494 head_64.o.after

md5:
   87086e202588939296f66e892414ffe2  head_64.o.before.asm
   87086e202588939296f66e892414ffe2  head_64.o.after.asm

Signed-off-by: Borislav Petkov 
Cc: Andy Lutomirski 
Cc: Borislav Petkov 
Cc: Brian Gerst 
Cc: Denys Vlasenko 
Cc: H. Peter Anvin 
Cc: Josh Poimboeuf 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Link: http://lkml.kernel.org/r/20161124210550.15025-1...@alien8.de
Signed-off-by: Ingo Molnar 
---
 arch/x86/kernel/head_64.S | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index b07cd27..a15d381 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -112,20 +112,20 @@ startup_64:
movq%rdi, %rax
shrq$PGDIR_SHIFT, %rax
 
-   leaq(4096 + _KERNPG_TABLE)(%rbx), %rdx
+   leaq(PAGE_SIZE + _KERNPG_TABLE)(%rbx), %rdx
movq%rdx, 0(%rbx,%rax,8)
movq%rdx, 8(%rbx,%rax,8)
 
-   addq$4096, %rdx
+   addq$PAGE_SIZE, %rdx
movq%rdi, %rax
shrq$PUD_SHIFT, %rax
andl$(PTRS_PER_PUD-1), %eax
-   movq%rdx, 4096(%rbx,%rax,8)
+   movq%rdx, PAGE_SIZE(%rbx,%rax,8)
incl%eax
andl$(PTRS_PER_PUD-1), %eax
-   movq%rdx, 4096(%rbx,%rax,8)
+   movq%rdx, PAGE_SIZE(%rbx,%rax,8)
 
-   addq$8192, %rbx
+   addq$PAGE_SIZE * 2, %rbx
movq%rdi, %rax
shrq$PMD_SHIFT, %rdi
addq$(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL), %rax

[tip:locking/urgent] locking/selftest: Fix output since KERN_CONT changes

2016-11-24 Thread tip-bot for Michael Ellerman

Commit-ID:  2513940989fa2c56d0aeb4f5792d22804d92ab4c
Gitweb: http://git.kernel.org/tip/2513940989fa2c56d0aeb4f5792d22804d92ab4c
Author: Michael Ellerman 
AuthorDate: Fri, 25 Nov 2016 09:45:28 +1100
Committer:  Ingo Molnar 
CommitDate: Fri, 25 Nov 2016 07:12:19 +0100

locking/selftest: Fix output since KERN_CONT changes

Since the KERN_CONT changes the locking-selftest output is messed up, eg:

  
   | spin |wlock |rlock |mutex | wsem | rsem |
--
   A-A deadlock:
ok  |
ok  |
ok  |
ok  |
ok  |
ok  |

Use pr_cont() to get it looking normal again:

  
   | spin |wlock |rlock |mutex | wsem | rsem |
--
   A-A deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |

Reported-by: Christian Kujau 
Signed-off-by: Michael Ellerman 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: linuxppc-...@ozlabs.org
Link: 
http://lkml.kernel.org/r/1480027528-934-1-git-send-email-...@ellerman.id.au
Signed-off-by: Ingo Molnar 
---
 lib/locking-selftest.c | 66 +-
 1 file changed, 33 insertions(+), 33 deletions(-)

diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 872a15a..f3a217e 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -980,23 +980,23 @@ static void dotest(void (*testcase_fn)(void), int 
expected, int lockclass_mask)
 #ifndef CONFIG_PROVE_LOCKING
if (expected == FAILURE && debug_locks) {
expected_testcase_failures++;
-   printk("failed|");
+   pr_cont("failed|");
}
else
 #endif
if (debug_locks != expected) {
unexpected_testcase_failures++;
-   printk("FAILED|");
+   pr_cont("FAILED|");
 
dump_stack();
} else {
testcase_successes++;
-   printk("  ok  |");
+   pr_cont("  ok  |");
}
testcase_total++;
 
if (debug_locks_verbose)
-   printk(" lockclass mask: %x, debug_locks: %d, expected: %d\n",
+   pr_cont(" lockclass mask: %x, debug_locks: %d, expected: %d\n",
lockclass_mask, debug_locks, expected);
/*
 * Some tests (e.g. double-unlock) might corrupt the preemption
@@ -1021,26 +1021,26 @@ static inline void print_testname(const char *testname)
 #define DO_TESTCASE_1(desc, name, nr)  \
print_testname(desc"/"#nr); \
dotest(name##_##nr, SUCCESS, LOCKTYPE_RWLOCK);  \
-   printk("\n");
+   pr_cont("\n");
 
 #define DO_TESTCASE_1B(desc, name, nr) \
print_testname(desc"/"#nr); \
dotest(name##_##nr, FAILURE, LOCKTYPE_RWLOCK);  \
-   printk("\n");
+   pr_cont("\n");
 
 #define DO_TESTCASE_3(desc, name, nr)  \
print_testname(desc"/"#nr); \
dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN);   \
dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK);\
dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK);\
-   printk("\n");
+   pr_cont("\n");
 
 #define DO_TESTCASE_3RW(desc, name, nr)\
print_testname(desc"/"#nr); \
dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN|LOCKTYPE_RWLOCK);\
dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK);\
dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK);\
-   printk("\n");
+   pr_cont("\n");
 
 #define DO_TESTCASE_6(desc, name)  \
print_testname(desc);   \
@@ -1050,7 +1050,7 @@ static inline void print_testname(const char *testname)
dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX);  \
dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM);   \
dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM);   \
-   printk("\n");
+   pr_cont("\n");
 
 #define DO_TESTCASE_6_SUCCESS(desc, name)  \
print_testname(desc);   \
@@ -1060,7 +1060,7 @@ static inline void print_testname(const char *testname)
dotest(name##_mutex, SUCCESS, LOCKTYPE_MUTEX);  \
dotest(name##_wsem, SUCCESS, LOCKTYPE_RWSEM);   \

[tip:locking/urgent] locking/selftest: Fix output since KERN_CONT changes

2016-11-24 Thread tip-bot for Michael Ellerman

Commit-ID:  2513940989fa2c56d0aeb4f5792d22804d92ab4c
Gitweb: http://git.kernel.org/tip/2513940989fa2c56d0aeb4f5792d22804d92ab4c
Author: Michael Ellerman 
AuthorDate: Fri, 25 Nov 2016 09:45:28 +1100
Committer:  Ingo Molnar 
CommitDate: Fri, 25 Nov 2016 07:12:19 +0100

locking/selftest: Fix output since KERN_CONT changes

Since the KERN_CONT changes the locking-selftest output is messed up, eg:

  
   | spin |wlock |rlock |mutex | wsem | rsem |
--
   A-A deadlock:
ok  |
ok  |
ok  |
ok  |
ok  |
ok  |

Use pr_cont() to get it looking normal again:

  
   | spin |wlock |rlock |mutex | wsem | rsem |
--
   A-A deadlock:  ok  |  ok  |  ok  |  ok  |  ok  |  ok  |

Reported-by: Christian Kujau 
Signed-off-by: Michael Ellerman 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: Thomas Gleixner 
Cc: linuxppc-...@ozlabs.org
Link: 
http://lkml.kernel.org/r/1480027528-934-1-git-send-email-...@ellerman.id.au
Signed-off-by: Ingo Molnar 
---
 lib/locking-selftest.c | 66 +-
 1 file changed, 33 insertions(+), 33 deletions(-)

diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 872a15a..f3a217e 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -980,23 +980,23 @@ static void dotest(void (*testcase_fn)(void), int 
expected, int lockclass_mask)
 #ifndef CONFIG_PROVE_LOCKING
if (expected == FAILURE && debug_locks) {
expected_testcase_failures++;
-   printk("failed|");
+   pr_cont("failed|");
}
else
 #endif
if (debug_locks != expected) {
unexpected_testcase_failures++;
-   printk("FAILED|");
+   pr_cont("FAILED|");
 
dump_stack();
} else {
testcase_successes++;
-   printk("  ok  |");
+   pr_cont("  ok  |");
}
testcase_total++;
 
if (debug_locks_verbose)
-   printk(" lockclass mask: %x, debug_locks: %d, expected: %d\n",
+   pr_cont(" lockclass mask: %x, debug_locks: %d, expected: %d\n",
lockclass_mask, debug_locks, expected);
/*
 * Some tests (e.g. double-unlock) might corrupt the preemption
@@ -1021,26 +1021,26 @@ static inline void print_testname(const char *testname)
 #define DO_TESTCASE_1(desc, name, nr)  \
print_testname(desc"/"#nr); \
dotest(name##_##nr, SUCCESS, LOCKTYPE_RWLOCK);  \
-   printk("\n");
+   pr_cont("\n");
 
 #define DO_TESTCASE_1B(desc, name, nr) \
print_testname(desc"/"#nr); \
dotest(name##_##nr, FAILURE, LOCKTYPE_RWLOCK);  \
-   printk("\n");
+   pr_cont("\n");
 
 #define DO_TESTCASE_3(desc, name, nr)  \
print_testname(desc"/"#nr); \
dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN);   \
dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK);\
dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK);\
-   printk("\n");
+   pr_cont("\n");
 
 #define DO_TESTCASE_3RW(desc, name, nr)\
print_testname(desc"/"#nr); \
dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN|LOCKTYPE_RWLOCK);\
dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK);\
dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK);\
-   printk("\n");
+   pr_cont("\n");
 
 #define DO_TESTCASE_6(desc, name)  \
print_testname(desc);   \
@@ -1050,7 +1050,7 @@ static inline void print_testname(const char *testname)
dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX);  \
dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM);   \
dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM);   \
-   printk("\n");
+   pr_cont("\n");
 
 #define DO_TESTCASE_6_SUCCESS(desc, name)  \
print_testname(desc);   \
@@ -1060,7 +1060,7 @@ static inline void print_testname(const char *testname)
dotest(name##_mutex, SUCCESS, LOCKTYPE_MUTEX);  \
dotest(name##_wsem, SUCCESS, LOCKTYPE_RWSEM);   \
dotest(name##_rsem, SUCCESS, LOCKTYPE_RWSEM);   \
-   printk("\n");
+   pr_cont("\n");
 
 /*
  * 'read' variant: rlocks must not trigger.
@@ -1073,7 +1073,7 @@

Re: [PATCH] drm/hisilicon/hibmc: mark PM functions __maybe_unused

2016-11-24 Thread Daniel Vetter

On Thu, Nov 24, 2016 at 05:30:26PM +0100, Arnd Bergmann wrote:
> When CONFIG_PM_SLEEP is disabled, we get a harmless warning
> 
> drm/hisilicon/hibmc/hibmc_drm_drv.c:115:12: error: ‘hibmc_pm_resume’ defined 
> but not used [-Werror=unused-function]
> drm/hisilicon/hibmc/hibmc_drm_drv.c:97:12: error: ‘hibmc_pm_suspend’ defined 
> but not used [-Werror=unused-function]
> 
> Marking the functions as __maybe_unused avoids the warning without
> having to add an #ifdef.
> 
> Fixes: 5e0df3a08f3d ("drm/hisilicon/hibmc: Add hisilicon hibmc drm master 
> driver")
> Signed-off-by: Arnd Bergmann 

Applied to drm-misc, thx.
-Daniel

> ---
>  drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c 
> b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c
> index 73ba8b05f1da..fd949df46717 100644
> --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c
> +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c
> @@ -94,7 +94,7 @@ static struct drm_driver hibmc_driver = {
>   .irq_handler= hibmc_drm_interrupt,
>  };
>  
> -static int hibmc_pm_suspend(struct device *dev)
> +static int __maybe_unused hibmc_pm_suspend(struct device *dev)
>  {
>   struct pci_dev *pdev = to_pci_dev(dev);
>   struct drm_device *drm_dev = pci_get_drvdata(pdev);
> @@ -112,7 +112,7 @@ static int hibmc_pm_suspend(struct device *dev)
>   return 0;
>  }
>  
> -static int hibmc_pm_resume(struct device *dev)
> +static int  __maybe_unused hibmc_pm_resume(struct device *dev)
>  {
>   struct pci_dev *pdev = to_pci_dev(dev);
>   struct drm_device *drm_dev = pci_get_drvdata(pdev);
> -- 
> 2.9.0
> 
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

Re: [PATCH] drm/hisilicon/hibmc: mark PM functions __maybe_unused

2016-11-24 Thread Daniel Vetter

On Thu, Nov 24, 2016 at 05:30:26PM +0100, Arnd Bergmann wrote:
> When CONFIG_PM_SLEEP is disabled, we get a harmless warning
> 
> drm/hisilicon/hibmc/hibmc_drm_drv.c:115:12: error: ‘hibmc_pm_resume’ defined 
> but not used [-Werror=unused-function]
> drm/hisilicon/hibmc/hibmc_drm_drv.c:97:12: error: ‘hibmc_pm_suspend’ defined 
> but not used [-Werror=unused-function]
> 
> Marking the functions as __maybe_unused avoids the warning without
> having to add an #ifdef.
> 
> Fixes: 5e0df3a08f3d ("drm/hisilicon/hibmc: Add hisilicon hibmc drm master 
> driver")
> Signed-off-by: Arnd Bergmann 

Applied to drm-misc, thx.
-Daniel

> ---
>  drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c 
> b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c
> index 73ba8b05f1da..fd949df46717 100644
> --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c
> +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c
> @@ -94,7 +94,7 @@ static struct drm_driver hibmc_driver = {
>   .irq_handler= hibmc_drm_interrupt,
>  };
>  
> -static int hibmc_pm_suspend(struct device *dev)
> +static int __maybe_unused hibmc_pm_suspend(struct device *dev)
>  {
>   struct pci_dev *pdev = to_pci_dev(dev);
>   struct drm_device *drm_dev = pci_get_drvdata(pdev);
> @@ -112,7 +112,7 @@ static int hibmc_pm_suspend(struct device *dev)
>   return 0;
>  }
>  
> -static int hibmc_pm_resume(struct device *dev)
> +static int  __maybe_unused hibmc_pm_resume(struct device *dev)
>  {
>   struct pci_dev *pdev = to_pci_dev(dev);
>   struct drm_device *drm_dev = pci_get_drvdata(pdev);
> -- 
> 2.9.0
> 
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

Re: [PATCH] clk: sunxi-ng: fix PLL_CPUX adjusting on H3

2016-11-24 Thread Maxime Ripard

On Fri, Nov 25, 2016 at 01:28:47AM +0100, meg...@megous.com wrote:
> From: Ondrej Jirman 
> 
> When adjusting PLL_CPUX on H3, the PLL is temporarily driven
> too high, and the system becomes unstable (oopses or hangs).
> 
> Add a notifier to avoid this situation by temporarily switching
> to a known stable 24 MHz oscillator.
> 
> Signed-off-by: Ondrej Jirman 
> Tested-by: Lutz Sammer 

Applied, thanks!
Maxime

-- 
Maxime Ripard, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com


signature.asc
Description: PGP signature

Re: [PATCH] clk: sunxi-ng: fix PLL_CPUX adjusting on H3

2016-11-24 Thread Maxime Ripard

On Fri, Nov 25, 2016 at 01:28:47AM +0100, meg...@megous.com wrote:
> From: Ondrej Jirman 
> 
> When adjusting PLL_CPUX on H3, the PLL is temporarily driven
> too high, and the system becomes unstable (oopses or hangs).
> 
> Add a notifier to avoid this situation by temporarily switching
> to a known stable 24 MHz oscillator.
> 
> Signed-off-by: Ondrej Jirman 
> Tested-by: Lutz Sammer 

Applied, thanks!
Maxime

-- 
Maxime Ripard, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com


signature.asc
Description: PGP signature

Re: [PATCH] staging: most: Eliminate symbolic permissions usage

2016-11-24 Thread Jason Litzinger

On Thu, Nov 24, 2016 at 03:10:29PM -0700, Jason Litzinger wrote:
> Fix checkpatch warnings regarding the use of symbolic permissions.
> 
> Where the MOST_CHANNEL_ATTR macro is used, convert to octal permissions
> over symbolic.
> 
> Where _ATTR is used directly, replace with _ATTR_RW/_ATTR_WO and
> update the show/store function names appropriately.
Please ignore this; the most driver was not enabled in my config, so my
test-compile was worthless (the patch doesn't compile).

Will submit again.

-Jason

Re: [RFC][PATCH 2/3] drm/bridge: adv7511: Add 200ms delay on power-on

2016-11-24 Thread Daniel Vetter

On Fri, Nov 25, 2016 at 1:23 AM, Laurent Pinchart
 wrote:
>> > Daniel, why do we have an API that is clearly related to interrupt handling
>> > but requires the caller to implement a workqueue ?
>>
>> Because in general you need that workqueue anyway, and up to now there was
>> no driver ever who didn't have a work-queue already.
>
> None of the bridge drivers in drivers/gpu/drm/bridge/ have workqueues. They
> call the HPD helpers from a threaded interrupt handler though. Sleeping in
> that context is fine, calling functions that might rely on interrupts from the
> same device to signal completion (such as reading EDID through .get_modes())
> isn't.

Hm, as long as they all use the bit-banging interfaces they'll
probably be all fine. For everyone else you need multiple layers of
work items to make sure you never end up stalling in an interrupt vs.
holding-mode_config.mutex deadlock. So still not convinced we need
this ...
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

Re: [PATCH] staging: most: Eliminate symbolic permissions usage

2016-11-24 Thread Jason Litzinger

On Thu, Nov 24, 2016 at 03:10:29PM -0700, Jason Litzinger wrote:
> Fix checkpatch warnings regarding the use of symbolic permissions.
> 
> Where the MOST_CHANNEL_ATTR macro is used, convert to octal permissions
> over symbolic.
> 
> Where _ATTR is used directly, replace with _ATTR_RW/_ATTR_WO and
> update the show/store function names appropriately.
Please ignore this; the most driver was not enabled in my config, so my
test-compile was worthless (the patch doesn't compile).

Will submit again.

-Jason

Re: [RFC][PATCH 2/3] drm/bridge: adv7511: Add 200ms delay on power-on

2016-11-24 Thread Daniel Vetter

On Fri, Nov 25, 2016 at 1:23 AM, Laurent Pinchart
 wrote:
>> > Daniel, why do we have an API that is clearly related to interrupt handling
>> > but requires the caller to implement a workqueue ?
>>
>> Because in general you need that workqueue anyway, and up to now there was
>> no driver ever who didn't have a work-queue already.
>
> None of the bridge drivers in drivers/gpu/drm/bridge/ have workqueues. They
> call the HPD helpers from a threaded interrupt handler though. Sleeping in
> that context is fine, calling functions that might rely on interrupts from the
> same device to signal completion (such as reading EDID through .get_modes())
> isn't.

Hm, as long as they all use the bit-banging interfaces they'll
probably be all fine. For everyone else you need multiple layers of
work items to make sure you never end up stalling in an interrupt vs.
holding-mode_config.mutex deadlock. So still not convinced we need
this ...
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

RE: [PATCH net 1/2] r8152: fix the sw rx checksum is unavailable

2016-11-24 Thread Hayes Wang

Mark Lord [mailto:ml...@pobox.com]
> Sent: Friday, November 25, 2016 12:44 AM
[...]
> The bad data in this case is ASCII:
> 
>  "SRC=m3400:/ TARGET=/m340"
> 
> This data is what is seen in /run/mount/utab, a file that is read/written 
> over NFS on
> each boot.
> 
>  "SRC=m3400:/ TARGET=/m3400 ROOT=/
> ATTRS=nolock,addr=192.168.8.1\n"
> 
> But how does this ASCII data end up at offset zero of the rx buffer??
> Not possible -- this isn't even stale data, because only an rx_desc could
> be at that offset in that buffer.
> 
> So even if this were a platform memory coherency issue, one should still
> never see ASCII data at the beginning of an rx buffer.  The driver NEVER
> writes anything to the rx buffers.  Only the USB hardware ever does.
> 
> And only the r8152 dongle/driver exhibits this issue.
> Other USB dongles do not.  They *might* still have such issues,
> but because they use software checksums, the bad packets are caught/rejected.

Do you test it by rebooting? Maybe you could try a patch
commit 93fe9b183840 ("r8152: reset the bmu"). However, it should
only occur for the first urb buffer after rx is reset. I don't
think you would reset the rx frequently, so the situation seems
to be different.

Best Regards,
Hayes

RE: [PATCH net 1/2] r8152: fix the sw rx checksum is unavailable

2016-11-24 Thread Hayes Wang

Mark Lord [mailto:ml...@pobox.com]
> Sent: Friday, November 25, 2016 12:44 AM
[...]
> The bad data in this case is ASCII:
> 
>  "SRC=m3400:/ TARGET=/m340"
> 
> This data is what is seen in /run/mount/utab, a file that is read/written 
> over NFS on
> each boot.
> 
>  "SRC=m3400:/ TARGET=/m3400 ROOT=/
> ATTRS=nolock,addr=192.168.8.1\n"
> 
> But how does this ASCII data end up at offset zero of the rx buffer??
> Not possible -- this isn't even stale data, because only an rx_desc could
> be at that offset in that buffer.
> 
> So even if this were a platform memory coherency issue, one should still
> never see ASCII data at the beginning of an rx buffer.  The driver NEVER
> writes anything to the rx buffers.  Only the USB hardware ever does.
> 
> And only the r8152 dongle/driver exhibits this issue.
> Other USB dongles do not.  They *might* still have such issues,
> but because they use software checksums, the bad packets are caught/rejected.

Do you test it by rebooting? Maybe you could try a patch
commit 93fe9b183840 ("r8152: reset the bmu"). However, it should
only occur for the first urb buffer after rx is reset. I don't
think you would reset the rx frequently, so the situation seems
to be different.

Best Regards,
Hayes

Re: [PATCH 1/1 linux-next] ext4: add compatibility flag check

2016-11-24 Thread Fabian Frederick



> On 25 November 2016 at 06:26 Theodore Ts'o  wrote:
>
>
> On Thu, Nov 24, 2016 at 08:47:41PM +0100, Fabian Frederick wrote:
> > data=journal mount option should disable O_DIRECT access
> > (See Documentation/filesystems/ext4.txt) but open operations
> > using O_CREAT|O_RDWR|O_DIRECT|O_SYNC have no warning in return and file is
> > being
> > created. This patch adds vfs super_operations compatibility flag function
> > returning -EPERM in such a case.
> >
> > Signed-off-by: Fabian Frederick 
>
> The general practice by most file systems in Linux (for better or for
> worse) is to silently fall back to buffered I/O instead of failing the
> O_DIRECT open.  Feel free to try to convince linux-fsdevel otherwise,
> but that is the general and historical consensus of Linux file system
> developers.
>
> Cheers,

Thanks a lot Ted, I'll have a closer look at vfs/ext4 documentation and add some
details if required.

Regards,
Fabian

>
>                                       - Ted

Re: [PATCH 1/1 linux-next] ext4: add compatibility flag check

2016-11-24 Thread Fabian Frederick



> On 25 November 2016 at 06:26 Theodore Ts'o  wrote:
>
>
> On Thu, Nov 24, 2016 at 08:47:41PM +0100, Fabian Frederick wrote:
> > data=journal mount option should disable O_DIRECT access
> > (See Documentation/filesystems/ext4.txt) but open operations
> > using O_CREAT|O_RDWR|O_DIRECT|O_SYNC have no warning in return and file is
> > being
> > created. This patch adds vfs super_operations compatibility flag function
> > returning -EPERM in such a case.
> >
> > Signed-off-by: Fabian Frederick 
>
> The general practice by most file systems in Linux (for better or for
> worse) is to silently fall back to buffered I/O instead of failing the
> O_DIRECT open.  Feel free to try to convince linux-fsdevel otherwise,
> but that is the general and historical consensus of Linux file system
> developers.
>
> Cheers,

Thanks a lot Ted, I'll have a closer look at vfs/ext4 documentation and add some
details if required.

Regards,
Fabian

>
>                                       - Ted

Re: [PATCH v2] dmaengine: sun6i: fix the uninitialized value for v_lli

2016-11-24 Thread Vinod Koul

On Sun, Nov 20, 2016 at 12:43:56AM +0800, Hao Zhang wrote:
> dma_pool_alloc does not initialize the value of the newly allocated
> block for the v_lli, and the uninitialized value makes the dmatest
> tests fail on pine64.
> We can fix it by just changing the "|=" to "=" for the v_lli->cfg.

Applied, thanks

-- 
~Vinod

Re: [PATCH v2] dmaengine: sun6i: fix the uninitialized value for v_lli

2016-11-24 Thread Vinod Koul

On Sun, Nov 20, 2016 at 12:43:56AM +0800, Hao Zhang wrote:
> dma_pool_alloc does not initialize the value of the newly allocated
> block for the v_lli, and the uninitialized value makes the dmatest
> tests fail on pine64.
> We can fix it by just changing the "|=" to "=" for the v_lli->cfg.

Applied, thanks

-- 
~Vinod

RE: [PATCH net 1/2] r8152: fix the sw rx checksum is unavailable

2016-11-24 Thread Hayes Wang

Mark Lord [mailto:ml...@pobox.com]
> Sent: Thursday, November 24, 2016 11:25 PM
[...]
> x86 has near fully-coherent memory, so it is the "easy" platform
> to get things working on.  But Linux supports a very diverse number
> of platforms, with varying degrees of cache/memory coherency,
> and it can be tricky for things to work correctly on all of them.

However, I have tested iperf on the Raspberry Pi v1 that you suggested
for more than one day. I still couldn't reproduce your issue.

> If you are testing with the driver as currently in 4.4.34,
> then you won't even notice when things are screwing up,
> because the driver just silently drops packets.
> Or it passes them on without noticing that they have bad data.

I only drop the packet silently when the rx descriptor is outside
the urb buffer. Then, I check the rx descriptor before checking
the length of the packet.

> Here (attached) is the instrumented driver I am using here now.
> I suggest you use it or something similar when testing,
> and not the stock driver.

I would test it again with your driver.

[...]
> Also, unrelated, but inside r8152_submit_rx() there is this code:
> 
>  /* The rx would be stopped, so skip submitting */
>  if (test_bit(RTL8152_UNPLUG, &tp->flags) ||
>  !test_bit(WORK_ENABLE, &tp->flags)
> || !netif_carrier_ok(tp->netdev))
> return 0;
> 
> If that "return 0" statement is ever executed, doesn't it result
> in the loss/leak of a buffer?

They would be found back by calling rtl_start_rx(), when the rx
is restarted.

Best Regards,
Hayes

RE: [PATCH net 1/2] r8152: fix the sw rx checksum is unavailable

2016-11-24 Thread Hayes Wang

Mark Lord [mailto:ml...@pobox.com]
> Sent: Thursday, November 24, 2016 11:25 PM
[...]
> x86 has near fully-coherent memory, so it is the "easy" platform
> to get things working on.  But Linux supports a very diverse number
> of platforms, with varying degrees of cache/memory coherency,
> and it can be tricky for things to work correctly on all of them.

However, I have tested iperf on the Raspberry Pi v1 that you suggested
for more than one day. I still couldn't reproduce your issue.

> If you are testing with the driver as currently in 4.4.34,
> then you won't even notice when things are screwing up,
> because the driver just silently drops packets.
> Or it passes them on without noticing that they have bad data.

I only drop the packet silently when the rx descriptor is outside
the urb buffer. Then, I check the rx descriptor before checking
the length of the packet.

> Here (attached) is the instrumented driver I am using here now.
> I suggest you use it or something similar when testing,
> and not the stock driver.

I would test it again with your driver.

[...]
> Also, unrelated, but inside r8152_submit_rx() there is this code:
> 
>  /* The rx would be stopped, so skip submitting */
>  if (test_bit(RTL8152_UNPLUG, &tp->flags) ||
>  !test_bit(WORK_ENABLE, &tp->flags)
> || !netif_carrier_ok(tp->netdev))
> return 0;
> 
> If that "return 0" statement is ever executed, doesn't it result
> in the loss/leak of a buffer?

They would be found back by calling rtl_start_rx(), when the rx
is restarted.

Best Regards,
Hayes

Re: [PATCH] dmaengine: mv_xor: use builtin_platform_driver

2016-11-24 Thread Vinod Koul

On Fri, Nov 18, 2016 at 10:12:26PM +0800, Geliang Tang wrote:
> Use builtin_platform_driver() helper to simplify the code.

Applied, thanks


-- 
~Vinod

Re: [PATCH] dmaengine: mv_xor: use builtin_platform_driver

2016-11-24 Thread Vinod Koul

On Fri, Nov 18, 2016 at 10:12:26PM +0800, Geliang Tang wrote:
> Use builtin_platform_driver() helper to simplify the code.

Applied, thanks


-- 
~Vinod

RE: [PATCH V3 2/4] mfd: pv88080: MFD core support

2016-11-24 Thread Eric Hyeung Dong Jeong

On Monday, November 21, 2016 10:09 PM, Lee Jones Wrote:

>
> On Fri, 18 Nov 2016, Eric Jeong wrote:
> 
> >
> > From: Eric Jeong 
> >
> > This patch adds support for the PV88080 MFD core device.
> >
> > It provides communication through the I2C interface.
> > It contains the following components:
> > - Regulators
> > - Configurable GPIOs
> >
> > Kconfig and Makefile are updated to reflect support for PV88080 PMIC.
> >
> > Signed-off-by: Eric Jeong 
> >
> > ---
> > This patch applies against linux-next and next-20161117
> >
> > Hi,
> >
> > This patch adds MFD core driver for PV88080 PMIC.
> > This is done as part of the existing PV88080 regulator driver by
> > extending the driver for GPIO function support.
> >
> > Change since PATCH V2
> >  - Make one file instead of using core and i2c files
> >  - Use devm_ functions so that resources are managed automatically
> >  - Separated mfd_cell and regmap_irq_chip declaration for clarification.
> >  - Updated Kconfig to use OF and assign yes to I2C
> >
> > Change since PATCH V1
> >  - Patch separated from PATCH V1
> >
> > Regards,
> > Eric Jeong, Dialog Semiconductor Ltd.
> >
> >
> >  drivers/mfd/Kconfig |   12 ++
> >  drivers/mfd/Makefile|1 +
> >  drivers/mfd/pv88080.c   |  331 
> > +++
> >  include/linux/mfd/pv88080.h |  222 +
> >  4 files changed, 566 insertions(+)
> >  create mode 100644 drivers/mfd/pv88080.c  create mode 100644
> > include/linux/mfd/pv88080.h
> >
> > diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index
> > 06dc9b0..75abf2d 100644
> > --- a/drivers/mfd/Kconfig
> > +++ b/drivers/mfd/Kconfig
> > @@ -792,6 +792,18 @@ config MFD_PM8921_CORE
> >   Say M here if you want to include support for PM8921 chip as a module.
> >   This will build a module called "pm8921-core".
> >
> > +config MFD_PV88080
> > +   tristate "Powerventure Semiconductor PV88080 PMIC Support"
> > +   select MFD_CORE
> > +   select REGMAP_I2C
> > +   select REGMAP_IRQ
> > +   depends on I2C=y && OF
> > +   help
> > + Say yes here for support for the Powerventure Semiconductor PV88080 
> > PMIC.
> > + This includes the I2C driver and core APIs.
> > + Additional drivers must be enabled in order to use the functionality
> > + of the device.
> > +
> >  config MFD_QCOM_RPM
> > tristate "Qualcomm Resource Power Manager (RPM)"
> > depends on ARCH_QCOM && OF
> > diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index
> > db39377..e9e16c6 100644
> > --- a/drivers/mfd/Makefile
> > +++ b/drivers/mfd/Makefile
> > @@ -173,6 +173,7 @@ obj-$(CONFIG_MFD_SI476X_CORE)   += si476x-core.o
> >  obj-$(CONFIG_MFD_CS5535)   += cs5535-mfd.o
> >  obj-$(CONFIG_MFD_OMAP_USB_HOST)+= omap-usb-host.o omap-usb-tll.o
> >  obj-$(CONFIG_MFD_PM8921_CORE)  += pm8921-core.o ssbi.o
> > +obj-$(CONFIG_MFD_PV88080)  += pv88080.o
> >  obj-$(CONFIG_MFD_QCOM_RPM) += qcom_rpm.o
> >  obj-$(CONFIG_MFD_SPMI_PMIC)+= qcom-spmi-pmic.o
> >  obj-$(CONFIG_TPS65911_COMPARATOR)  += tps65911-comparator.o
> > diff --git a/drivers/mfd/pv88080.c b/drivers/mfd/pv88080.c new file
> > mode 100644 index 000..518b44f
> > --- /dev/null
> > +++ b/drivers/mfd/pv88080.c
> > @@ -0,0 +1,331 @@
> > +/*
> > + * pv88080-i2c.c - I2C access driver for PV88080
> 
> Remove the filename.
> 
> They have a habit of becoming out of date (like now).

OK, I will do that.

> 
> > + * Copyright (C) 2016  Powerventure Semiconductor Ltd.
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU General Public License
> > + * as published by the Free Software Foundation; either version 2
> > + * of the License, or (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + * GNU General Public License for more details.
> > + */
> > +
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> 
> Alphabetical.

OK, I see.

> 
> > +#include 
> 
> This doesn't need to be separated from the rest.

OK.

> 
> > +#definePV88080_REG_EVENT_A_OFFSET  0
> > +#definePV88080_REG_EVENT_B_OFFSET  1
> > +#definePV88080_REG_EVENT_C_OFFSET  2
> 
> Spaces after 'define'.

OK I will do that.

> 
> > +static const struct resource regulators_aa_resources[] = {
> > +   {
> > +   .name   = "VDD_TEMP_FAULT",
> > +   .start  = PV88080_AA_IRQ_VDD_FLT,
> > +   .end= PV88080_AA_IRQ_OVER_TEMP,
> > +   .flags  = IORESOURCE_IRQ,
> > +   },
> > +};
> > +
> > +static const struct resource regulators_ba_resources[] = {
> > +   {
> > +   .name   = "VDD_TEMP_FAULT",
> > +   .start  = PV88080_BA_IRQ_VDD_FLT,
> > +

RE: [PATCH V3 2/4] mfd: pv88080: MFD core support

2016-11-24 Thread Eric Hyeung Dong Jeong

On Monday, November 21, 2016 10:09 PM, Lee Jones Wrote:

>
> On Fri, 18 Nov 2016, Eric Jeong wrote:
> 
> >
> > From: Eric Jeong 
> >
> > This patch adds supports for PV88080 MFD core device.
> >
> > It provides communication through the I2C interface.
> > It contains the following components:
> > - Regulators
> > - Configurable GPIOs
> >
> > Kconfig and Makefile are updated to reflect support for PV88080 PMIC.
> >
> > Signed-off-by: Eric Jeong 
> >
> > ---
> > This patch applies against linux-next and next-20161117
> >
> > Hi,
> >
> > This patch adds MFD core driver for PV88080 PMIC.
> > This is done as part of the existing PV88080 regulator driver by
> > expanding the driver for GPIO function support.
> >
> > Change since PATCH V2
> >  - Make one file instead of using core and i2c files
> >  - Use devm_ function to be managed resource automatically
> >  - Separated mfd_cell and regmap_irq_chip declaration for clarification.
> >  - Updated Kconfig to use OF and assign yes to I2C
> >
> > Change since PATCH V1
> >  - Patch separated from PATCH V1
> >
> > Regards,
> > Eric Jeong, Dialog Semiconductor Ltd.
> >
> >
> >  drivers/mfd/Kconfig |   12 ++
> >  drivers/mfd/Makefile|1 +
> >  drivers/mfd/pv88080.c   |  331 
> > +++
> >  include/linux/mfd/pv88080.h |  222 +
> >  4 files changed, 566 insertions(+)
> >  create mode 100644 drivers/mfd/pv88080.c  create mode 100644
> > include/linux/mfd/pv88080.h
> >
> > diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index
> > 06dc9b0..75abf2d 100644
> > --- a/drivers/mfd/Kconfig
> > +++ b/drivers/mfd/Kconfig
> > @@ -792,6 +792,18 @@ config MFD_PM8921_CORE
> >   Say M here if you want to include support for PM8921 chip as a module.
> >   This will build a module called "pm8921-core".
> >
> > +config MFD_PV88080
> > +   tristate "Powerventure Semiconductor PV88080 PMIC Support"
> > +   select MFD_CORE
> > +   select REGMAP_I2C
> > +   select REGMAP_IRQ
> > +   depends on I2C=y && OF
> > +   help
> > + Say yes here for support for the Powerventure Semiconductor PV88080 
> > PMIC.
> > + This includes the I2C driver and core APIs.
> > + Additional drivers must be enabled in order to use the functionality
> > + of the device.
> > +
> >  config MFD_QCOM_RPM
> > tristate "Qualcomm Resource Power Manager (RPM)"
> > depends on ARCH_QCOM && OF
> > diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index
> > db39377..e9e16c6 100644
> > --- a/drivers/mfd/Makefile
> > +++ b/drivers/mfd/Makefile
> > @@ -173,6 +173,7 @@ obj-$(CONFIG_MFD_SI476X_CORE)   += si476x-core.o
> >  obj-$(CONFIG_MFD_CS5535)   += cs5535-mfd.o
> >  obj-$(CONFIG_MFD_OMAP_USB_HOST)+= omap-usb-host.o omap-usb-tll.o
> >  obj-$(CONFIG_MFD_PM8921_CORE)  += pm8921-core.o ssbi.o
> > +obj-$(CONFIG_MFD_PV88080)  += pv88080.o
> >  obj-$(CONFIG_MFD_QCOM_RPM) += qcom_rpm.o
> >  obj-$(CONFIG_MFD_SPMI_PMIC)+= qcom-spmi-pmic.o
> >  obj-$(CONFIG_TPS65911_COMPARATOR)  += tps65911-comparator.o
> > diff --git a/drivers/mfd/pv88080.c b/drivers/mfd/pv88080.c new file
> > mode 100644 index 000..518b44f
> > --- /dev/null
> > +++ b/drivers/mfd/pv88080.c
> > @@ -0,0 +1,331 @@
> > +/*
> > + * pv88080-i2c.c - I2C access driver for PV88080
> 
> Remove the filename.
> 
> They have a habit of becoming out of date (like now).

OK, I will do that.

> 
> > + * Copyright (C) 2016  Powerventure Semiconductor Ltd.
> > + *
> > + * This program is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU General Public License
> > + * as published by the Free Software Foundation; either version 2
> > + * of the License, or (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + * GNU General Public License for more details.
> > + */
> > +
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> > +#include 
> 
> Alphabetical.

OK, I see.

> 
> > +#include 
> 
> This doesn't need to be separated from the rest.

OK.

> 
> > +#definePV88080_REG_EVENT_A_OFFSET  0
> > +#definePV88080_REG_EVENT_B_OFFSET  1
> > +#definePV88080_REG_EVENT_C_OFFSET  2
> 
> Spaces after 'define'.

OK I will do that.

> 
> > +static const struct resource regulators_aa_resources[] = {
> > +   {
> > +   .name   = "VDD_TEMP_FAULT",
> > +   .start  = PV88080_AA_IRQ_VDD_FLT,
> > +   .end= PV88080_AA_IRQ_OVER_TEMP,
> > +   .flags  = IORESOURCE_IRQ,
> > +   },
> > +};
> > +
> > +static const struct resource regulators_ba_resources[] = {
> > +   {
> > +   .name   = "VDD_TEMP_FAULT",
> > +   .start  = PV88080_BA_IRQ_VDD_FLT,
> > +   .end= PV88080_BA_IRQ_OVER_TEMP,
> > +   .flags  =

Re: [PATCH v2 2/2] dmaengine: omap-dma: Support for slave devices with data port window

2016-11-24 Thread Vinod Koul

On Thu, Nov 17, 2016 at 02:50:17PM +0200, Peter Ujfalusi wrote:
> @@ -921,11 +931,45 @@ static struct dma_async_tx_descriptor 
> *omap_dma_prep_slave_sg(
>  
>   d->ccr = c->ccr | CCR_SYNC_FRAME;
>   if (dir == DMA_DEV_TO_MEM) {
> - d->ccr |= CCR_DST_AMODE_POSTINC | CCR_SRC_AMODE_CONSTANT;
>   d->csdp = CSDP_DST_BURST_64 | CSDP_DST_PACKED;
> +
> + d->ccr |= CCR_DST_AMODE_POSTINC;
> + if (port_window) {
> + d->ccr |= CCR_SRC_AMODE_DBLIDX;
> + d->ei = 1;
> + /*
> +  * One frame covers the port_window and by  configure
> +  * the source frame index to be -1 * (port_window - 1)
> +  * we instruct the sDMA that after a frame is processed
> +  * it should move back to the start of the window.
> +  */
> + d->fi = -(port_window - 1);
> +
> + if (port_window >= 64)
> + d->csdp = CSDP_SRC_BURST_64 | CSDP_SRC_PACKED;
> + else if (port_window >= 32)
> + d->csdp = CSDP_SRC_BURST_32 | CSDP_SRC_PACKED;
> + else if (port_window >= 16)
> + d->csdp = CSDP_SRC_BURST_16 | CSDP_SRC_PACKED;

this and other would look better with a switch..

-- 
~Vinod

Re: [PATCH v2 2/2] dmaengine: omap-dma: Support for slave devices with data port window

2016-11-24 Thread Vinod Koul

On Thu, Nov 17, 2016 at 02:50:17PM +0200, Peter Ujfalusi wrote:
> @@ -921,11 +931,45 @@ static struct dma_async_tx_descriptor 
> *omap_dma_prep_slave_sg(
>  
>   d->ccr = c->ccr | CCR_SYNC_FRAME;
>   if (dir == DMA_DEV_TO_MEM) {
> - d->ccr |= CCR_DST_AMODE_POSTINC | CCR_SRC_AMODE_CONSTANT;
>   d->csdp = CSDP_DST_BURST_64 | CSDP_DST_PACKED;
> +
> + d->ccr |= CCR_DST_AMODE_POSTINC;
> + if (port_window) {
> + d->ccr |= CCR_SRC_AMODE_DBLIDX;
> + d->ei = 1;
> + /*
> +  * One frame covers the port_window and by  configure
> +  * the source frame index to be -1 * (port_window - 1)
> +  * we instruct the sDMA that after a frame is processed
> +  * it should move back to the start of the window.
> +  */
> + d->fi = -(port_window - 1);
> +
> + if (port_window >= 64)
> + d->csdp = CSDP_SRC_BURST_64 | CSDP_SRC_PACKED;
> + else if (port_window >= 32)
> + d->csdp = CSDP_SRC_BURST_32 | CSDP_SRC_PACKED;
> + else if (port_window >= 16)
> + d->csdp = CSDP_SRC_BURST_16 | CSDP_SRC_PACKED;

this and other would look better with a switch..

-- 
~Vinod

Re: [PATCH 02/10] ASoC: sunxi: Add support for A23/A33/H3 codec's analog path controls

2016-11-24 Thread Chen-Yu Tsai

On Fri, Nov 25, 2016 at 1:43 PM, Icenowy Zheng  wrote:
>
>
> 12.11.2016, 14:57, "Chen-Yu Tsai" :
>> The internal codec on A23/A33/H3 is split into 2 parts. The
>> analog path controls are routed through an embedded custom register
>> bus accessed through the PRCM block.
>>
>> The SoCs share a common set of inputs, outputs, and audio paths.
>> The following table lists the differences.
>>
>> 
>> | Feature \ SoC | A23 | A33 | H3 |
>> 
>> | Headphone | v | v | |
>> 
>> | Line Out | | | v |
>> 
>> | Phone In/Out | v | v | |
>> 
>>
>> Add an ASoC component driver for it. This should be tied to the codec
>> audio card as an auxiliary device. This patch adds the common paths
>> and controls, and variant specific headphone out and line out.
>>
>> Signed-off-by: Chen-Yu Tsai 
>> ---
>>  sound/soc/sunxi/Kconfig | 8 +
>>  sound/soc/sunxi/Makefile | 1 +
>>  sound/soc/sunxi/sun8i-codec-analog.c | 665 
>> +++
>>  3 files changed, 674 insertions(+)
>>  create mode 100644 sound/soc/sunxi/sun8i-codec-analog.c
>>
>> diff --git a/sound/soc/sunxi/Kconfig b/sound/soc/sunxi/Kconfig
>> index dd2368297fd3..6c344e16aca4 100644
>> --- a/sound/soc/sunxi/Kconfig
>> +++ b/sound/soc/sunxi/Kconfig
>> @@ -9,6 +9,14 @@ config SND_SUN4I_CODEC
>>Select Y or M to add support for the Codec embedded in the 
>> Allwinner
>>A10 and affiliated SoCs.
>>
>> +config SND_SUN8I_CODEC_ANALOG
>> + tristate "Allwinner sun8i Codec Analog Controls Support"
>> + depends on MACH_SUN8I || COMPILE_TEST
>
> sun50i-a64 has a similar (or the same?) codec to A33.
>

I think the register offsets/fields were moved around again.
Why does Allwinner always do that... :/

ChenYu

>> + select REGMAP
>> + help
>> + Say Y or M if you want to add support for the analog controls for
>> + the codec embedded in newer Allwinner SoCs.
>> +
>>  config SND_SUN4I_I2S
>>  tristate "Allwinner A10 I2S Support"
>>  select SND_SOC_GENERIC_DMAENGINE_PCM
>> diff --git a/sound/soc/sunxi/Makefile b/sound/soc/sunxi/Makefile
>> index 604c7b842837..241c0df9ca0c 100644
>> --- a/sound/soc/sunxi/Makefile
>> +++ b/sound/soc/sunxi/Makefile
>> @@ -1,3 +1,4 @@
>>  obj-$(CONFIG_SND_SUN4I_CODEC) += sun4i-codec.o
>>  obj-$(CONFIG_SND_SUN4I_I2S) += sun4i-i2s.o
>>  obj-$(CONFIG_SND_SUN4I_SPDIF) += sun4i-spdif.o
>> +obj-$(CONFIG_SND_SUN8I_CODEC_ANALOG) += sun8i-codec-analog.o
>> diff --git a/sound/soc/sunxi/sun8i-codec-analog.c 
>> b/sound/soc/sunxi/sun8i-codec-analog.c
>> new file mode 100644
>> index ..222bbd440b1e
>> --- /dev/null
>> +++ b/sound/soc/sunxi/sun8i-codec-analog.c
>> @@ -0,0 +1,665 @@
>> +/*
>> + * This driver supports the analog controls for the internal codec
>> + * found in Allwinner's A31s, A23, A33 and H3 SoCs.
>> + *
>> + * Copyright 2016 Chen-Yu Tsai 
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation; either version 2 of the License, or
>> + * (at your option) any later version.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
>> + * GNU General Public License for more details.
>> + */
>> +
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +
>> +#include 
>> +#include 
>> +#include 
>> +
>> +/* Codec analog control register offsets and bit fields */
>> +#define SUN8I_ADDA_HP_VOLC 0x00
>> +#define SUN8I_ADDA_HP_VOLC_PA_CLK_GATE 7
>> +#define SUN8I_ADDA_HP_VOLC_HP_VOL 0
>> +#define SUN8I_ADDA_LOMIXSC 0x01
>> +#define SUN8I_ADDA_LOMIXSC_MIC1 6
>> +#define SUN8I_ADDA_LOMIXSC_MIC2 5
>> +#define SUN8I_ADDA_LOMIXSC_PHONE 4
>> +#define SUN8I_ADDA_LOMIXSC_PHONEN 3
>> +#define SUN8I_ADDA_LOMIXSC_LINEINL 2
>> +#define SUN8I_ADDA_LOMIXSC_DACL 1
>> +#define SUN8I_ADDA_LOMIXSC_DACR 0
>> +#define SUN8I_ADDA_ROMIXSC 0x02
>> +#define SUN8I_ADDA_ROMIXSC_MIC1 6
>> +#define SUN8I_ADDA_ROMIXSC_MIC2 5
>> +#define SUN8I_ADDA_ROMIXSC_PHONE 4
>> +#define SUN8I_ADDA_ROMIXSC_PHONEP 3
>> +#define SUN8I_ADDA_ROMIXSC_LINEINR 2
>> +#define SUN8I_ADDA_ROMIXSC_DACR 1
>> +#define SUN8I_ADDA_ROMIXSC_DACL 0
>> +#define SUN8I_ADDA_DAC_PA_SRC 0x03
>> +#define SUN8I_ADDA_DAC_PA_SRC_DACAREN 7
>> +#define SUN8I_ADDA_DAC_PA_SRC_DACALEN 6
>> +#define SUN8I_ADDA_DAC_PA_SRC_RMIXEN 5
>> +#define SUN8I_ADDA_DAC_PA_SRC_LMIXEN 4
>> +#define SUN8I_ADDA_DAC_PA_SRC_RHPPAMUTE 3
>> +#define SUN8I_ADDA_DAC_PA_SRC_LHPPAMUTE 2
>> +#define SUN8I_ADDA_DAC_PA_SRC_RHPIS 1
>> +#define SUN8I_ADDA_DAC_PA_SRC_LHPIS

Re: [PATCH 02/10] ASoC: sunxi: Add support for A23/A33/H3 codec's analog path controls

2016-11-24 Thread Chen-Yu Tsai

On Fri, Nov 25, 2016 at 1:43 PM, Icenowy Zheng  wrote:
>
>
> 12.11.2016, 14:57, "Chen-Yu Tsai" :
>> The internal codec on A23/A33/H3 is split into 2 parts. The
>> analog path controls are routed through an embedded custom register
>> bus accessed through the PRCM block.
>>
>> The SoCs share a common set of inputs, outputs, and audio paths.
>> The following table lists the differences.
>>
>> 
>> | Feature \ SoC | A23 | A33 | H3 |
>> 
>> | Headphone | v | v | |
>> 
>> | Line Out | | | v |
>> 
>> | Phone In/Out | v | v | |
>> 
>>
>> Add an ASoC component driver for it. This should be tied to the codec
>> audio card as an auxiliary device. This patch adds the common paths
>> and controls, and variant specific headphone out and line out.
>>
>> Signed-off-by: Chen-Yu Tsai 
>> ---
>>  sound/soc/sunxi/Kconfig | 8 +
>>  sound/soc/sunxi/Makefile | 1 +
>>  sound/soc/sunxi/sun8i-codec-analog.c | 665 
>> +++
>>  3 files changed, 674 insertions(+)
>>  create mode 100644 sound/soc/sunxi/sun8i-codec-analog.c
>>
>> diff --git a/sound/soc/sunxi/Kconfig b/sound/soc/sunxi/Kconfig
>> index dd2368297fd3..6c344e16aca4 100644
>> --- a/sound/soc/sunxi/Kconfig
>> +++ b/sound/soc/sunxi/Kconfig
>> @@ -9,6 +9,14 @@ config SND_SUN4I_CODEC
>>Select Y or M to add support for the Codec embedded in the 
>> Allwinner
>>A10 and affiliated SoCs.
>>
>> +config SND_SUN8I_CODEC_ANALOG
>> + tristate "Allwinner sun8i Codec Analog Controls Support"
>> + depends on MACH_SUN8I || COMPILE_TEST
>
> sun50i-a64 has a similar (or the same?) codec to A33.
>

I think the register offsets/fields were moved around again.
Why does Allwinner always do that... :/

ChenYu

>> + select REGMAP
>> + help
>> + Say Y or M if you want to add support for the analog controls for
>> + the codec embedded in newer Allwinner SoCs.
>> +
>>  config SND_SUN4I_I2S
>>  tristate "Allwinner A10 I2S Support"
>>  select SND_SOC_GENERIC_DMAENGINE_PCM
>> diff --git a/sound/soc/sunxi/Makefile b/sound/soc/sunxi/Makefile
>> index 604c7b842837..241c0df9ca0c 100644
>> --- a/sound/soc/sunxi/Makefile
>> +++ b/sound/soc/sunxi/Makefile
>> @@ -1,3 +1,4 @@
>>  obj-$(CONFIG_SND_SUN4I_CODEC) += sun4i-codec.o
>>  obj-$(CONFIG_SND_SUN4I_I2S) += sun4i-i2s.o
>>  obj-$(CONFIG_SND_SUN4I_SPDIF) += sun4i-spdif.o
>> +obj-$(CONFIG_SND_SUN8I_CODEC_ANALOG) += sun8i-codec-analog.o
>> diff --git a/sound/soc/sunxi/sun8i-codec-analog.c 
>> b/sound/soc/sunxi/sun8i-codec-analog.c
>> new file mode 100644
>> index ..222bbd440b1e
>> --- /dev/null
>> +++ b/sound/soc/sunxi/sun8i-codec-analog.c
>> @@ -0,0 +1,665 @@
>> +/*
>> + * This driver supports the analog controls for the internal codec
>> + * found in Allwinner's A31s, A23, A33 and H3 SoCs.
>> + *
>> + * Copyright 2016 Chen-Yu Tsai 
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License as published by
>> + * the Free Software Foundation; either version 2 of the License, or
>> + * (at your option) any later version.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
>> + * GNU General Public License for more details.
>> + */
>> +
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +#include 
>> +
>> +#include 
>> +#include 
>> +#include 
>> +
>> +/* Codec analog control register offsets and bit fields */
>> +#define SUN8I_ADDA_HP_VOLC 0x00
>> +#define SUN8I_ADDA_HP_VOLC_PA_CLK_GATE 7
>> +#define SUN8I_ADDA_HP_VOLC_HP_VOL 0
>> +#define SUN8I_ADDA_LOMIXSC 0x01
>> +#define SUN8I_ADDA_LOMIXSC_MIC1 6
>> +#define SUN8I_ADDA_LOMIXSC_MIC2 5
>> +#define SUN8I_ADDA_LOMIXSC_PHONE 4
>> +#define SUN8I_ADDA_LOMIXSC_PHONEN 3
>> +#define SUN8I_ADDA_LOMIXSC_LINEINL 2
>> +#define SUN8I_ADDA_LOMIXSC_DACL 1
>> +#define SUN8I_ADDA_LOMIXSC_DACR 0
>> +#define SUN8I_ADDA_ROMIXSC 0x02
>> +#define SUN8I_ADDA_ROMIXSC_MIC1 6
>> +#define SUN8I_ADDA_ROMIXSC_MIC2 5
>> +#define SUN8I_ADDA_ROMIXSC_PHONE 4
>> +#define SUN8I_ADDA_ROMIXSC_PHONEP 3
>> +#define SUN8I_ADDA_ROMIXSC_LINEINR 2
>> +#define SUN8I_ADDA_ROMIXSC_DACR 1
>> +#define SUN8I_ADDA_ROMIXSC_DACL 0
>> +#define SUN8I_ADDA_DAC_PA_SRC 0x03
>> +#define SUN8I_ADDA_DAC_PA_SRC_DACAREN 7
>> +#define SUN8I_ADDA_DAC_PA_SRC_DACALEN 6
>> +#define SUN8I_ADDA_DAC_PA_SRC_RMIXEN 5
>> +#define SUN8I_ADDA_DAC_PA_SRC_LMIXEN 4
>> +#define SUN8I_ADDA_DAC_PA_SRC_RHPPAMUTE 3
>> +#define SUN8I_ADDA_DAC_PA_SRC_LHPPAMUTE 2
>> +#define SUN8I_ADDA_DAC_PA_SRC_RHPIS 1
>> +#define SUN8I_ADDA_DAC_PA_SRC_LHPIS 0
>> +#define SUN8I_ADDA_PHONEIN_GCTRL 0x04
>> +#define

Re: module: extend 'rodata=off' boot cmdline parameter to module mappings

2016-11-24 Thread Jessica Yu


+++ AKASHI Takahiro [14/11/16 15:15 +0900]:

The current "rodata=off" parameter disables read-only kernel mappings
under CONFIG_DEBUG_RODATA:
   commit d2aa1acad22f ("mm/init: Add 'rodata=off' boot cmdline parameter
   to disable read-only kernel mappings")

This patch is a logical extension to module mappings ie. read-only mappings
at module loading can be disabled even if CONFIG_DEBUG_SET_MODULE_RONX
(mainly for debug use). Please note, however, that it only affects RO/RW
permissions, keeping NX set.

This is the first step to make CONFIG_DEBUG_SET_MODULE_RONX mandatory
(always-on) in the future as CONFIG_DEBUG_RODATA on x86 and arm64.

Suggested-by: and Acked-by: Mark Rutland 
Signed-off-by: AKASHI Takahiro 
Reviewed-by: Kees Cook 
Cc: Rusty Russell 


Hi Rusty, could I get an (n)ack for this patch? :-)

Thanks!

Jessica

Re: [PATCH] cpuset: Remove unused 'struct cpuset*' variable

2016-11-24 Thread Zefan Li

On 2016/11/25 12:55, Kirtika Ruchandani wrote:
> 'struct cpuset* cs' that is set but not used, was introduced in commit
> 1f7dd3e5a6e4 ("cgroup: fix handling of multi-destination migration from 
> subtree_control enabling").
> cpuset_cancel_attach() uses css_cs(css) instead. Compiling with W=1
> gives the following harmless warning, which we'd like to fix to
> reduce the noise with W=1 in the kernel.
> 
> kernel/cpuset.c: In function ‘cpuset_cancel_attach’:
> kernel/cpuset.c:1502:17: warning: variable ‘cs’ set but not used 
> [-Wunused-but-set-variable]
>   struct cpuset *cs;
>  ^
> 
> Fixes: 1f7dd3e5a6e4 ("cgroup: fix handling of multi-destination migration 
> from subtree_control enabling").

This isn't a bug, so I don't think this tag is proper.

> Cc: Tejun Heo 
> Signed-off-by: Kirtika Ruchandani 

Acked-by: Zefan Li 

> ---
>  kernel/cpuset.c | 2 --
>  1 file changed, 2 deletions(-)
> 
> diff --git a/kernel/cpuset.c b/kernel/cpuset.c
> index 29f815d..af51460 100644
> --- a/kernel/cpuset.c
> +++ b/kernel/cpuset.c
> @@ -1499,10 +1499,8 @@ static int cpuset_can_attach(struct cgroup_taskset 
> *tset)
>  static void cpuset_cancel_attach(struct cgroup_taskset *tset)
>  {
>   struct cgroup_subsys_state *css;
> - struct cpuset *cs;
>  
>   cgroup_taskset_first(tset, &css);
> - cs = css_cs(css);
>  
>   mutex_lock(&cpuset_mutex);
>   css_cs(css)->attach_in_progress--;
>

Re: module: extend 'rodata=off' boot cmdline parameter to module mappings

2016-11-24 Thread Jessica Yu


+++ AKASHI Takahiro [14/11/16 15:15 +0900]:

The current "rodata=off" parameter disables read-only kernel mappings
under CONFIG_DEBUG_RODATA:
   commit d2aa1acad22f ("mm/init: Add 'rodata=off' boot cmdline parameter
   to disable read-only kernel mappings")

This patch is a logical extension to module mappings ie. read-only mappings
at module loading can be disabled even if CONFIG_DEBUG_SET_MODULE_RONX
(mainly for debug use). Please note, however, that it only affects RO/RW
permissions, keeping NX set.

This is the first step to make CONFIG_DEBUG_SET_MODULE_RONX mandatory
(always-on) in the future as CONFIG_DEBUG_RODATA on x86 and arm64.

Suggested-by: and Acked-by: Mark Rutland 
Signed-off-by: AKASHI Takahiro 
Reviewed-by: Kees Cook 
Cc: Rusty Russell 


Hi Rusty, could I get an (n)ack for this patch? :-)

Thanks!

Jessica

Re: [PATCH] cpuset: Remove unused 'struct cpuset*' variable

2016-11-24 Thread Zefan Li

On 2016/11/25 12:55, Kirtika Ruchandani wrote:
> 'struct cpuset* cs' that is set but not used, was introduced in commit
> 1f7dd3e5a6e4 ("cgroup: fix handling of multi-destination migration from 
> subtree_control enabling").
> cpuset_cancel_attach() uses css_cs(css) instead. Compiling with W=1
> gives the following harmless warning, which we'd like to fix to
> reduce the noise with W=1 in the kernel.
> 
> kernel/cpuset.c: In function ‘cpuset_cancel_attach’:
> kernel/cpuset.c:1502:17: warning: variable ‘cs’ set but not used 
> [-Wunused-but-set-variable]
>   struct cpuset *cs;
>  ^
> 
> Fixes: 1f7dd3e5a6e4 ("cgroup: fix handling of multi-destination migration 
> from subtree_control enabling").

This isn't a bug, so I don't think this tag is proper.

> Cc: Tejun Heo 
> Signed-off-by: Kirtika Ruchandani 

Acked-by: Zefan Li 

> ---
>  kernel/cpuset.c | 2 --
>  1 file changed, 2 deletions(-)
> 
> diff --git a/kernel/cpuset.c b/kernel/cpuset.c
> index 29f815d..af51460 100644
> --- a/kernel/cpuset.c
> +++ b/kernel/cpuset.c
> @@ -1499,10 +1499,8 @@ static int cpuset_can_attach(struct cgroup_taskset 
> *tset)
>  static void cpuset_cancel_attach(struct cgroup_taskset *tset)
>  {
>   struct cgroup_subsys_state *css;
> - struct cpuset *cs;
>  
>   cgroup_taskset_first(tset, &css);
> - cs = css_cs(css);
>  
>   mutex_lock(&cpuset_mutex);
>   css_cs(css)->attach_in_progress--;
>

RE: [patch v3 1/1] platform/x86: move module mlx-platform from arch/x86 to drivers/platform/x86

2016-11-24 Thread Vadim Pasternak



> -Original Message-
> From: Vadim Pasternak
> Sent: Monday, November 14, 2016 10:10 AM
> To: 'Andy Shevchenko' 
> Cc: Thomas Gleixner ; dvh...@infradead.org; platform-
> driver-...@vger.kernel.org; x...@kernel.org; linux-kernel@vger.kernel.org;
> j...@resnulli.us; andriy.shevche...@linux.intel.com
> Subject: RE: [patch v3 1/1] platform/x86: move module mlx-platform from
> arch/x86 to drivers/platform/x86
> 
> 
> 
> > -Original Message-
> > From: Andy Shevchenko [mailto:andy.shevche...@gmail.com]
> > Sent: Monday, November 14, 2016 9:17 AM
> > To: Vadim Pasternak 
> > Cc: Thomas Gleixner ; dvh...@infradead.org;
> > platform- driver-...@vger.kernel.org; x...@kernel.org;
> > linux-kernel@vger.kernel.org; j...@resnulli.us;
> > andriy.shevche...@linux.intel.com
> > Subject: Re: [patch v3 1/1] platform/x86: move module mlx-platform
> > from
> > arch/x86 to drivers/platform/x86
> >
> > On Tue, Nov 8, 2016 at 8:19 AM, Vadim Pasternak 
> > wrote:
> > > Hi,
> > >
> > > Could this patch be merged to for-next for 4.10?
> > > When it's merged I'd like to submit another small patch on top of it.
> >
> 
> Hi Andy,
> Thanks for reply.
> 
> > First of all, please avoid top posting.
> 
> Sorry for that.
> I posted on top, because it doesn't come as reply to something (but it seems I
> should post at bottom in such a case).
> 
> > I will process the pdx86 mailing list and queue this week.

I am very sorry for reminding, but I see that it's still not merged.

Thanks,
Vadim.

> >
> > P.S. Are you sure that is the right fix "Remove "select MLX_PLATFORM"
> > from Kconfig, since it has unmet direct dependencies (X86 &&
> > X86_PLATFORM_DEVICES && X86_64)"?
> 
> There were two possibilities: to leave "select MLX_PLATFORM" and add "depends
> on X86_64", or to just remove the select.
> I decided to remove it, having in mind the following considerations:
> If MLX_PLATFORM is not selected here - just nobody will activate hotplug 
> driver;
> If in the future on some systems we'll have ACPI support - hotplug still is
> relevant.
> 
> >
> > >> On Mon, 31 Oct 2016, Vadim Pasternak wrote:
> > >>
> > >> > Since mlx-platform is not an architectural driver, it is moved
> > >> > out of arch/x86/platform to drivers/platform/x86.
> > >> > Relevant Makefile and Kconfig are updated.
> > >> >
> > >> > Signed-off-by: Vadim Pasternak 
> > >>
> > >> Acked-by: Thomas Gleixner 
> >
> > --
> > With Best Regards,
> > Andy Shevchenko

RE: [patch v3 1/1] platform/x86: move module mlx-platform from arch/x86 to drivers/platform/x86

2016-11-24 Thread Vadim Pasternak



> -Original Message-
> From: Vadim Pasternak
> Sent: Monday, November 14, 2016 10:10 AM
> To: 'Andy Shevchenko' 
> Cc: Thomas Gleixner ; dvh...@infradead.org; platform-
> driver-...@vger.kernel.org; x...@kernel.org; linux-kernel@vger.kernel.org;
> j...@resnulli.us; andriy.shevche...@linux.intel.com
> Subject: RE: [patch v3 1/1] platform/x86: move module mlx-platform from
> arch/x86 to drivers/platform/x86
> 
> 
> 
> > -Original Message-
> > From: Andy Shevchenko [mailto:andy.shevche...@gmail.com]
> > Sent: Monday, November 14, 2016 9:17 AM
> > To: Vadim Pasternak 
> > Cc: Thomas Gleixner ; dvh...@infradead.org;
> > platform- driver-...@vger.kernel.org; x...@kernel.org;
> > linux-kernel@vger.kernel.org; j...@resnulli.us;
> > andriy.shevche...@linux.intel.com
> > Subject: Re: [patch v3 1/1] platform/x86: move module mlx-platform
> > from
> > arch/x86 to drivers/platform/x86
> >
> > On Tue, Nov 8, 2016 at 8:19 AM, Vadim Pasternak 
> > wrote:
> > > Hi,
> > >
> > > Could this patch be merged to for-next for 4.10?
> > > When it's merged I'd like to submit another small patch on top of it.
> >
> 
> Hi Andy,
> Thanks for reply.
> 
> > First of all, please avoid top posting.
> 
> Sorry for that.
> I posted on top, because it doesn't come as reply to something (but it seems I
> should post at bottom in such a case).
> 
> > I will process the pdx86 mailing list and queue this week.

I am very sorry for reminding, but I see that it's still not merged.

Thanks,
Vadim.

> >
> > P.S. Are you sure that is the right fix "Remove "select MLX_PLATFORM"
> > from Kconfig, since it has unmet direct dependencies (X86 &&
> > X86_PLATFORM_DEVICES && X86_64)"?
> 
> There were two possibilities: to leave "select MLX_PLATFORM" and add "depends
> on X86_64", or to just remove the select.
> I decided to remove it, having in mind the following considerations:
> If MLX_PLATFORM is not selected here - just nobody will activate hotplug 
> driver;
> If in the future on some systems we'll have ACPI support - hotplug still is
> relevant.
> 
> >
> > >> On Mon, 31 Oct 2016, Vadim Pasternak wrote:
> > >>
> > >> > Since mlx-platform is not an architectural driver, it is moved
> > >> > out of arch/x86/platform to drivers/platform/x86.
> > >> > Relevant Makefile and Kconfig are updated.
> > >> >
> > >> > Signed-off-by: Vadim Pasternak 
> > >>
> > >> Acked-by: Thomas Gleixner 
> >
> > --
> > With Best Regards,
> > Andy Shevchenko

Re: [PATCH net-next] virtio-net: enable multiqueue by default

2016-11-24 Thread Jason Wang

On 2016年11月25日 12:43, Michael S. Tsirkin wrote:

On Fri, Nov 25, 2016 at 12:37:26PM +0800, Jason Wang wrote:

>We use single queue even if multiqueue is enabled and let admin to
>enable it through ethtool later. This is used to avoid possible
>regression (small packet TCP stream transmission). But looks like an
>overkill since:
>
>- single queue user can disable multiqueue when launching qemu
>- brings extra troubles for the management since it needs extra admin
>   tool in guest to enable multiqueue
>- multiqueue performs much better than single queue in most of the
>   cases
>
>So this patch enables multiqueue by default: if #queues is less than or
>equal to #vcpu, enable as much as queue pairs; if #queues is greater
>than #vcpu, enable #vcpu queue pairs.
>
>Cc: Hannes Frederic Sowa
>Cc: Michael S. Tsirkin
>Cc: Neil Horman
>Cc: Jeremy Eder
>Cc: Marko Myllynen
>Cc: Maxime Coquelin
>Signed-off-by: Jason Wang

OK at some level but all uses of num_online_cpus()
like this are racy versus hotplug.
I know we already have this bug but shouldn't we fix it
before we add more?

Not sure I get the point, do you mean adding get/put_online_cpus()? But 
is it a real bug? We don't do any cpu specific things so I believe it's 
not necessary (unless we want to keep #queues == #vcpus magically but I 
don't think so). Admin need to re-configure #queues after cpu hotplug if 
they wish.

Thanks

Re: [PATCH net-next] virtio-net: enable multiqueue by default

2016-11-24 Thread Jason Wang

On 2016年11月25日 12:43, Michael S. Tsirkin wrote:

On Fri, Nov 25, 2016 at 12:37:26PM +0800, Jason Wang wrote:

>We use single queue even if multiqueue is enabled and let admin to
>enable it through ethtool later. This is used to avoid possible
>regression (small packet TCP stream transmission). But looks like an
>overkill since:
>
>- single queue user can disable multiqueue when launching qemu
>- brings extra troubles for the management since it needs extra admin
>   tool in guest to enable multiqueue
>- multiqueue performs much better than single queue in most of the
>   cases
>
>So this patch enables multiqueue by default: if #queues is less than or
>equal to #vcpu, enable as much as queue pairs; if #queues is greater
>than #vcpu, enable #vcpu queue pairs.
>
>Cc: Hannes Frederic Sowa
>Cc: Michael S. Tsirkin
>Cc: Neil Horman
>Cc: Jeremy Eder
>Cc: Marko Myllynen
>Cc: Maxime Coquelin
>Signed-off-by: Jason Wang

OK at some level but all uses of num_online_cpus()
like this are racy versus hotplug.
I know we already have this bug but shouldn't we fix it
before we add more?

Not sure I get the point, do you mean adding get/put_online_cpus()? But 
is it a real bug? We don't do any cpu specific things so I believe it's 
not necessary (unless we want to keep #queues == #vcpus magically but I 
don't think so). Admin need to re-configure #queues after cpu hotplug if 
they wish.

Thanks

Re: [PATCH 1/1 linux-next] ext4: add compatibility flag check

2016-11-24 Thread Theodore Ts'o

On Thu, Nov 24, 2016 at 08:47:41PM +0100, Fabian Frederick wrote:
> data=journal mount option should disable O_DIRECT access
> (See Documentation/filesystems/ext4.txt) but open operations
> using O_CREAT|O_RDWR|O_DIRECT|O_SYNC have no warning in return and file is 
> being
> created. This patch adds vfs super_operations compatibility flag function
> returning -EPERM in such a case.
> 
> Signed-off-by: Fabian Frederick 

The general practice by most file systems in Linux (for better or for
worse) is to silently fall back to buffered I/O instead of failing the
O_DIRECT open.  Feel free to try to convince linux-fsdevel otherwise,
but that is the general and historical consensus of Linux file system
developers.

Cheers,

- Ted

Re: [PATCH 1/1 linux-next] ext4: add compatibility flag check

2016-11-24 Thread Theodore Ts'o

On Thu, Nov 24, 2016 at 08:47:41PM +0100, Fabian Frederick wrote:
> data=journal mount option should disable O_DIRECT access
> (See Documentation/filesystems/ext4.txt) but open operations
> using O_CREAT|O_RDWR|O_DIRECT|O_SYNC have no warning in return and file is 
> being
> created. This patch adds vfs super_operations compatibility flag function
> returning -EPERM in such a case.
> 
> Signed-off-by: Fabian Frederick 

The general practice by most file systems in Linux (for better or for
worse) is to silently fall back to buffered I/O instead of failing the
O_DIRECT open.  Feel free to try to convince linux-fsdevel otherwise,
but that is the general and historical consensus of Linux file system
developers.

Cheers,

- Ted

Re: [PATCH 1/1] dmaengine: imx-sdma - correct the dma transfer residue calculation

2016-11-24 Thread Vinod Koul

On Tue, Oct 11, 2016 at 02:13:41PM +0300, Nandor Han wrote:
> The residue calculation was taking in consideration that dma
> transaction status will be always retrieved in the dma callback
> used to inform that dma transfer is complete. However this is not
> the case for all subsystems that use dma. Some subsystems use a
> timer to check the dma status periodically.
> 
> Therefore the calculation was updated and residue is calculated
> accordingly by a) update the residue calculation taking in
> consideration the last used buffer index by using *buf_ptail* variable
> and b) chn_real_count (number of bytes transferred) is initialized to
> zero, when dma channel is created, to avoid using an uninitialized
> value in residue calculation when dma status is checked without
> waiting dma complete event.

Looks to have been missed earlier, so applying now

Thanks
-- 
~Vinod

Re: [PATCH 1/1] dmaengine: imx-sdma - correct the dma transfer residue calculation

2016-11-24 Thread Vinod Koul

On Tue, Oct 11, 2016 at 02:13:41PM +0300, Nandor Han wrote:
> The residue calculation was taking in consideration that dma
> transaction status will be always retrieved in the dma callback
> used to inform that dma transfer is complete. However this is not
> the case for all subsystems that use dma. Some subsystems use a
> timer to check the dma status periodically.
> 
> Therefore the calculation was updated and residue is calculated
> accordingly by a) update the residue calculation taking in
> consideration the last used buffer index by using *buf_ptail* variable
> and b) chn_real_count (number of bytes transferred) is initialized to
> zero, when dma channel is created, to avoid using an uninitialized
> value in residue calculation when dma status is checked without
> waiting dma complete event.

Looks to have been missed earlier, so applying now

Thanks
-- 
~Vinod

Re: [PATCH 4/7] ovl: add infrastructure for intercepting file ops

2016-11-24 Thread Amir Goldstein

On Thu, Nov 24, 2016 at 4:08 PM, Amir Goldstein  wrote:
> On Thu, Nov 24, 2016 at 3:51 PM, Miklos Szeredi  wrote:
>> On Thu, Nov 24, 2016 at 2:12 PM, Amir Goldstein  wrote:
>>> On Thu, Nov 24, 2016 at 2:03 PM, Miklos Szeredi  wrote:
>>>> On Thu, Nov 24, 2016 at 12:52 PM, Amir Goldstein  
>>>> wrote:
>>>>> On Thu, Nov 24, 2016 at 12:55 PM, Miklos Szeredi  
>>>>> wrote:
>>>>
>>>>>> +   /*
>>>>>> +* These should be intercepted, but they are very 
>>>>>> unlikely to be
>>>>>> +* a problem in practice.  Leave them alone for now.
>>>>>
>>>>> It could also be handled in vfs helpers.
>>>>> Since these ops all start with establishing that src and dest are on
>>>>> the same sb,
>>>>> then the cost of copy up of src is the cost of clone_file_range from
>>>>> lower to upper,
>>>>> so it is probably worth to copy up src and leave those fops alone.
>>>>>
>>>>>> +*/
>>>>>> +   ofop->fops.copy_file_range = orig->copy_file_range;
>>>>>> +   ofop->fops.clone_file_range = orig->clone_file_range;
>>>>>> +   ofop->fops.dedupe_file_range = orig->dedupe_file_range;
>>>>
>>>> Not sure I understand.  Why should we copy up src?  Copy up is the
>>>> problem not the solution.
>>>>
>>>
>>> Maybe the idea is ill conceived, but the reasoning is:
>>> To avoid the corner case of cloning from a stale lower src,
>>> call d_real() in vfs helpers to always copy up src before cloning from it
>>> and pass the correct file onwards.
>>
>> Which correct file?  src is still the wrong one after calling d_real.
>> We need to clone-open src, just like we do in ovl_read_iter to get the
>> correct file.  But then what's the use of copying it up beforehand?
>>
>> We could move the whole logic into the vfs, but I don't really see the point.
>>

Here is a relevant use case (creating several clones),
although not directly related to ro/rw inconsistency, which
justified putting the logic in vfs.

X is a file in lower
lower is a different fs than upper
upper supports clone/dedup/copy_range

for i in `seq 1 100`; do cp --reflink=auto X X${i}; done

With current code the src and destination files are on the same
mount (test in  ioctl_file_clone), but not on the same sb (test in
vfs_clone_file_range), so cp will fall back to 100 expensive data copies.

*If* instead we d_real() and clone-open src in start of vfs_clone_file_range
*after* verifying the dest file ops support clone, then we will get only one
expensive copy up and 100 cheap clones, so it's a big win.

And for the case of src and dst inodes already on the same sb, we can
skip d_real() to avoid possible unneeded copy up, although a clone up
is going to be cheap anyway.

The so called worst case is that this was a one time clone (to X1),
but the cost in this case is not huge - 1 data copy up of X and 1 clone
X->X1 instead of just 1 data copy X->X1, so the difference is negligible.

Now it's true that this is heuristic, but arguably a good one.

Amir.

Re: [PATCH 4/7] ovl: add infrastructure for intercepting file ops

2016-11-24 Thread Amir Goldstein

On Thu, Nov 24, 2016 at 4:08 PM, Amir Goldstein  wrote:
> On Thu, Nov 24, 2016 at 3:51 PM, Miklos Szeredi  wrote:
>> On Thu, Nov 24, 2016 at 2:12 PM, Amir Goldstein  wrote:
>>> On Thu, Nov 24, 2016 at 2:03 PM, Miklos Szeredi  wrote:
>>>> On Thu, Nov 24, 2016 at 12:52 PM, Amir Goldstein  
>>>> wrote:
>>>>> On Thu, Nov 24, 2016 at 12:55 PM, Miklos Szeredi  
>>>>> wrote:
>>>>
>>>>>> +   /*
>>>>>> +* These should be intercepted, but they are very 
>>>>>> unlikely to be
>>>>>> +* a problem in practice.  Leave them alone for now.
>>>>>
>>>>> It could also be handled in vfs helpers.
>>>>> Since these ops all start with establishing that src and dest are on
>>>>> the same sb,
>>>>> then the cost of copy up of src is the cost of clone_file_range from
>>>>> lower to upper,
>>>>> so it is probably worth to copy up src and leave those fops alone.
>>>>>
>>>>>> +*/
>>>>>> +   ofop->fops.copy_file_range = orig->copy_file_range;
>>>>>> +   ofop->fops.clone_file_range = orig->clone_file_range;
>>>>>> +   ofop->fops.dedupe_file_range = orig->dedupe_file_range;
>>>>
>>>> Not sure I understand.  Why should we copy up src?  Copy up is the
>>>> problem not the solution.
>>>>
>>>
>>> Maybe the idea is ill conceived, but the reasoning is:
>>> To avoid the corner case of cloning from a stale lower src,
>>> call d_real() in vfs helpers to always copy up src before cloning from it
>>> and pass the correct file onwards.
>>
>> Which correct file?  src is still the wrong one after calling d_real.
>> We need to clone-open src, just like we do in ovl_read_iter to get the
>> correct file.  But then what's the use of copying it up beforehand?
>>
>> We could move the whole logic into the vfs, but I don't really see the point.
>>

Here is a relevant use case (creating several clones),
although not directly related to ro/rw inconsistency, which
justified putting the logic in vfs.

X is a file in lower
lower is a different fs than upper
upper supports clone/dedup/copy_range

for i in `seq 1 100`; do cp --reflink=auto X X${i}; done

With current code the src and destination files are on the same
mount (test in  ioctl_file_clone), but not on the same sb (test in
vfs_clone_file_range), so cp will fall back to 100 expensive data copies.

*If* instead we d_real() and clone-open src in start of vfs_clone_file_range
*after* verifying the dest file ops support clone, then we will get only one
expensive copy up and 100 cheap clones, so it's a big win.

And for the case of src and dst inodes already on the same sb, we can
skip d_real() to avoid possible unneeded copy up, although a clone up
is going to be cheap anyway.

The so called worst case is that this was a one time clone (to X1),
but the cost in this case is not huge - 1 data copy up of X and 1 clone
X->X1 instead of just 1 data copy X->X1, so the difference is negligible.

Now it's true that this is heuristic, but arguably a good one.

Amir.

[PATCH v7 08/12] iommu/amd: Add sanity check of irq remap information of old dev table entry

2016-11-24 Thread Baoquan He

Firstly split the dev table entry copy into address translation part and
irq remapping part. Because these two parts could be configured to
be available independently.

Secondly check if IntCtl and IntTabLen are 10b and 1000b if they are
set.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c   |  5 -
 drivers/iommu/amd_iommu_init.c  | 25 ++---
 drivers/iommu/amd_iommu_types.h |  8 
 3 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index fc8ecfb..cea90d5 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3640,11 +3640,6 @@ EXPORT_SYMBOL(amd_iommu_device_info);
 
 static struct irq_chip amd_ir_chip;
 
-#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
-#define DTE_IRQ_REMAP_INTCTL    (2ULL << 60)
-#define DTE_IRQ_TABLE_LEN       (8ULL << 1)
-#define DTE_IRQ_REMAP_ENABLE    1ULL
-
 static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table)
 {
u64 dte;
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 4233f26..4427c63 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -837,12 +837,12 @@ static int get_dev_entry_bit(u16 devid, u8 bit)
 
 static int copy_dev_tables(void)
 {
+   u64 int_ctl, int_tab_len, entry, last_entry = 0;
struct dev_table_entry *old_devtb = NULL;
u32 lo, hi, devid, old_devtb_size;
phys_addr_t old_devtb_phys;
-   u64 entry, last_entry = 0;
struct amd_iommu *iommu;
-   u16 dom_id, dte_v;
+   u16 dom_id, dte_v, irq_v;
static int copied;
 
for_each_iommu(iommu) {
@@ -881,8 +881,27 @@ static int copy_dev_tables(void)
amd_iommu_dev_table[devid] = old_devtb[devid];
dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
-   if (dte_v && dom_id)
+   if (dte_v && dom_id) {
+   amd_iommu_dev_table[devid].data[0]
+   = old_devtb[devid].data[0];
+   amd_iommu_dev_table[devid].data[1]
+   = old_devtb[devid].data[1];
__set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
+   }
+
+   irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
+   int_ctl = old_devtb[devid].data[2] & 
DTE_IRQ_REMAP_INTCTL_MASK;
+   int_tab_len = old_devtb[devid].data[2] & 
DTE_IRQ_TABLE_LEN_MASK;
+   if (irq_v && (int_ctl || int_tab_len)) {
+   if ((int_ctl != DTE_IRQ_REMAP_INTCTL) ||
+(int_tab_len != DTE_IRQ_TABLE_LEN)) {
+   pr_err("Wrong old irq remapping flag: 
%#x\n", devid);
+   return -1;
+   }
+
+   amd_iommu_dev_table[devid].data[2]
+   = old_devtb[devid].data[2];
+   }
}
memunmap(old_devtb);
copied = 1;
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 79ec841..b5ae18e 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -250,6 +250,14 @@
 
 #define GA_GUEST_NR            0x1
 
+/* Bit value definition for dte irq remapping fields*/
+#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
+#define DTE_IRQ_REMAP_INTCTL_MASK  (0x3ULL << 60)
+#define DTE_IRQ_TABLE_LEN_MASK (0xfULL << 1)
+#define DTE_IRQ_REMAP_INTCTL    (2ULL << 60)
+#define DTE_IRQ_TABLE_LEN       (8ULL << 1)
+#define DTE_IRQ_REMAP_ENABLE    1ULL
+
 #define PAGE_MODE_NONE    0x00
 #define PAGE_MODE_1_LEVEL 0x01
 #define PAGE_MODE_2_LEVEL 0x02
-- 
2.5.5

[PATCH v7 08/12] iommu/amd: Add sanity check of irq remap information of old dev table entry

2016-11-24 Thread Baoquan He

Firstly split the dev table entry copy into address translation part and
irq remapping part. Because these two parts could be configured to
be available independently.

Secondly check if IntCtl and IntTabLen are 10b and 1000b if they are
set.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c   |  5 -
 drivers/iommu/amd_iommu_init.c  | 25 ++---
 drivers/iommu/amd_iommu_types.h |  8 
 3 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index fc8ecfb..cea90d5 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3640,11 +3640,6 @@ EXPORT_SYMBOL(amd_iommu_device_info);
 
 static struct irq_chip amd_ir_chip;
 
-#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
-#define DTE_IRQ_REMAP_INTCTL    (2ULL << 60)
-#define DTE_IRQ_TABLE_LEN       (8ULL << 1)
-#define DTE_IRQ_REMAP_ENABLE    1ULL
-
 static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table)
 {
u64 dte;
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 4233f26..4427c63 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -837,12 +837,12 @@ static int get_dev_entry_bit(u16 devid, u8 bit)
 
 static int copy_dev_tables(void)
 {
+   u64 int_ctl, int_tab_len, entry, last_entry = 0;
struct dev_table_entry *old_devtb = NULL;
u32 lo, hi, devid, old_devtb_size;
phys_addr_t old_devtb_phys;
-   u64 entry, last_entry = 0;
struct amd_iommu *iommu;
-   u16 dom_id, dte_v;
+   u16 dom_id, dte_v, irq_v;
static int copied;
 
for_each_iommu(iommu) {
@@ -881,8 +881,27 @@ static int copy_dev_tables(void)
amd_iommu_dev_table[devid] = old_devtb[devid];
dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
-   if (dte_v && dom_id)
+   if (dte_v && dom_id) {
+   amd_iommu_dev_table[devid].data[0]
+   = old_devtb[devid].data[0];
+   amd_iommu_dev_table[devid].data[1]
+   = old_devtb[devid].data[1];
__set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
+   }
+
+   irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
+   int_ctl = old_devtb[devid].data[2] & 
DTE_IRQ_REMAP_INTCTL_MASK;
+   int_tab_len = old_devtb[devid].data[2] & 
DTE_IRQ_TABLE_LEN_MASK;
+   if (irq_v && (int_ctl || int_tab_len)) {
+   if ((int_ctl != DTE_IRQ_REMAP_INTCTL) ||
+(int_tab_len != DTE_IRQ_TABLE_LEN)) {
+   pr_err("Wrong old irq remapping flag: 
%#x\n", devid);
+   return -1;
+   }
+
+   amd_iommu_dev_table[devid].data[2]
+   = old_devtb[devid].data[2];
+   }
}
memunmap(old_devtb);
copied = 1;
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 79ec841..b5ae18e 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -250,6 +250,14 @@
 
 #define GA_GUEST_NR            0x1
 
+/* Bit value definition for dte irq remapping fields*/
+#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
+#define DTE_IRQ_REMAP_INTCTL_MASK  (0x3ULL << 60)
+#define DTE_IRQ_TABLE_LEN_MASK (0xfULL << 1)
+#define DTE_IRQ_REMAP_INTCTL    (2ULL << 60)
+#define DTE_IRQ_TABLE_LEN       (8ULL << 1)
+#define DTE_IRQ_REMAP_ENABLE    1ULL
+
 #define PAGE_MODE_NONE    0x00
 #define PAGE_MODE_1_LEVEL 0x01
 #define PAGE_MODE_2_LEVEL 0x02
-- 
2.5.5

[PATCH v7 07/12] iommu/amd: Use is_attach_deferred call-back

2016-11-24 Thread Baoquan He

Implement call-back is_attach_deferred and use it to defer the
domain attach from iommu driver init to device driver init when
iommu is pre-enabled in kdump kernel.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c | 23 ++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index d5aef72..fc8ecfb 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -138,6 +138,7 @@ struct iommu_dev_data {
 PPR completions */
u32 errata;   /* Bitmap for errata to apply */
bool use_vapic;   /* Enable device to use vapic mode */
+   bool defer_attach;
 };
 
 /*
@@ -340,12 +341,17 @@ static u16 get_alias(struct device *dev)
 static struct iommu_dev_data *find_dev_data(u16 devid)
 {
struct iommu_dev_data *dev_data;
+   struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
 
dev_data = search_dev_data(devid);
 
-   if (dev_data == NULL)
+   if (dev_data == NULL) {
dev_data = alloc_dev_data(devid);
 
+   if (translation_pre_enabled(iommu))
+   dev_data->defer_attach = true;
+   }
+
return dev_data;
 }
 
@@ -2315,11 +2321,18 @@ static void queue_add(struct dma_ops_domain *dma_dom,
 static struct protection_domain *get_domain(struct device *dev)
 {
struct protection_domain *domain;
+   struct iommu_domain *io_domain;
 
if (!check_device(dev))
return ERR_PTR(-EINVAL);
 
domain = get_dev_data(dev)->domain;
+   if (domain == NULL && get_dev_data(dev)->defer_attach) {
+   get_dev_data(dev)->defer_attach = false;
+   io_domain = iommu_get_domain_for_dev(dev);
+   domain = to_pdomain(io_domain);
+   attach_device(dev, domain);
+   }
if (!dma_ops_domain(domain))
return ERR_PTR(-EBUSY);
 
@@ -3215,6 +3228,13 @@ static void amd_iommu_apply_dm_region(struct device *dev,
WARN_ON_ONCE(reserve_iova(&dma_dom->iovad, start, end) == NULL);
 }
 
+static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
+struct device *dev)
+{
+   struct iommu_dev_data *dev_data = dev->archdata.iommu;
+   return dev_data->defer_attach;
+}
+
 static const struct iommu_ops amd_iommu_ops = {
.capable = amd_iommu_capable,
.domain_alloc = amd_iommu_domain_alloc,
@@ -3231,6 +3251,7 @@ static const struct iommu_ops amd_iommu_ops = {
.get_dm_regions = amd_iommu_get_dm_regions,
.put_dm_regions = amd_iommu_put_dm_regions,
.apply_dm_region = amd_iommu_apply_dm_region,
+   .is_attach_deferred = amd_iommu_is_attach_deferred,
.pgsize_bitmap  = AMD_IOMMU_PGSIZES,
 };
 
-- 
2.5.5

[PATCH v7 03/12] iommu/amd: Define bit fields for DTE particularly

2016-11-24 Thread Baoquan He

In amd-vi spec several bits of IO PTE fields and DTE fields are similar
so that both of them can share the same MACRO definition. However
defining their respective bit fields can make code more readable. So
do it in this patch.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c   |  8 
 drivers/iommu/amd_iommu_types.h | 18 ++
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 754595e..0b0e50e 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1476,9 +1476,9 @@ static int iommu_map_page(struct protection_domain *dom,
 
if (count > 1) {
__pte = PAGE_SIZE_PTE(phys_addr, page_size);
-   __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC;
+   __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
} else
-   __pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC;
+   __pte = phys_addr | IOMMU_PTE_PR | IOMMU_PTE_FC;
 
if (prot & IOMMU_PROT_IR)
__pte |= IOMMU_PTE_IR;
@@ -1805,7 +1805,7 @@ static void set_dte_entry(u16 devid, struct 
protection_domain *domain, bool ats)
 
pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
-   pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
+   pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV;
 
flags = amd_iommu_dev_table[devid].data[1];
 
@@ -1848,7 +1848,7 @@ static void set_dte_entry(u16 devid, struct 
protection_domain *domain, bool ats)
 static void clear_dte_entry(u16 devid)
 {
/* remove entry from the device table seen by the hardware */
-   amd_iommu_dev_table[devid].data[0]  = IOMMU_PTE_P | IOMMU_PTE_TV;
+   amd_iommu_dev_table[devid].data[0]  = DTE_FLAG_V | DTE_FLAG_TV;
amd_iommu_dev_table[devid].data[1] &= DTE_FLAG_MASK;
 
amd_iommu_apply_erratum_63(devid);
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 2bbc19d..6a4378f 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -265,7 +265,7 @@
 #define PM_LEVEL_INDEX(x, a)   (((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL)
 #define PM_LEVEL_ENC(x)        (((x) << 9) & 0xe00ULL)
 #define PM_LEVEL_PDE(x, a) ((a) | PM_LEVEL_ENC((x)) | \
-IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW)
+IOMMU_PTE_PR | IOMMU_PTE_IR | IOMMU_PTE_IW)
 #define PM_PTE_LEVEL(pte)  (((pte) >> 9) & 0x7ULL)
 
 #define PM_MAP_4k  0
@@ -314,13 +314,23 @@
 #define PTE_LEVEL_PAGE_SIZE(level) \
(1ULL << (12 + (9 * (level))))
 
-#define IOMMU_PTE_P  (1ULL << 0)
-#define IOMMU_PTE_TV (1ULL << 1)
+/*
+ * Bit value definition for I/O PTE fields
+ */
+#define IOMMU_PTE_PR (1ULL << 0)
 #define IOMMU_PTE_U  (1ULL << 59)
 #define IOMMU_PTE_FC (1ULL << 60)
 #define IOMMU_PTE_IR (1ULL << 61)
 #define IOMMU_PTE_IW (1ULL << 62)
 
+/*
+ * Bit value definition for DTE fields
+ */
+#define DTE_FLAG_V  (1ULL << 0)
+#define DTE_FLAG_TV (1ULL << 1)
+#define DTE_FLAG_IR (1ULL << 61)
+#define DTE_FLAG_IW (1ULL << 62)
+
 #define DTE_FLAG_IOTLB (1ULL << 32)
 #define DTE_FLAG_GV    (1ULL << 55)
 #define DTE_FLAG_MASK  (0x3ffULL << 32)
@@ -342,7 +352,7 @@
 #define GCR3_VALID 0x01ULL
 
 #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
-#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P)
+#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_PR)
 #define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK))
 #define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07)
 
-- 
2.5.5

[PATCH v7 04/12] iommu/amd: Add function copy_dev_tables

2016-11-24 Thread Baoquan He

Add function copy_dev_tables to copy the old DEV table entries of the panicked
kernel to the new allocated DEV table. Since all iommus share the same DTE table
the copy only need be done once as long as the physical address of old DEV table
is retrieved from iommu reg. Besides, we also need to:

  - Check whether all IOMMUs actually use the same device table with the same 
size

  - Verify that the size of the old device table is the expected size.

  - Reserve the old domain id occupied in 1st kernel to avoid touching the old
io-page tables. Then on-flight DMA can continue looking it up.

And define MACRO DEV_DOMID_MASK to replace magic number 0xffffULL because
it need be reused in copy_dev_tables.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c   |  2 +-
 drivers/iommu/amd_iommu_init.c  | 55 +
 drivers/iommu/amd_iommu_types.h |  1 +
 3 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 0b0e50e..d5aef72 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1838,7 +1838,7 @@ static void set_dte_entry(u16 devid, struct 
protection_domain *domain, bool ats)
flags|= tmp;
}
 
-   flags &= ~(0xffffUL);
+   flags &= ~DEV_DOMID_MASK;
flags |= domain->id;
 
amd_iommu_dev_table[devid].data[1]  = flags;
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 9458f7c..8fc9840 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -834,6 +834,61 @@ static int get_dev_entry_bit(u16 devid, u8 bit)
 }
 
 
+static int copy_dev_tables(void)
+{
+   struct dev_table_entry *old_devtb = NULL;
+   u32 lo, hi, devid, old_devtb_size;
+   phys_addr_t old_devtb_phys;
+   u64 entry, last_entry = 0;
+   struct amd_iommu *iommu;
+   u16 dom_id, dte_v;
+   static int copied;
+
+   for_each_iommu(iommu) {
+   if (!translation_pre_enabled(iommu)) {
+   pr_err("IOMMU:%d is not pre-enabled!/n",
+   iommu->index);
+   return -1;
+   }
+
+   /* All IOMMUs should use the same device table with the same 
size */
+   lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
+   hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
+   entry = (((u64) hi) << 32) + lo;
+   if (last_entry && last_entry != entry) {
+   pr_err("IOMMU:%d should use the same dev table as 
others!/n",
+   iommu->index);
+   return -1;
+   }
+   last_entry = entry;
+
+   old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
+   if (old_devtb_size != dev_table_size) {
+   pr_err("The device table size of IOMMU:%d is not 
expected!/n",
+   iommu->index);
+   return -1;
+   }
+
+   old_devtb_phys = entry & PAGE_MASK;
+   old_devtb = memremap(old_devtb_phys, dev_table_size, 
MEMREMAP_WB);
+   if (!old_devtb)
+   return -1;
+
+   if (copied)
+   continue;
+   for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
+   amd_iommu_dev_table[devid] = old_devtb[devid];
+   dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
+   dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
+   if (dte_v && dom_id)
+   __set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
+   }
+   memunmap(old_devtb);
+   copied = 1;
+   }
+   return 0;
+}
+
 void amd_iommu_apply_erratum_63(u16 devid)
 {
int sysmgt;
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 6a4378f..79ec841 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -336,6 +336,7 @@
 #define DTE_FLAG_MASK  (0x3ffULL << 32)
 #define DTE_GLX_SHIFT  (56)
 #define DTE_GLX_MASK   (3)
+#define DEV_DOMID_MASK 0xffffULL
 
 #define DTE_GCR3_VAL_A(x)  (((x) >> 12) & 0x00007ULL)
 #define DTE_GCR3_VAL_B(x)  (((x) >> 15) & 0x0ffffULL)
-- 
2.5.5

[PATCH v7 01/12] iommu/amd: Detect pre enabled translation

2016-11-24 Thread Baoquan He

Add functions to check whether translation is already enabled in IOMMU.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c  | 25 +
 drivers/iommu/amd_iommu_proto.h |  1 +
 drivers/iommu/amd_iommu_types.h |  4 
 3 files changed, 30 insertions(+)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 157e934..5ad1e023 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -251,6 +251,26 @@ static int amd_iommu_enable_interrupts(void);
 static int __init iommu_go_to_state(enum iommu_init_state state);
 static void init_device_table_dma(void);
 
+
+bool translation_pre_enabled(struct amd_iommu *iommu)
+{
+   return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
+}
+
+static void clear_translation_pre_enabled(struct amd_iommu *iommu)
+{
+   iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
+}
+
+static void init_translation_status(struct amd_iommu *iommu)
+{
+   u32 ctrl;
+
+   ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
+   if (ctrl & (1<<CONTROL_IOMMU_EN))
+       iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
+}
+
 static int iommu_pc_get_set_reg_val(struct amd_iommu *iommu,
u8 bank, u8 cntr, u8 fxn,
u64 *value, bool is_write);
@@ -1389,6 +1409,11 @@ static int __init init_iommu_one(struct amd_iommu 
*iommu, struct ivhd_header *h)
 
iommu->int_enabled = false;
 
+   init_translation_status(iommu);
+
+   if (translation_pre_enabled(iommu))
+   pr_warn("Translation is already enabled - trying to copy 
translation structures\n");
+
ret = init_iommu_from_acpi(iommu, h);
if (ret)
return ret;
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index 7eb60c1..9560183 100644
--- a/drivers/iommu/amd_iommu_proto.h
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -93,4 +93,5 @@ static inline bool iommu_feature(struct amd_iommu *iommu, u64 
f)
return !!(iommu->features & f);
 }
 
+extern bool translation_pre_enabled(struct amd_iommu *iommu);
 #endif /* _ASM_X86_AMD_IOMMU_PROTO_H  */
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 0d91785..2bbc19d 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -416,6 +416,7 @@ extern struct kmem_cache *amd_iommu_irq_cache;
 #define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL)
 
 
+
 /*
  * This struct is used to pass information about
  * incoming PPR faults around.
@@ -434,6 +435,8 @@ struct iommu_domain;
 struct irq_domain;
 struct amd_irte_ops;
 
+#define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED  (1 << 0)
+
 /*
  * This structure contains generic data for  IOMMU protection domains
  * independent of their use.
@@ -566,6 +569,7 @@ struct amd_iommu {
struct amd_irte_ops *irte_ops;
 #endif
 
+   u32 flags;
volatile u64 __aligned(8) cmd_sem;
 };
 
-- 
2.5.5

[PATCH v7 07/12] iommu/amd: Use is_attach_deferred call-back

2016-11-24 Thread Baoquan He

Implement call-back is_attach_deferred and use it to defer the
domain attach from iommu driver init to device driver init when
iommu is pre-enabled in kdump kernel.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c | 23 ++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index d5aef72..fc8ecfb 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -138,6 +138,7 @@ struct iommu_dev_data {
 PPR completions */
u32 errata;   /* Bitmap for errata to apply */
bool use_vapic;   /* Enable device to use vapic mode */
+   bool defer_attach;
 };
 
 /*
@@ -340,12 +341,17 @@ static u16 get_alias(struct device *dev)
 static struct iommu_dev_data *find_dev_data(u16 devid)
 {
struct iommu_dev_data *dev_data;
+   struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
 
dev_data = search_dev_data(devid);
 
-   if (dev_data == NULL)
+   if (dev_data == NULL) {
dev_data = alloc_dev_data(devid);
 
+   if (translation_pre_enabled(iommu))
+   dev_data->defer_attach = true;
+   }
+
return dev_data;
 }
 
@@ -2315,11 +2321,18 @@ static void queue_add(struct dma_ops_domain *dma_dom,
 static struct protection_domain *get_domain(struct device *dev)
 {
struct protection_domain *domain;
+   struct iommu_domain *io_domain;
 
if (!check_device(dev))
return ERR_PTR(-EINVAL);
 
domain = get_dev_data(dev)->domain;
+   if (domain == NULL && get_dev_data(dev)->defer_attach) {
+   get_dev_data(dev)->defer_attach = false;
+   io_domain = iommu_get_domain_for_dev(dev);
+   domain = to_pdomain(io_domain);
+   attach_device(dev, domain);
+   }
if (!dma_ops_domain(domain))
return ERR_PTR(-EBUSY);
 
@@ -3215,6 +3228,13 @@ static void amd_iommu_apply_dm_region(struct device *dev,
WARN_ON_ONCE(reserve_iova(_dom->iovad, start, end) == NULL);
 }
 
+static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
+struct device *dev)
+{
+   struct iommu_dev_data *dev_data = dev->archdata.iommu;
+   return dev_data->defer_attach;
+}
+
 static const struct iommu_ops amd_iommu_ops = {
.capable = amd_iommu_capable,
.domain_alloc = amd_iommu_domain_alloc,
@@ -3231,6 +3251,7 @@ static const struct iommu_ops amd_iommu_ops = {
.get_dm_regions = amd_iommu_get_dm_regions,
.put_dm_regions = amd_iommu_put_dm_regions,
.apply_dm_region = amd_iommu_apply_dm_region,
+   .is_attach_deferred = amd_iommu_is_attach_deferred,
.pgsize_bitmap  = AMD_IOMMU_PGSIZES,
 };
 
-- 
2.5.5

[PATCH v7 03/12] iommu/amd: Define bit fields for DTE particularly

2016-11-24 Thread Baoquan He

In the AMD-Vi spec several bits of the IO PTE fields and DTE fields are
similar, so both of them can share the same macro definitions. However,
defining their respective bit fields makes the code more readable. So
do it in this patch.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c   |  8 
 drivers/iommu/amd_iommu_types.h | 18 ++
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 754595e..0b0e50e 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1476,9 +1476,9 @@ static int iommu_map_page(struct protection_domain *dom,
 
if (count > 1) {
__pte = PAGE_SIZE_PTE(phys_addr, page_size);
-   __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC;
+   __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
} else
-   __pte = phys_addr | IOMMU_PTE_P | IOMMU_PTE_FC;
+   __pte = phys_addr | IOMMU_PTE_PR | IOMMU_PTE_FC;
 
if (prot & IOMMU_PROT_IR)
__pte |= IOMMU_PTE_IR;
@@ -1805,7 +1805,7 @@ static void set_dte_entry(u16 devid, struct 
protection_domain *domain, bool ats)
 
pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
-   pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
+   pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV;
 
flags = amd_iommu_dev_table[devid].data[1];
 
@@ -1848,7 +1848,7 @@ static void set_dte_entry(u16 devid, struct 
protection_domain *domain, bool ats)
 static void clear_dte_entry(u16 devid)
 {
/* remove entry from the device table seen by the hardware */
-   amd_iommu_dev_table[devid].data[0]  = IOMMU_PTE_P | IOMMU_PTE_TV;
+   amd_iommu_dev_table[devid].data[0]  = DTE_FLAG_V | DTE_FLAG_TV;
amd_iommu_dev_table[devid].data[1] &= DTE_FLAG_MASK;
 
amd_iommu_apply_erratum_63(devid);
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 2bbc19d..6a4378f 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -265,7 +265,7 @@
 #define PM_LEVEL_INDEX(x, a)   (((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL)
 #define PM_LEVEL_ENC(x)(((x) << 9) & 0xe00ULL)
 #define PM_LEVEL_PDE(x, a) ((a) | PM_LEVEL_ENC((x)) | \
-IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW)
+IOMMU_PTE_PR | IOMMU_PTE_IR | IOMMU_PTE_IW)
 #define PM_PTE_LEVEL(pte)  (((pte) >> 9) & 0x7ULL)
 
 #define PM_MAP_4k  0
@@ -314,13 +314,23 @@
 #define PTE_LEVEL_PAGE_SIZE(level) \
(1ULL << (12 + (9 * (level
 
-#define IOMMU_PTE_P  (1ULL << 0)
-#define IOMMU_PTE_TV (1ULL << 1)
+/*
+ * Bit value definition for I/O PTE fields
+ */
+#define IOMMU_PTE_PR (1ULL << 0)
 #define IOMMU_PTE_U  (1ULL << 59)
 #define IOMMU_PTE_FC (1ULL << 60)
 #define IOMMU_PTE_IR (1ULL << 61)
 #define IOMMU_PTE_IW (1ULL << 62)
 
+/*
+ * Bit value definition for DTE fields
+ */
+#define DTE_FLAG_V  (1ULL << 0)
+#define DTE_FLAG_TV (1ULL << 1)
+#define DTE_FLAG_IR (1ULL << 61)
+#define DTE_FLAG_IW (1ULL << 62)
+
 #define DTE_FLAG_IOTLB (1ULL << 32)
 #define DTE_FLAG_GV(1ULL << 55)
 #define DTE_FLAG_MASK  (0x3ffULL << 32)
@@ -342,7 +352,7 @@
 #define GCR3_VALID 0x01ULL
 
 #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
-#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P)
+#define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_PR)
 #define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK))
 #define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07)
 
-- 
2.5.5

[PATCH v7 04/12] iommu/amd: Add function copy_dev_tables

2016-11-24 Thread Baoquan He

Add function copy_dev_tables to copy the old DEV table entries of the panicked
kernel to the newly allocated DEV table. Since all IOMMUs share the same DEV
table, the copy only needs to be done once, as long as the physical address of
the old DEV table is retrieved from the IOMMU register. Besides, we also need to:

  - Check whether all IOMMUs actually use the same device table with the same 
size

  - Verify that the size of the old device table is the expected size.

  - Reserve the old domain ids occupied in the 1st kernel to avoid touching the
old io-page tables. Then in-flight DMA can continue to look them up.

Also define the macro DEV_DOMID_MASK to replace the magic number 0xffffULL,
because it needs to be reused in copy_dev_tables.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c   |  2 +-
 drivers/iommu/amd_iommu_init.c  | 55 +
 drivers/iommu/amd_iommu_types.h |  1 +
 3 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 0b0e50e..d5aef72 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1838,7 +1838,7 @@ static void set_dte_entry(u16 devid, struct 
protection_domain *domain, bool ats)
flags|= tmp;
}
 
-   flags &= ~(0xUL);
+   flags &= ~DEV_DOMID_MASK;
flags |= domain->id;
 
amd_iommu_dev_table[devid].data[1]  = flags;
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 9458f7c..8fc9840 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -834,6 +834,61 @@ static int get_dev_entry_bit(u16 devid, u8 bit)
 }
 
 
+static int copy_dev_tables(void)
+{
+   struct dev_table_entry *old_devtb = NULL;
+   u32 lo, hi, devid, old_devtb_size;
+   phys_addr_t old_devtb_phys;
+   u64 entry, last_entry = 0;
+   struct amd_iommu *iommu;
+   u16 dom_id, dte_v;
+   static int copied;
+
+   for_each_iommu(iommu) {
+   if (!translation_pre_enabled(iommu)) {
+   pr_err("IOMMU:%d is not pre-enabled!/n",
+   iommu->index);
+   return -1;
+   }
+
+   /* All IOMMUs should use the same device table with the same 
size */
+   lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
+   hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
+   entry = (((u64) hi) << 32) + lo;
+   if (last_entry && last_entry != entry) {
+   pr_err("IOMMU:%d should use the same dev table as 
others!/n",
+   iommu->index);
+   return -1;
+   }
+   last_entry = entry;
+
+   old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
+   if (old_devtb_size != dev_table_size) {
+   pr_err("The device table size of IOMMU:%d is not 
expected!/n",
+   iommu->index);
+   return -1;
+   }
+
+   old_devtb_phys = entry & PAGE_MASK;
+   old_devtb = memremap(old_devtb_phys, dev_table_size, 
MEMREMAP_WB);
+   if (!old_devtb)
+   return -1;
+
+   if (copied)
+   continue;
+   for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
+   amd_iommu_dev_table[devid] = old_devtb[devid];
+   dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
+   dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
+   if (dte_v && dom_id)
+   __set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
+   }
+   memunmap(old_devtb);
+   copied = 1;
+   }
+   return 0;
+}
+
 void amd_iommu_apply_erratum_63(u16 devid)
 {
int sysmgt;
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 6a4378f..79ec841 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -336,6 +336,7 @@
 #define DTE_FLAG_MASK  (0x3ffULL << 32)
 #define DTE_GLX_SHIFT  (56)
 #define DTE_GLX_MASK   (3)
+#define DEV_DOMID_MASK 0xULL
 
 #define DTE_GCR3_VAL_A(x)  (((x) >> 12) & 0x7ULL)
 #define DTE_GCR3_VAL_B(x)  (((x) >> 15) & 0x0ULL)
-- 
2.5.5

[PATCH v7 01/12] iommu/amd: Detect pre enabled translation

2016-11-24 Thread Baoquan He

Add functions to check whether translation is already enabled in IOMMU.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c  | 25 +
 drivers/iommu/amd_iommu_proto.h |  1 +
 drivers/iommu/amd_iommu_types.h |  4 
 3 files changed, 30 insertions(+)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 157e934..5ad1e023 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -251,6 +251,26 @@ static int amd_iommu_enable_interrupts(void);
 static int __init iommu_go_to_state(enum iommu_init_state state);
 static void init_device_table_dma(void);
 
+
+bool translation_pre_enabled(struct amd_iommu *iommu)
+{
+   return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
+}
+
+static void clear_translation_pre_enabled(struct amd_iommu *iommu)
+{
+   iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
+}
+
+static void init_translation_status(struct amd_iommu *iommu)
+{
+   u32 ctrl;
+
+   ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
+   if (ctrl & (1int_enabled = false;
 
+   init_translation_status(iommu);
+
+   if (translation_pre_enabled(iommu))
+   pr_warn("Translation is already enabled - trying to copy 
translation structures\n");
+
ret = init_iommu_from_acpi(iommu, h);
if (ret)
return ret;
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index 7eb60c1..9560183 100644
--- a/drivers/iommu/amd_iommu_proto.h
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -93,4 +93,5 @@ static inline bool iommu_feature(struct amd_iommu *iommu, u64 
f)
return !!(iommu->features & f);
 }
 
+extern bool translation_pre_enabled(struct amd_iommu *iommu);
 #endif /* _ASM_X86_AMD_IOMMU_PROTO_H  */
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 0d91785..2bbc19d 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -416,6 +416,7 @@ extern struct kmem_cache *amd_iommu_irq_cache;
 #define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL)
 
 
+
 /*
  * This struct is used to pass information about
  * incoming PPR faults around.
@@ -434,6 +435,8 @@ struct iommu_domain;
 struct irq_domain;
 struct amd_irte_ops;
 
+#define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED  (1 << 0)
+
 /*
  * This structure contains generic data for  IOMMU protection domains
  * independent of their use.
@@ -566,6 +569,7 @@ struct amd_iommu {
struct amd_irte_ops *irte_ops;
 #endif
 
+   u32 flags;
volatile u64 __aligned(8) cmd_sem;
 };
 
-- 
2.5.5

[PATCH v7 12/12] iommu/amd: Allocate memory below 4G for dev table if translation pre-enabled

2016-11-24 Thread Baoquan He

AMD pointed out it's unsafe to update the device-table while iommu
is enabled. It turns out that device-table pointer update is split
up into two 32bit writes in the IOMMU hardware. So updating it while
the IOMMU is enabled could have some nasty side effects.

The only way to work around this is to allocate the device-table below
4GB if translation is pre-enabled in kdump kernel. If allocation failed,
still use the old one.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index d362b63..f17f297 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -2149,11 +2149,23 @@ static void early_enable_iommu(struct amd_iommu *iommu)
  */
 static void early_enable_iommus(void)
 {
+   struct dev_table_entry *dev_tbl;
struct amd_iommu *iommu;
bool is_pre_enabled = false;
 
for_each_iommu(iommu) {
if (translation_pre_enabled(iommu)) {
+   gfp_t gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32;;
+
+   dev_tbl = (void *)__get_free_pages(gfp_flag,
+   get_order(dev_table_size));
+   if (dev_tbl != NULL) {
+   memcpy(dev_tbl, amd_iommu_dev_table, 
dev_table_size);
+   free_pages((unsigned long)amd_iommu_dev_table,
+   get_order(dev_table_size));
+   amd_iommu_dev_table = dev_tbl;
+   }
+
is_pre_enabled = true;
break;
}
-- 
2.5.5

[PATCH v7 00/12] Fix kdump faults on system with amd iommu

2016-11-24 Thread Baoquan He

This is v7 post.

The principle of the fix is similar to that of the intel iommu: just defer the
assignment of a device to a domain until device driver init. In this version of
the post, a new call-back is_attach_deferred is added to iommu-ops; it is used to
check whether we need to defer the domain attach/detach in the iommu-core code.

v5:
bnx2 NIC can't reset itself during driver init. Post patch to reset
it during driver init. IO_PAGE_FAULT can't be seen anymore.

Below is link of v5 post.
https://lists.linuxfoundation.org/pipermail/iommu/2016-September/018527.html

v5->v6:
Made the main changes below according to Joerg's comments:
- Add sanity check when copy old dev tables.

- If a device is set up with guest translations (DTE.GV=1), then don't
  copy that information but move the device over to an empty guest-cr3
  table and handle the faults in the PPR log (which just answer them
  with INVALID).

v6->v7:
Two main changes are made according to Joerg's suggestion:
- Add is_attach_deferred call-back to iommu-ops. With this, the domain
  attach can be deferred to device driver init cleanly.

- Allocate memory below 4G for dev table if translation pre-enabled.
  AMD engineer pointed out that it's unsafe to update the device-table
  while iommu is enabled. device-table pointer update is split up into
  two 32bit writes in the IOMMU hardware. So updating it while the IOMMU
  is enabled could have some nasty side effects.

Baoquan He (12):
  iommu/amd: Detect pre enabled translation
  iommu/amd: add several helper function
  iommu/amd: Define bit fields for DTE particularly
  iommu/amd: Add function copy_dev_tables
  iommu/amd: copy old trans table from old kernel
  iommu: Add is_attach_deferred call-back to iommu-ops
  iommu/amd: Use is_attach_deferred call-back
  iommu/amd: Add sanity check of irq remap information of old dev table
entry
  iommu/amd: Don't copy GCR3 table root pointer
  iommu/amd: Clear out the GV flag when handle deferred domain attach
  iommu: Assign the direct mapped domain to group->domain
  iommu/amd: Allocate memory below 4G for dev table if translation
pre-enabled

 drivers/iommu/amd_iommu.c   |  78 +---
 drivers/iommu/amd_iommu_init.c  | 201 +---
 drivers/iommu/amd_iommu_proto.h |   2 +
 drivers/iommu/amd_iommu_types.h |  53 ++-
 drivers/iommu/amd_iommu_v2.c|  18 +++-
 drivers/iommu/iommu.c   |   9 ++
 include/linux/iommu.h   |   1 +
 7 files changed, 313 insertions(+), 49 deletions(-)

-- 
2.5.5

[PATCH v7 11/12] iommu: Assign the direct mapped domain to group->domain

2016-11-24 Thread Baoquan He

In iommu_request_dm_for_dev(), all devices of the group have been attached
to the newly created direct mapped domain. We should store the domain into
group->domain so that it works for iommu_get_domain_for_dev() and
get_domain().

Signed-off-by: Baoquan He 
---
 drivers/iommu/iommu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 0262eee..a2d1a8f 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1612,6 +1612,7 @@ int iommu_request_dm_for_dev(struct device *dev)
if (group->default_domain)
iommu_domain_free(group->default_domain);
group->default_domain = dm_domain;
+   group->domain = dm_domain;
 
pr_info("Using direct mapping for device %s\n", dev_name(dev));
 
-- 
2.5.5

[PATCH v7 09/12] iommu/amd: Don't copy GCR3 table root pointer

2016-11-24 Thread Baoquan He

When the iommu is pre-enabled in the kdump kernel and a device is set up
with guest translations (DTE.GV=1), don't copy the GCR3 table root pointer,
but move the device over to an empty guest-cr3 table and handle the
faults in the PPR log (which answers them with INVALID). After all, these
PPR faults are recoverable for the device, and we should not allow the
device to change the old kernel's data when we don't have to.

Signed-off-by: Baoquan He 
Suggested-by: Joerg Roedel 
---
 drivers/iommu/amd_iommu.c   | 26 +++---
 drivers/iommu/amd_iommu_init.c  | 11 +++
 drivers/iommu/amd_iommu_proto.h |  1 +
 drivers/iommu/amd_iommu_types.h | 22 ++
 drivers/iommu/amd_iommu_v2.c| 18 +-
 5 files changed, 54 insertions(+), 24 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index cea90d5..22520f6 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -120,28 +120,6 @@ int amd_iommu_max_glx_val = -1;
 static struct dma_map_ops amd_iommu_dma_ops;
 
 /*
- * This struct contains device specific data for the IOMMU
- */
-struct iommu_dev_data {
-   struct list_head list;/* For domain->dev_list */
-   struct list_head dev_data_list;   /* For global dev_data_list */
-   struct protection_domain *domain; /* Domain the device is bound to */
-   u16 devid;/* PCI Device ID */
-   u16 alias;/* Alias Device ID */
-   bool iommu_v2;/* Device can make use of IOMMUv2 */
-   bool passthrough; /* Device is identity mapped */
-   struct {
-   bool enabled;
-   int qdep;
-   } ats;/* ATS state */
-   bool pri_tlp; /* PASID TLB required for
-PPR completions */
-   u32 errata;   /* Bitmap for errata to apply */
-   bool use_vapic;   /* Enable device to use vapic mode */
-   bool defer_attach;
-};
-
-/*
  * general struct to manage commands send to an IOMMU
  */
 struct iommu_cmd {
@@ -355,10 +333,11 @@ static struct iommu_dev_data *find_dev_data(u16 devid)
return dev_data;
 }
 
-static struct iommu_dev_data *get_dev_data(struct device *dev)
+struct iommu_dev_data *get_dev_data(struct device *dev)
 {
return dev->archdata.iommu;
 }
+EXPORT_SYMBOL(get_dev_data);
 
 /*
 * Find or create an IOMMU group for a acpihid device.
@@ -2378,6 +2357,7 @@ static int dir2prot(enum dma_data_direction direction)
else
return 0;
 }
+
 /*
  * This function contains common code for mapping of a physically
  * contiguous memory region into DMA address space. It is used by all
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 4427c63..d362b63 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -204,6 +204,7 @@ u16 *amd_iommu_alias_table;
  * for a specific device. It is also indexed by the PCI device id.
  */
 struct amd_iommu **amd_iommu_rlookup_table;
+EXPORT_SYMBOL(amd_iommu_rlookup_table);
 
 /*
  * This table is used to find the irq remapping table for a given device id
@@ -257,6 +258,7 @@ bool translation_pre_enabled(struct amd_iommu *iommu)
 {
return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
 }
+EXPORT_SYMBOL(translation_pre_enabled);
 
 static void clear_translation_pre_enabled(struct amd_iommu *iommu)
 {
@@ -844,6 +846,7 @@ static int copy_dev_tables(void)
struct amd_iommu *iommu;
u16 dom_id, dte_v, irq_v;
static int copied;
+   u64 tmp;
 
for_each_iommu(iommu) {
if (!translation_pre_enabled(iommu)) {
@@ -887,6 +890,14 @@ static int copy_dev_tables(void)
amd_iommu_dev_table[devid].data[1]
= old_devtb[devid].data[1];
__set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
+   /* If gcr3 table existed, mask it out */
+   if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
+   tmp = DTE_GCR3_VAL_B(~0ULL) << 
DTE_GCR3_SHIFT_B;
+   tmp |= DTE_GCR3_VAL_C(~0ULL) << 
DTE_GCR3_SHIFT_C;
+   amd_iommu_dev_table[devid].data[1] &= 
~tmp;
+   tmp = DTE_GCR3_VAL_A(~0ULL) << 
DTE_GCR3_SHIFT_A;
+   amd_iommu_dev_table[devid].data[0] &= 
~tmp;
+   }
}
 
irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index 9560183..d6a2c36 100644
--- a/drivers/iommu/amd_iommu_proto.h
+++

[PATCH v7 05/12] iommu/amd: copy old trans table from old kernel

2016-11-24 Thread Baoquan He

Several things need to be done here:
- If the iommu is pre-enabled in a normal kernel, just disable it and print
  a warning.

- If copying the dev table of the old kernel fails, continue to proceed as
  in a normal kernel.

- Disable and re-enable the event/cmd buffers, install the copied DTE table
  into the register, and detect and enable guest vapic.

- Flush all caches.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c | 51 --
 1 file changed, 44 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 8fc9840..4233f26 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 
+#include 
 #include "amd_iommu_proto.h"
 #include "amd_iommu_types.h"
 #include "irq_remapping.h"
@@ -1481,9 +1482,12 @@ static int __init init_iommu_one(struct amd_iommu 
*iommu, struct ivhd_header *h)
iommu->int_enabled = false;
 
init_translation_status(iommu);
-
-   if (translation_pre_enabled(iommu))
-   pr_warn("Translation is already enabled - trying to copy 
translation structures\n");
+   if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
+   iommu_disable(iommu);
+   clear_translation_pre_enabled(iommu);
+   pr_warn("Translation was enabled for IOMMU:%d but we are not in 
kdump mode\n",
+   iommu->index);
+   }
 
ret = init_iommu_from_acpi(iommu, h);
if (ret)
@@ -1975,8 +1979,7 @@ static int __init init_memory_definitions(struct 
acpi_table_header *table)
 }
 
 /*
- * Init the device table to not allow DMA access for devices and
- * suppress all page faults
+ * Init the device table to not allow DMA access for devices
  */
 static void init_device_table_dma(void)
 {
@@ -2117,9 +2120,43 @@ static void early_enable_iommu(struct amd_iommu *iommu)
 static void early_enable_iommus(void)
 {
struct amd_iommu *iommu;
+   bool is_pre_enabled = false;
 
-   for_each_iommu(iommu)
-   early_enable_iommu(iommu);
+   for_each_iommu(iommu) {
+   if (translation_pre_enabled(iommu)) {
+   is_pre_enabled = true;
+   break;
+   }
+   }
+
+   if (!is_pre_enabled) {
+   for_each_iommu(iommu)
+   early_enable_iommu(iommu);
+   } else {
+   pr_warn("Translation is already enabled - trying to copy 
translation structures\n");
+   if (copy_dev_tables()) {
+   pr_err("Failed to copy DEV table from previous 
kernel.\n");
+   /*
+* If failed to copy dev tables from old kernel, 
continue to proceed
+* as it does in normal kernel.
+*/
+   for_each_iommu(iommu) {
+   clear_translation_pre_enabled(iommu);
+   early_enable_iommu(iommu);
+   }
+   } else {
+   pr_info("Copied DEV table from previous kernel.\n");
+   for_each_iommu(iommu) {
+   iommu_disable_command_buffer(iommu);
+   iommu_disable_event_buffer(iommu);
+   iommu_enable_command_buffer(iommu);
+   iommu_enable_event_buffer(iommu);
+   iommu_enable_ga(iommu);
+   iommu_set_device_table(iommu);
+   iommu_flush_all_caches(iommu);
+   }
+   }
+   }
 
 #ifdef CONFIG_IRQ_REMAP
if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
-- 
2.5.5

[PATCH v7 11/12] iommu: Assign the direct mapped domain to group->domain

2016-11-24 Thread Baoquan He

In iommu_request_dm_for_dev(), all devices of the group have been attached
to the newly created direct mapped domain. We should store the domain into
group->domain so that it works for iommu_get_domain_for_dev() and
get_domain().

Signed-off-by: Baoquan He 
---
 drivers/iommu/iommu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 0262eee..a2d1a8f 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1612,6 +1612,7 @@ int iommu_request_dm_for_dev(struct device *dev)
if (group->default_domain)
iommu_domain_free(group->default_domain);
group->default_domain = dm_domain;
+   group->domain = dm_domain;
 
pr_info("Using direct mapping for device %s\n", dev_name(dev));
 
-- 
2.5.5

[PATCH v7 09/12] iommu/amd: Don't copy GCR3 table root pointer

2016-11-24 Thread Baoquan He

When the iommu is pre-enabled in the kdump kernel and a device is set up
with guest translations (DTE.GV=1), don't copy the GCR3 table root pointer,
but move the device over to an empty guest-cr3 table and handle the
faults in the PPR log (which answers them with INVALID). After all, these
PPR faults are recoverable for the device, and we should not allow the
device to change the old kernel's data when we don't have to.

Signed-off-by: Baoquan He 
Suggested-by: Joerg Roedel 
---
 drivers/iommu/amd_iommu.c   | 26 +++---
 drivers/iommu/amd_iommu_init.c  | 11 +++
 drivers/iommu/amd_iommu_proto.h |  1 +
 drivers/iommu/amd_iommu_types.h | 22 ++
 drivers/iommu/amd_iommu_v2.c| 18 +-
 5 files changed, 54 insertions(+), 24 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index cea90d5..22520f6 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -120,28 +120,6 @@ int amd_iommu_max_glx_val = -1;
 static struct dma_map_ops amd_iommu_dma_ops;
 
 /*
- * This struct contains device specific data for the IOMMU
- */
-struct iommu_dev_data {
-   struct list_head list;/* For domain->dev_list */
-   struct list_head dev_data_list;   /* For global dev_data_list */
-   struct protection_domain *domain; /* Domain the device is bound to */
-   u16 devid;/* PCI Device ID */
-   u16 alias;/* Alias Device ID */
-   bool iommu_v2;/* Device can make use of IOMMUv2 */
-   bool passthrough; /* Device is identity mapped */
-   struct {
-   bool enabled;
-   int qdep;
-   } ats;/* ATS state */
-   bool pri_tlp; /* PASID TLB required for
-PPR completions */
-   u32 errata;   /* Bitmap for errata to apply */
-   bool use_vapic;   /* Enable device to use vapic mode */
-   bool defer_attach;
-};
-
-/*
  * general struct to manage commands send to an IOMMU
  */
 struct iommu_cmd {
@@ -355,10 +333,11 @@ static struct iommu_dev_data *find_dev_data(u16 devid)
return dev_data;
 }
 
-static struct iommu_dev_data *get_dev_data(struct device *dev)
+struct iommu_dev_data *get_dev_data(struct device *dev)
 {
return dev->archdata.iommu;
 }
+EXPORT_SYMBOL(get_dev_data);
 
 /*
 * Find or create an IOMMU group for a acpihid device.
@@ -2378,6 +2357,7 @@ static int dir2prot(enum dma_data_direction direction)
else
return 0;
 }
+
 /*
  * This function contains common code for mapping of a physically
  * contiguous memory region into DMA address space. It is used by all
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 4427c63..d362b63 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -204,6 +204,7 @@ u16 *amd_iommu_alias_table;
  * for a specific device. It is also indexed by the PCI device id.
  */
 struct amd_iommu **amd_iommu_rlookup_table;
+EXPORT_SYMBOL(amd_iommu_rlookup_table);
 
 /*
  * This table is used to find the irq remapping table for a given device id
@@ -257,6 +258,7 @@ bool translation_pre_enabled(struct amd_iommu *iommu)
 {
return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
 }
+EXPORT_SYMBOL(translation_pre_enabled);
 
 static void clear_translation_pre_enabled(struct amd_iommu *iommu)
 {
@@ -844,6 +846,7 @@ static int copy_dev_tables(void)
struct amd_iommu *iommu;
u16 dom_id, dte_v, irq_v;
static int copied;
+   u64 tmp;
 
for_each_iommu(iommu) {
if (!translation_pre_enabled(iommu)) {
@@ -887,6 +890,14 @@ static int copy_dev_tables(void)
amd_iommu_dev_table[devid].data[1]
= old_devtb[devid].data[1];
__set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
+   /* If gcr3 table existed, mask it out */
+   if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
+   tmp = DTE_GCR3_VAL_B(~0ULL) << 
DTE_GCR3_SHIFT_B;
+   tmp |= DTE_GCR3_VAL_C(~0ULL) << 
DTE_GCR3_SHIFT_C;
+   amd_iommu_dev_table[devid].data[1] &= 
~tmp;
+   tmp = DTE_GCR3_VAL_A(~0ULL) << 
DTE_GCR3_SHIFT_A;
+   amd_iommu_dev_table[devid].data[0] &= 
~tmp;
+   }
}
 
irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index 9560183..d6a2c36 100644
--- a/drivers/iommu/amd_iommu_proto.h
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -94,4 +94,5 @@

[PATCH v7 05/12] iommu/amd: copy old trans table from old kernel

2016-11-24 Thread Baoquan He

Several things need to be done here:
- If the iommu is pre-enabled in a normal kernel, just disable it and print
  a warning.

- If copying the dev table of the old kernel fails, continue to proceed as
  in a normal kernel.

- Disable and re-enable the event/cmd buffers, install the copied DTE table
  into the register, and detect and enable guest vapic.

- Flush all caches.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c | 51 --
 1 file changed, 44 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 8fc9840..4233f26 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 
+#include 
 #include "amd_iommu_proto.h"
 #include "amd_iommu_types.h"
 #include "irq_remapping.h"
@@ -1481,9 +1482,12 @@ static int __init init_iommu_one(struct amd_iommu 
*iommu, struct ivhd_header *h)
iommu->int_enabled = false;
 
init_translation_status(iommu);
-
-   if (translation_pre_enabled(iommu))
-   pr_warn("Translation is already enabled - trying to copy 
translation structures\n");
+   if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
+   iommu_disable(iommu);
+   clear_translation_pre_enabled(iommu);
+   pr_warn("Translation was enabled for IOMMU:%d but we are not in 
kdump mode\n",
+   iommu->index);
+   }
 
ret = init_iommu_from_acpi(iommu, h);
if (ret)
@@ -1975,8 +1979,7 @@ static int __init init_memory_definitions(struct 
acpi_table_header *table)
 }
 
 /*
- * Init the device table to not allow DMA access for devices and
- * suppress all page faults
+ * Init the device table to not allow DMA access for devices
  */
 static void init_device_table_dma(void)
 {
@@ -2117,9 +2120,43 @@ static void early_enable_iommu(struct amd_iommu *iommu)
 static void early_enable_iommus(void)
 {
struct amd_iommu *iommu;
+   bool is_pre_enabled = false;
 
-   for_each_iommu(iommu)
-   early_enable_iommu(iommu);
+   for_each_iommu(iommu) {
+   if (translation_pre_enabled(iommu)) {
+   is_pre_enabled = true;
+   break;
+   }
+   }
+
+   if (!is_pre_enabled) {
+   for_each_iommu(iommu)
+   early_enable_iommu(iommu);
+   } else {
+   pr_warn("Translation is already enabled - trying to copy 
translation structures\n");
+   if (copy_dev_tables()) {
+   pr_err("Failed to copy DEV table from previous 
kernel.\n");
+   /*
+* If failed to copy dev tables from old kernel, 
continue to proceed
+* as it does in normal kernel.
+*/
+   for_each_iommu(iommu) {
+   clear_translation_pre_enabled(iommu);
+   early_enable_iommu(iommu);
+   }
+   } else {
+   pr_info("Copied DEV table from previous kernel.\n");
+   for_each_iommu(iommu) {
+   iommu_disable_command_buffer(iommu);
+   iommu_disable_event_buffer(iommu);
+   iommu_enable_command_buffer(iommu);
+   iommu_enable_event_buffer(iommu);
+   iommu_enable_ga(iommu);
+   iommu_set_device_table(iommu);
+   iommu_flush_all_caches(iommu);
+   }
+   }
+   }
 
 #ifdef CONFIG_IRQ_REMAP
if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
-- 
2.5.5

[PATCH v7 12/12] iommu/amd: Allocate memory below 4G for dev table if translation pre-enabled

2016-11-24 Thread Baoquan He

AMD pointed out it's unsafe to update the device-table while iommu
is enabled. It turns out that device-table pointer update is split
up into two 32bit writes in the IOMMU hardware. So updating it while
the IOMMU is enabled could have some nasty side effects.

The only way to work around this is to allocate the device-table below
4GB if translation is pre-enabled in kdump kernel. If allocation failed,
still use the old one.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index d362b63..f17f297 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -2149,11 +2149,23 @@ static void early_enable_iommu(struct amd_iommu *iommu)
  */
 static void early_enable_iommus(void)
 {
+   struct dev_table_entry *dev_tbl;
struct amd_iommu *iommu;
bool is_pre_enabled = false;
 
for_each_iommu(iommu) {
if (translation_pre_enabled(iommu)) {
+   gfp_t gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32;;
+
+   dev_tbl = (void *)__get_free_pages(gfp_flag,
+   get_order(dev_table_size));
+   if (dev_tbl != NULL) {
+   memcpy(dev_tbl, amd_iommu_dev_table, 
dev_table_size);
+   free_pages((unsigned long)amd_iommu_dev_table,
+   get_order(dev_table_size));
+   amd_iommu_dev_table = dev_tbl;
+   }
+
is_pre_enabled = true;
break;
}
-- 
2.5.5

[PATCH v7 00/12] Fix kdump faults on system with amd iommu

2016-11-24 Thread Baoquan He

This is the v7 post.

The principle of the fix is similar to the one used for the Intel IOMMU:
defer the assignment of a device to a domain until device driver init. In
this version, a new call-back, is_attach_deferred, is added to iommu-ops; it
is used to check whether the domain attach/detach needs to be deferred in
the iommu-core code.

v5:
bnx2 NIC can't reset itself during driver init. Post patch to reset
it during driver init. IO_PAGE_FAULT can't be seen anymore.

Below is link of v5 post.
https://lists.linuxfoundation.org/pipermail/iommu/2016-September/018527.html

v5->v6:
Made the following main changes according to Joerg's comments:
- Add a sanity check when copying the old dev tables.

- If a device is set up with guest translations (DTE.GV=1), then don't
  copy that information but move the device over to an empty guest-cr3
  table and handle the faults in the PPR log (which just answer them
  with INVALID).

v6->v7:
Two main changes are made according to Joerg's suggestion:
- Add the is_attach_deferred call-back to iommu-ops. With this, the domain
  attach can be deferred to device driver init cleanly.

- Allocate memory below 4G for dev table if translation pre-enabled.
  AMD engineer pointed out that it's unsafe to update the device-table
  while iommu is enabled. device-table pointer update is split up into
  two 32bit writes in the IOMMU hardware. So updating it while the IOMMU
  is enabled could have some nasty side effects.

Baoquan He (12):
  iommu/amd: Detect pre enabled translation
  iommu/amd: add several helper function
  iommu/amd: Define bit fields for DTE particularly
  iommu/amd: Add function copy_dev_tables
  iommu/amd: copy old trans table from old kernel
  iommu: Add is_attach_deferred call-back to iommu-ops
  iommu/amd: Use is_attach_deferred call-back
  iommu/amd: Add sanity check of irq remap information of old dev table
entry
  iommu/amd: Don't copy GCR3 table root pointer
  iommu/amd: Clear out the GV flag when handle deferred domain attach
  iommu: Assign the direct mapped domain to group->domain
  iommu/amd: Allocate memory below 4G for dev table if translation
pre-enabled

 drivers/iommu/amd_iommu.c   |  78 +---
 drivers/iommu/amd_iommu_init.c  | 201 +---
 drivers/iommu/amd_iommu_proto.h |   2 +
 drivers/iommu/amd_iommu_types.h |  53 ++-
 drivers/iommu/amd_iommu_v2.c|  18 +++-
 drivers/iommu/iommu.c   |   9 ++
 include/linux/iommu.h   |   1 +
 7 files changed, 313 insertions(+), 49 deletions(-)

-- 
2.5.5

[PATCH v7 10/12] iommu/amd: Clear out the GV flag when handle deferred domain attach

2016-11-24 Thread Baoquan He

When handling a deferred domain attach, we need to check whether the
domain is v2. If it is not, we should clear out the GV flag, which could
have been copied from the old device table entry.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 22520f6..3a8e4ae 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1839,6 +1839,11 @@ static void clear_dte_entry(u16 devid)
amd_iommu_apply_erratum_63(devid);
 }
 
+static void clear_dte_flag_gv(u16 devid)
+{
+   amd_iommu_dev_table[devid].data[0] &= (~DTE_FLAG_GV);
+}
+
 static void do_attach(struct iommu_dev_data *dev_data,
  struct protection_domain *domain)
 {
@@ -2299,6 +2304,7 @@ static void queue_add(struct dma_ops_domain *dma_dom,
  */
 static struct protection_domain *get_domain(struct device *dev)
 {
+   struct iommu_dev_data *dev_data = get_dev_data(dev);
struct protection_domain *domain;
struct iommu_domain *io_domain;
 
@@ -2306,11 +2312,21 @@ static struct protection_domain *get_domain(struct 
device *dev)
return ERR_PTR(-EINVAL);
 
domain = get_dev_data(dev)->domain;
-   if (domain == NULL && get_dev_data(dev)->defer_attach) {
+   if (domain == NULL && dev_data->defer_attach) {
+   u16 alias = amd_iommu_alias_table[dev_data->devid];
get_dev_data(dev)->defer_attach = false;
io_domain = iommu_get_domain_for_dev(dev);
domain = to_pdomain(io_domain);
attach_device(dev, domain);
+   /*
+* If the deferred attached domain is not v2, should clear out
+* the old GV flag.
+*/
+   if (!(domain->flags & PD_IOMMUV2_MASK)) {
+   clear_dte_flag_gv(dev_data->devid);
+   if (alias != dev_data->devid)
+   clear_dte_flag_gv(dev_data->devid);
+   }
}
if (!dma_ops_domain(domain))
return ERR_PTR(-EBUSY);
-- 
2.5.5

[PATCH v7 06/12] iommu: Add is_attach_deferred call-back to iommu-ops

2016-11-24 Thread Baoquan He

This new call-back will be used to check whether the domain attach needs to
be deferred for now. If so, the domain attach/detach will return immediately.

Signed-off-by: Baoquan He 
---
 drivers/iommu/iommu.c | 8 
 include/linux/iommu.h | 1 +
 2 files changed, 9 insertions(+)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 9a2f196..0262eee 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1083,6 +1083,10 @@ static int __iommu_attach_device(struct iommu_domain 
*domain,
 struct device *dev)
 {
int ret;
+   if ((domain->ops->is_attach_deferred != NULL) &&
+   domain->ops->is_attach_deferred(domain, dev))
+   return 0;
+
if (unlikely(domain->ops->attach_dev == NULL))
return -ENODEV;
 
@@ -1124,6 +1128,10 @@ EXPORT_SYMBOL_GPL(iommu_attach_device);
 static void __iommu_detach_device(struct iommu_domain *domain,
  struct device *dev)
 {
+   if ((domain->ops->is_attach_deferred != NULL) &&
+   domain->ops->is_attach_deferred(domain, dev))
+   return;
+
if (unlikely(domain->ops->detach_dev == NULL))
return;
 
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 436dc21..e179313 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -200,6 +200,7 @@ struct iommu_ops {
u32 (*domain_get_windows)(struct iommu_domain *domain);
 
int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
+   bool (*is_attach_deferred)(struct iommu_domain *domain, struct device 
*dev);
 
unsigned long pgsize_bitmap;
 };
-- 
2.5.5

[PATCH v7 10/12] iommu/amd: Clear out the GV flag when handle deferred domain attach

2016-11-24 Thread Baoquan He

When handling a deferred domain attach, we need to check whether the
domain is v2. If it is not, we should clear out the GV flag, which could
have been copied from the old device table entry.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu.c | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 22520f6..3a8e4ae 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -1839,6 +1839,11 @@ static void clear_dte_entry(u16 devid)
amd_iommu_apply_erratum_63(devid);
 }
 
+static void clear_dte_flag_gv(u16 devid)
+{
+   amd_iommu_dev_table[devid].data[0] &= (~DTE_FLAG_GV);
+}
+
 static void do_attach(struct iommu_dev_data *dev_data,
  struct protection_domain *domain)
 {
@@ -2299,6 +2304,7 @@ static void queue_add(struct dma_ops_domain *dma_dom,
  */
 static struct protection_domain *get_domain(struct device *dev)
 {
+   struct iommu_dev_data *dev_data = get_dev_data(dev);
struct protection_domain *domain;
struct iommu_domain *io_domain;
 
@@ -2306,11 +2312,21 @@ static struct protection_domain *get_domain(struct 
device *dev)
return ERR_PTR(-EINVAL);
 
domain = get_dev_data(dev)->domain;
-   if (domain == NULL && get_dev_data(dev)->defer_attach) {
+   if (domain == NULL && dev_data->defer_attach) {
+   u16 alias = amd_iommu_alias_table[dev_data->devid];
get_dev_data(dev)->defer_attach = false;
io_domain = iommu_get_domain_for_dev(dev);
domain = to_pdomain(io_domain);
attach_device(dev, domain);
+   /*
+* If the deferred attached domain is not v2, should clear out
+* the old GV flag.
+*/
+   if (!(domain->flags & PD_IOMMUV2_MASK)) {
+   clear_dte_flag_gv(dev_data->devid);
+   if (alias != dev_data->devid)
+   clear_dte_flag_gv(dev_data->devid);
+   }
}
if (!dma_ops_domain(domain))
return ERR_PTR(-EBUSY);
-- 
2.5.5

[PATCH v7 06/12] iommu: Add is_attach_deferred call-back to iommu-ops

2016-11-24 Thread Baoquan He

This new call-back will be used to check whether the domain attach needs to
be deferred for now. If so, the domain attach/detach will return immediately.

Signed-off-by: Baoquan He 
---
 drivers/iommu/iommu.c | 8 
 include/linux/iommu.h | 1 +
 2 files changed, 9 insertions(+)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 9a2f196..0262eee 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1083,6 +1083,10 @@ static int __iommu_attach_device(struct iommu_domain 
*domain,
 struct device *dev)
 {
int ret;
+   if ((domain->ops->is_attach_deferred != NULL) &&
+   domain->ops->is_attach_deferred(domain, dev))
+   return 0;
+
if (unlikely(domain->ops->attach_dev == NULL))
return -ENODEV;
 
@@ -1124,6 +1128,10 @@ EXPORT_SYMBOL_GPL(iommu_attach_device);
 static void __iommu_detach_device(struct iommu_domain *domain,
  struct device *dev)
 {
+   if ((domain->ops->is_attach_deferred != NULL) &&
+   domain->ops->is_attach_deferred(domain, dev))
+   return;
+
if (unlikely(domain->ops->detach_dev == NULL))
return;
 
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 436dc21..e179313 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -200,6 +200,7 @@ struct iommu_ops {
u32 (*domain_get_windows)(struct iommu_domain *domain);
 
int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
+   bool (*is_attach_deferred)(struct iommu_domain *domain, struct device 
*dev);
 
unsigned long pgsize_bitmap;
 };
-- 
2.5.5

[PATCH v7 02/12] iommu/amd: add several helper function

2016-11-24 Thread Baoquan He

Move the per-IOMMU enabling code into a wrapper function, early_enable_iommu().
This makes the later kdump changes easier.

Also add iommu_disable_command_buffer() and iommu_disable_event_buffer()
for later use.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c | 42 +++---
 1 file changed, 31 insertions(+), 11 deletions(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 5ad1e023..9458f7c 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -625,6 +625,14 @@ static void iommu_enable_command_buffer(struct amd_iommu 
*iommu)
amd_iommu_reset_cmd_buffer(iommu);
 }
 
+/*
+ * This function disables the command buffer
+ */
+static void iommu_disable_command_buffer(struct amd_iommu *iommu)
+{
+   iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
+}
+
 static void __init free_command_buffer(struct amd_iommu *iommu)
 {
free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
@@ -657,6 +665,14 @@ static void iommu_enable_event_buffer(struct amd_iommu 
*iommu)
iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
 }
 
+/*
+ * This function disables the event buffer
+ */
+static void iommu_disable_event_buffer(struct amd_iommu *iommu)
+{
+   iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
+}
+
 static void __init free_event_buffer(struct amd_iommu *iommu)
 {
free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
@@ -2026,6 +2042,19 @@ static void iommu_enable_ga(struct amd_iommu *iommu)
 #endif
 }
 
+static void early_enable_iommu(struct amd_iommu *iommu)
+{
+   iommu_disable(iommu);
+   iommu_init_flags(iommu);
+   iommu_set_device_table(iommu);
+   iommu_enable_command_buffer(iommu);
+   iommu_enable_event_buffer(iommu);
+   iommu_set_exclusion_range(iommu);
+   iommu_enable_ga(iommu);
+   iommu_enable(iommu);
+   iommu_flush_all_caches(iommu);
+}
+
 /*
  * This function finally enables all IOMMUs found in the system after
  * they have been initialized
@@ -2034,17 +2063,8 @@ static void early_enable_iommus(void)
 {
struct amd_iommu *iommu;
 
-   for_each_iommu(iommu) {
-   iommu_disable(iommu);
-   iommu_init_flags(iommu);
-   iommu_set_device_table(iommu);
-   iommu_enable_command_buffer(iommu);
-   iommu_enable_event_buffer(iommu);
-   iommu_set_exclusion_range(iommu);
-   iommu_enable_ga(iommu);
-   iommu_enable(iommu);
-   iommu_flush_all_caches(iommu);
-   }
+   for_each_iommu(iommu)
+   early_enable_iommu(iommu);
 
 #ifdef CONFIG_IRQ_REMAP
if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
-- 
2.5.5

[PATCH v7 02/12] iommu/amd: add several helper function

2016-11-24 Thread Baoquan He

Move the per-IOMMU enabling code into a wrapper function, early_enable_iommu().
This makes the later kdump changes easier.

Also add iommu_disable_command_buffer() and iommu_disable_event_buffer()
for later use.

Signed-off-by: Baoquan He 
---
 drivers/iommu/amd_iommu_init.c | 42 +++---
 1 file changed, 31 insertions(+), 11 deletions(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 5ad1e023..9458f7c 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -625,6 +625,14 @@ static void iommu_enable_command_buffer(struct amd_iommu 
*iommu)
amd_iommu_reset_cmd_buffer(iommu);
 }
 
+/*
+ * This function disables the command buffer
+ */
+static void iommu_disable_command_buffer(struct amd_iommu *iommu)
+{
+   iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
+}
+
 static void __init free_command_buffer(struct amd_iommu *iommu)
 {
free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
@@ -657,6 +665,14 @@ static void iommu_enable_event_buffer(struct amd_iommu 
*iommu)
iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
 }
 
+/*
+ * This function disables the event buffer
+ */
+static void iommu_disable_event_buffer(struct amd_iommu *iommu)
+{
+   iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
+}
+
 static void __init free_event_buffer(struct amd_iommu *iommu)
 {
free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
@@ -2026,6 +2042,19 @@ static void iommu_enable_ga(struct amd_iommu *iommu)
 #endif
 }
 
+static void early_enable_iommu(struct amd_iommu *iommu)
+{
+   iommu_disable(iommu);
+   iommu_init_flags(iommu);
+   iommu_set_device_table(iommu);
+   iommu_enable_command_buffer(iommu);
+   iommu_enable_event_buffer(iommu);
+   iommu_set_exclusion_range(iommu);
+   iommu_enable_ga(iommu);
+   iommu_enable(iommu);
+   iommu_flush_all_caches(iommu);
+}
+
 /*
  * This function finally enables all IOMMUs found in the system after
  * they have been initialized
@@ -2034,17 +2063,8 @@ static void early_enable_iommus(void)
 {
struct amd_iommu *iommu;
 
-   for_each_iommu(iommu) {
-   iommu_disable(iommu);
-   iommu_init_flags(iommu);
-   iommu_set_device_table(iommu);
-   iommu_enable_command_buffer(iommu);
-   iommu_enable_event_buffer(iommu);
-   iommu_set_exclusion_range(iommu);
-   iommu_enable_ga(iommu);
-   iommu_enable(iommu);
-   iommu_flush_all_caches(iommu);
-   }
+   for_each_iommu(iommu)
+   early_enable_iommu(iommu);
 
 #ifdef CONFIG_IRQ_REMAP
if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
-- 
2.5.5

1 2 3 4 5 6 7 8 9 10 >

1 - 100 of 1682 matches

Mail list logo