date:20180614

[PATCH v3 2/3] mtd: rawnand: denali_dt: use dev as a shorthand of &pdev->dev

2018-06-14 Thread Masahiro Yamada

The probe function references &pdev->dev many times.  Add 'dev' as
a shorthand.

Signed-off-by: Masahiro Yamada 
---

Changes in v3: None
Changes in v2: None

 drivers/mtd/nand/raw/denali_dt.c | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/drivers/mtd/nand/raw/denali_dt.c b/drivers/mtd/nand/raw/denali_dt.c
index ce6239d..afaae37 100644
--- a/drivers/mtd/nand/raw/denali_dt.c
+++ b/drivers/mtd/nand/raw/denali_dt.c
@@ -81,38 +81,39 @@ MODULE_DEVICE_TABLE(of, denali_nand_dt_ids);
 
 static int denali_dt_probe(struct platform_device *pdev)
 {
+   struct device *dev = &pdev->dev;
struct resource *res;
struct denali_dt *dt;
const struct denali_dt_data *data;
struct denali_nand_info *denali;
int ret;
 
-   dt = devm_kzalloc(&pdev->dev, sizeof(*dt), GFP_KERNEL);
+   dt = devm_kzalloc(dev, sizeof(*dt), GFP_KERNEL);
if (!dt)
return -ENOMEM;
denali = &dt->denali;
 
-   data = of_device_get_match_data(&pdev->dev);
+   data = of_device_get_match_data(dev);
if (data) {
denali->revision = data->revision;
denali->caps = data->caps;
denali->ecc_caps = data->ecc_caps;
}
 
-   denali->dev = &pdev->dev;
+   denali->dev = dev;
denali->irq = platform_get_irq(pdev, 0);
if (denali->irq < 0) {
-   dev_err(&pdev->dev, "no irq defined\n");
+   dev_err(dev, "no irq defined\n");
return denali->irq;
}
 
res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "denali_reg");
-   denali->reg = devm_ioremap_resource(&pdev->dev, res);
+   denali->reg = devm_ioremap_resource(dev, res);
if (IS_ERR(denali->reg))
return PTR_ERR(denali->reg);
 
res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_data");
-   denali->host = devm_ioremap_resource(&pdev->dev, res);
+   denali->host = devm_ioremap_resource(dev, res);
if (IS_ERR(denali->host))
return PTR_ERR(denali->host);
 
@@ -120,19 +121,19 @@ static int denali_dt_probe(struct platform_device *pdev)
 * A single anonymous clock is supported for the backward compatibility.
 * New platforms should support all the named clocks.
 */
-   dt->clk = devm_clk_get(&pdev->dev, "nand");
+   dt->clk = devm_clk_get(dev, "nand");
if (IS_ERR(dt->clk))
-   dt->clk = devm_clk_get(&pdev->dev, NULL);
+   dt->clk = devm_clk_get(dev, NULL);
if (IS_ERR(dt->clk)) {
-   dev_err(&pdev->dev, "no clk available\n");
+   dev_err(dev, "no clk available\n");
return PTR_ERR(dt->clk);
}
 
-   dt->clk_x = devm_clk_get(&pdev->dev, "nand_x");
+   dt->clk_x = devm_clk_get(dev, "nand_x");
if (IS_ERR(dt->clk_x))
dt->clk_x = NULL;
 
-   dt->clk_ecc = devm_clk_get(&pdev->dev, "ecc");
+   dt->clk_ecc = devm_clk_get(dev, "ecc");
if (IS_ERR(dt->clk_ecc))
dt->clk_ecc = NULL;
 
@@ -155,7 +156,7 @@ static int denali_dt_probe(struct platform_device *pdev)
 * Hardcode the clock rates for the backward compatibility.
 * This works for both SOCFPGA and UniPhier.
 */
-   dev_notice(&pdev->dev,
+   dev_notice(dev,
   "necessary clock is missing. default clock rates are used.\n");
denali->clk_x_rate = 2;
}
-- 
2.7.4

[PATCH v3 2/3] mtd: rawnand: denali_dt: use dev as a shorthand of &pdev->dev

2018-06-14 Thread Masahiro Yamada

The probe function references &pdev->dev many times.  Add 'dev' as
a shorthand.

Signed-off-by: Masahiro Yamada 
---

Changes in v3: None
Changes in v2: None

 drivers/mtd/nand/raw/denali_dt.c | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/drivers/mtd/nand/raw/denali_dt.c b/drivers/mtd/nand/raw/denali_dt.c
index ce6239d..afaae37 100644
--- a/drivers/mtd/nand/raw/denali_dt.c
+++ b/drivers/mtd/nand/raw/denali_dt.c
@@ -81,38 +81,39 @@ MODULE_DEVICE_TABLE(of, denali_nand_dt_ids);
 
 static int denali_dt_probe(struct platform_device *pdev)
 {
+   struct device *dev = &pdev->dev;
struct resource *res;
struct denali_dt *dt;
const struct denali_dt_data *data;
struct denali_nand_info *denali;
int ret;
 
-   dt = devm_kzalloc(&pdev->dev, sizeof(*dt), GFP_KERNEL);
+   dt = devm_kzalloc(dev, sizeof(*dt), GFP_KERNEL);
if (!dt)
return -ENOMEM;
denali = &dt->denali;
 
-   data = of_device_get_match_data(&pdev->dev);
+   data = of_device_get_match_data(dev);
if (data) {
denali->revision = data->revision;
denali->caps = data->caps;
denali->ecc_caps = data->ecc_caps;
}
 
-   denali->dev = &pdev->dev;
+   denali->dev = dev;
denali->irq = platform_get_irq(pdev, 0);
if (denali->irq < 0) {
-   dev_err(&pdev->dev, "no irq defined\n");
+   dev_err(dev, "no irq defined\n");
return denali->irq;
}
 
res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "denali_reg");
-   denali->reg = devm_ioremap_resource(&pdev->dev, res);
+   denali->reg = devm_ioremap_resource(dev, res);
if (IS_ERR(denali->reg))
return PTR_ERR(denali->reg);
 
res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nand_data");
-   denali->host = devm_ioremap_resource(&pdev->dev, res);
+   denali->host = devm_ioremap_resource(dev, res);
if (IS_ERR(denali->host))
return PTR_ERR(denali->host);
 
@@ -120,19 +121,19 @@ static int denali_dt_probe(struct platform_device *pdev)
 * A single anonymous clock is supported for the backward compatibility.
 * New platforms should support all the named clocks.
 */
-   dt->clk = devm_clk_get(&pdev->dev, "nand");
+   dt->clk = devm_clk_get(dev, "nand");
if (IS_ERR(dt->clk))
-   dt->clk = devm_clk_get(&pdev->dev, NULL);
+   dt->clk = devm_clk_get(dev, NULL);
if (IS_ERR(dt->clk)) {
-   dev_err(&pdev->dev, "no clk available\n");
+   dev_err(dev, "no clk available\n");
return PTR_ERR(dt->clk);
}
 
-   dt->clk_x = devm_clk_get(&pdev->dev, "nand_x");
+   dt->clk_x = devm_clk_get(dev, "nand_x");
if (IS_ERR(dt->clk_x))
dt->clk_x = NULL;
 
-   dt->clk_ecc = devm_clk_get(&pdev->dev, "ecc");
+   dt->clk_ecc = devm_clk_get(dev, "ecc");
if (IS_ERR(dt->clk_ecc))
dt->clk_ecc = NULL;
 
@@ -155,7 +156,7 @@ static int denali_dt_probe(struct platform_device *pdev)
 * Hardcode the clock rates for the backward compatibility.
 * This works for both SOCFPGA and UniPhier.
 */
-   dev_notice(&pdev->dev,
+   dev_notice(dev,
   "necessary clock is missing. default clock rates are used.\n");
denali->clk_x_rate = 2;
}
-- 
2.7.4

Re: [PATCH v2] x86/e820: put !E820_TYPE_RAM regions into memblock.reserved

2018-06-14 Thread Naoya Horiguchi

On Thu, Jun 14, 2018 at 11:30:34PM +0200, Oscar Salvador wrote:
> On Thu, Jun 14, 2018 at 06:34:55AM +0000, Naoya Horiguchi wrote:
> > On Thu, Jun 14, 2018 at 07:38:59AM +0200, Oscar Salvador wrote:
> > > On Thu, Jun 14, 2018 at 05:16:18AM +0000, Naoya Horiguchi wrote:
> > ...
> > > > 
> > > > My concern is that there are a few E820 memory types rather than
> > > > E820_TYPE_RAM and E820_TYPE_RESERVED, and I'm not sure that putting them
> > > > all into memblock.reserved is really acceptable.
> > > 
> > > Hi Naoya,
> > > 
> > > Maybe you could just add to memblock.reserved, all unavailable ranges 
> > > within
> > > E820_TYPE_RAM.
> > > Actually, in your original patch, you are walking memblock.memory, which 
> > > should
> > > only contain E820_TYPE_RAM ranges (talking about x86).
> > > 
> > > So I think the below would do the trick as well?
> > > 
> > > @@ -1248,6 +1276,7 @@ void __init e820__memblock_setup(void)
> > >  {
> > > int i;
> > > u64 end;
> > > +   u64 next = 0;
> > >  
> > > /*
> > >  * The bootstrap memblock region count maximum is 128 entries
> > >  
> > > @@ -1269,6 +1299,14 @@ void __init e820__memblock_setup(void)
> > >  
> > > if (entry->type != E820_TYPE_RAM && entry->type != 
> > > E820_TYPE_RESERVED_KERN)
> > > continue;
> > >
> > > +   
> > > +   if (entry->type == E820_TYPE_RAM)
> > > +   if (next < entry->addr) {
> > > + memblock_reserve (next, next + (entry->addr - 
> > > next));
> > > + next = end;
> > > + }
> > > 
> > > With the above patch, I can no longer see the issues either.
> > 
> > I double-checked and this change looks good to me.
> > 
> > > 
> > > Although, there is a difference between this and your original patch.
> > > In your original patch, you are just zeroing the pages, while with this 
> > > one (or with your second patch),
> > > we will zero the page in reserve_bootmem_region(), but that function also 
> > > init
> > > some other fields of the struct page:
> > > 
> > > mm_zero_struct_page(page);
> > > set_page_links(page, zone, nid, pfn);
> > > init_page_count(page);
> > > page_mapcount_reset(page);
> > > page_cpupid_reset_last(page);
> > > 
> > > So I am not sure we want to bother doing that for pages that are really 
> > > unreachable.
> > 
> > I think that considering that /proc/kpageflags can check them, some data
> > (even if it's trivial) might be better than just zeros.
> > 
> > Here's the updated patch.
> > Thanks for the suggestion and testing!
> > 
> > ---
> > From: Naoya Horiguchi 
> > Date: Thu, 14 Jun 2018 14:44:36 +0900
> > Subject: [PATCH] x86/e820: put !E820_TYPE_RAM regions into memblock.reserved
> > 
> > There is a kernel panic that is triggered when reading /proc/kpageflags
> > on the kernel booted with kernel parameter 'memmap=nn[KMG]!ss[KMG]':
> > 
> >   BUG: unable to handle kernel paging request at fffe
> >   PGD 9b20e067 P4D 9b20e067 PUD 9b210067 PMD 0
> >   Oops:  [#1] SMP PTI
> >   CPU: 2 PID: 1728 Comm: page-types Not tainted 
> > 4.17.0-rc6-mm1-v4.17-rc6-180605-0816-00236-g2dfb086ef02c+ #160
> >   Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.fc28 
> > 04/01/2014
> >   RIP: 0010:stable_page_flags+0x27/0x3c0
> >   Code: 00 00 00 0f 1f 44 00 00 48 85 ff 0f 84 a0 03 00 00 41 54 55 49 89 
> > fc 53 48 8b 57 08 48 8b 2f 48 8d 42 ff 83 e2 01 48 0f 44 c7 <48> 8b 00 f6 
> > c4 01 0f 84 10 03 00 00 31 db 49 8b 54 24 08 4c 89 e7
> >   RSP: 0018:bbd44111fde0 EFLAGS: 00010202
> >   RAX: fffe RBX: 7fffeff9 RCX: 
> >   RDX: 0001 RSI: 0202 RDI: ed1182fff5c0
> >   RBP:  R08: 0001 R09: 0001
> >   R10: bbd44111fed8 R11:  R12: ed1182fff5c0
> >   R13: 000bffd7 R14: 02fff5c0 R15: bbd44111ff10
> >   FS:  7efc4335a500() GS:93a5bfc0() 
> > knlGS:
> >   CS:  0010 DS:  ES:  CR0: 80050033
> >   CR2: fffe CR3: b2a58000 CR4: 001406e0
> >   Call Trace:
> >kpageflags_read+0xc7/0x120
> >proc_reg_read+0x3c/0x60
> >__vfs_read+0x36/0x170
> >vfs_read+0x89/0x130
> >ksys_pread64+0x71/0x90
> >do_syscall_64+0x5b/0x160
> >entry_SYSCALL_64_after_hwframe+0x44/0xa9
> >   RIP: 0033:0x7efc42e75e23
> >   Code: 09 00 ba 9f 01 00 00 e8 ab 81 f4 ff 66 2e 0f 1f 84 00 00 00 00 00 
> > 90 83 3d 29 0a 2d 00 00 75 13 49 89 ca b8 11 00 00 00 0f 05 <48> 3d 01 f0 
> > ff ff 73 34 c3 48 83 ec 08 e8 db d3 01 00 48 89 04 24
> > 
> > According to kernel bisection, this problem became visible due to commit
> > f7f99100d8d9 which changes how struct pages are initialized.
> > 
> > Memblock layout affects the pfn ranges covered by node/zone. Consider
> > that we have a VM with 2 NUMA nodes and each node has 4GB memory, and
> > the

Re: [PATCH v2] x86/e820: put !E820_TYPE_RAM regions into memblock.reserved

2018-06-14 Thread Naoya Horiguchi

On Thu, Jun 14, 2018 at 11:30:34PM +0200, Oscar Salvador wrote:
> On Thu, Jun 14, 2018 at 06:34:55AM +0000, Naoya Horiguchi wrote:
> > On Thu, Jun 14, 2018 at 07:38:59AM +0200, Oscar Salvador wrote:
> > > On Thu, Jun 14, 2018 at 05:16:18AM +0000, Naoya Horiguchi wrote:
> > ...
> > > > 
> > > > My concern is that there are a few E820 memory types rather than
> > > > E820_TYPE_RAM and E820_TYPE_RESERVED, and I'm not sure that putting them
> > > > all into memblock.reserved is really acceptable.
> > > 
> > > Hi Naoya,
> > > 
> > > Maybe you could just add to memblock.reserved, all unavailable ranges 
> > > within
> > > E820_TYPE_RAM.
> > > Actually, in your original patch, you are walking memblock.memory, which 
> > > should
> > > only contain E820_TYPE_RAM ranges (talking about x86).
> > > 
> > > So I think the below would do the trick as well?
> > > 
> > > @@ -1248,6 +1276,7 @@ void __init e820__memblock_setup(void)
> > >  {
> > > int i;
> > > u64 end;
> > > +   u64 next = 0;
> > >  
> > > /*
> > >  * The bootstrap memblock region count maximum is 128 entries
> > >  
> > > @@ -1269,6 +1299,14 @@ void __init e820__memblock_setup(void)
> > >  
> > > if (entry->type != E820_TYPE_RAM && entry->type != 
> > > E820_TYPE_RESERVED_KERN)
> > > continue;
> > >
> > > +   
> > > +   if (entry->type == E820_TYPE_RAM)
> > > +   if (next < entry->addr) {
> > > + memblock_reserve (next, next + (entry->addr - 
> > > next));
> > > + next = end;
> > > + }
> > > 
> > > With the above patch, I can no longer see the issues either.
> > 
> > I double-checked and this change looks good to me.
> > 
> > > 
> > > Although, there is a difference between this and your original patch.
> > > In your original patch, you are just zeroing the pages, while with this 
> > > one (or with your second patch),
> > > we will zero the page in reserve_bootmem_region(), but that function also 
> > > init
> > > some other fields of the struct page:
> > > 
> > > mm_zero_struct_page(page);
> > > set_page_links(page, zone, nid, pfn);
> > > init_page_count(page);
> > > page_mapcount_reset(page);
> > > page_cpupid_reset_last(page);
> > > 
> > > So I am not sure we want to bother doing that for pages that are really 
> > > unreachable.
> > 
> > I think that considering that /proc/kpageflags can check them, some data
> > (even if it's trivial) might be better than just zeros.
> > 
> > Here's the updated patch.
> > Thanks for the suggestion and testing!
> > 
> > ---
> > From: Naoya Horiguchi 
> > Date: Thu, 14 Jun 2018 14:44:36 +0900
> > Subject: [PATCH] x86/e820: put !E820_TYPE_RAM regions into memblock.reserved
> > 
> > There is a kernel panic that is triggered when reading /proc/kpageflags
> > on the kernel booted with kernel parameter 'memmap=nn[KMG]!ss[KMG]':
> > 
> >   BUG: unable to handle kernel paging request at fffe
> >   PGD 9b20e067 P4D 9b20e067 PUD 9b210067 PMD 0
> >   Oops:  [#1] SMP PTI
> >   CPU: 2 PID: 1728 Comm: page-types Not tainted 
> > 4.17.0-rc6-mm1-v4.17-rc6-180605-0816-00236-g2dfb086ef02c+ #160
> >   Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.fc28 
> > 04/01/2014
> >   RIP: 0010:stable_page_flags+0x27/0x3c0
> >   Code: 00 00 00 0f 1f 44 00 00 48 85 ff 0f 84 a0 03 00 00 41 54 55 49 89 
> > fc 53 48 8b 57 08 48 8b 2f 48 8d 42 ff 83 e2 01 48 0f 44 c7 <48> 8b 00 f6 
> > c4 01 0f 84 10 03 00 00 31 db 49 8b 54 24 08 4c 89 e7
> >   RSP: 0018:bbd44111fde0 EFLAGS: 00010202
> >   RAX: fffe RBX: 7fffeff9 RCX: 
> >   RDX: 0001 RSI: 0202 RDI: ed1182fff5c0
> >   RBP:  R08: 0001 R09: 0001
> >   R10: bbd44111fed8 R11:  R12: ed1182fff5c0
> >   R13: 000bffd7 R14: 02fff5c0 R15: bbd44111ff10
> >   FS:  7efc4335a500() GS:93a5bfc0() 
> > knlGS:
> >   CS:  0010 DS:  ES:  CR0: 80050033
> >   CR2: fffe CR3: b2a58000 CR4: 001406e0
> >   Call Trace:
> >kpageflags_read+0xc7/0x120
> >proc_reg_read+0x3c/0x60
> >__vfs_read+0x36/0x170
> >vfs_read+0x89/0x130
> >ksys_pread64+0x71/0x90
> >do_syscall_64+0x5b/0x160
> >entry_SYSCALL_64_after_hwframe+0x44/0xa9
> >   RIP: 0033:0x7efc42e75e23
> >   Code: 09 00 ba 9f 01 00 00 e8 ab 81 f4 ff 66 2e 0f 1f 84 00 00 00 00 00 
> > 90 83 3d 29 0a 2d 00 00 75 13 49 89 ca b8 11 00 00 00 0f 05 <48> 3d 01 f0 
> > ff ff 73 34 c3 48 83 ec 08 e8 db d3 01 00 48 89 04 24
> > 
> > According to kernel bisection, this problem became visible due to commit
> > f7f99100d8d9 which changes how struct pages are initialized.
> > 
> > Memblock layout affects the pfn ranges covered by node/zone. Consider
> > that we have a VM with 2 NUMA nodes and each node has 4GB memory, and
> > the

Re: [PATCH v2] x86/e820: put !E820_TYPE_RAM regions into memblock.reserved

2018-06-14 Thread Naoya Horiguchi

On Thu, Jun 14, 2018 at 01:24:37PM +0200, Oscar Salvador wrote:
> On Thu, Jun 14, 2018 at 09:21:03AM +0200, Oscar Salvador wrote:
> > On Thu, Jun 14, 2018 at 06:34:55AM +0000, Naoya Horiguchi wrote:
> > > On Thu, Jun 14, 2018 at 07:38:59AM +0200, Oscar Salvador wrote:
> > > > On Thu, Jun 14, 2018 at 05:16:18AM +0000, Naoya Horiguchi wrote:
> > > ...
> > > > > 
> > > > > My concern is that there are a few E820 memory types rather than
> > > > > E820_TYPE_RAM and E820_TYPE_RESERVED, and I'm not sure that putting 
> > > > > them
> > > > > all into memblock.reserved is really acceptable.
> > > > 
> > > > Hi Naoya,
> > > > 
> > > > Maybe you could just add to memblock.reserved, all unavailable ranges 
> > > > within
> > > > E820_TYPE_RAM.
> > > > Actually, in your original patch, you are walking memblock.memory, 
> > > > which should
> > > > only contain E820_TYPE_RAM ranges (talking about x86).
> > > > 
> > > > So I think the below would do the trick as well?
> > > > 
> > > > @@ -1248,6 +1276,7 @@ void __init e820__memblock_setup(void)
> > > >  {
> > > > int i;
> > > > u64 end;
> > > > +   u64 next = 0;
> > > >  
> > > > /*
> > > >  * The bootstrap memblock region count maximum is 128 entries
> > > >  
> > > > @@ -1269,6 +1299,14 @@ void __init e820__memblock_setup(void)
> > > >  
> > > > if (entry->type != E820_TYPE_RAM && entry->type != 
> > > > E820_TYPE_RESERVED_KERN)
> > > > continue;
> > > >
> > > > +   
> > > > +   if (entry->type == E820_TYPE_RAM)
> > > > +   if (next < entry->addr) {
> > > > +   memblock_reserve (next, next + 
> > > > (entry->addr - next));
> > > > +   next = end;
> > > > +   }
> > > > 
> > > > With the above patch, I can no longer see the issues either.
> > > 
> > > I double-checked and this change looks good to me.
> > > 
> > > > 
> > > > Although, there is a difference between this and your original patch.
> > > > In your original patch, you are just zeroing the pages, while with this 
> > > > one (or with your second patch),
> > > > we will zero the page in reserve_bootmem_region(), but that function 
> > > > also init
> > > > some other fields of the struct page:
> > > > 
> > > > mm_zero_struct_page(page);
> > > > set_page_links(page, zone, nid, pfn);
> > > > init_page_count(page);
> > > > page_mapcount_reset(page);
> > > > page_cpupid_reset_last(page);
> > > > 
> > > > So I am not sure we want to bother doing that for pages that are really 
> > > > unreachable.
> > > 
> > > I think that considering that /proc/kpageflags can check them, some data
> > > (even if it's trivial) might be better than just zeros.
> > > 
> > > Here's the updated patch.
> > > Thanks for the suggestion and testing!
> > > 
> > > ---
> > > From: Naoya Horiguchi 
> > > Date: Thu, 14 Jun 2018 14:44:36 +0900
> > > Subject: [PATCH] x86/e820: put !E820_TYPE_RAM regions into 
> > > memblock.reserved
> > > 
> > > There is a kernel panic that is triggered when reading /proc/kpageflags
> > > on the kernel booted with kernel parameter 'memmap=nn[KMG]!ss[KMG]':
> > > 
> > >   BUG: unable to handle kernel paging request at fffe
> > >   PGD 9b20e067 P4D 9b20e067 PUD 9b210067 PMD 0
> > >   Oops:  [#1] SMP PTI
> > >   CPU: 2 PID: 1728 Comm: page-types Not tainted 
> > > 4.17.0-rc6-mm1-v4.17-rc6-180605-0816-00236-g2dfb086ef02c+ #160
> > >   Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
> > > 1.11.0-2.fc28 04/01/2014
> > >   RIP: 0010:stable_page_flags+0x27/0x3c0
> > >   Code: 00 00 00 0f 1f 44 00 00 48 85 ff 0f 84 a0 03 00 00 41 54 55 49 89 
> > > fc 53 48 8b 57 08 48 8b 2f 48 8d 42 ff 83 e2 01 48 0f 44 c7 <48> 8b 00 f6 
> > > c4 01 0f 84 10 03 00 00 31 db 49 8b 54 24 08 4c 89 e7
> > >   RSP: 0018:bbd44111fde0 EFLAGS: 00010202
> > >   RAX: fffe RBX: 7fffeff9 RCX: 
> > >   RDX: 0001 RSI: 0202 RDI: ed1182fff5c0
> > >   RBP:  R08: 0001 R09: 0001
> > >   R10: bbd44111fed8 R11:  R12: ed1182fff5c0
> > >   R13: 000bffd7 R14: 02fff5c0 R15: bbd44111ff10
> > >   FS:  7efc4335a500() GS:93a5bfc0() 
> > > knlGS:
> > >   CS:  0010 DS:  ES:  CR0: 80050033
> > >   CR2: fffe CR3: b2a58000 CR4: 001406e0
> > >   Call Trace:
> > >kpageflags_read+0xc7/0x120
> > >proc_reg_read+0x3c/0x60
> > >__vfs_read+0x36/0x170
> > >vfs_read+0x89/0x130
> > >ksys_pread64+0x71/0x90
> > >do_syscall_64+0x5b/0x160
> > >entry_SYSCALL_64_after_hwframe+0x44/0xa9
> > >   RIP: 0033:0x7efc42e75e23
> > >   Code: 09 00 ba 9f 01 00 00 e8 ab 81 f4 ff 66 2e 0f 1f 84 00 00 00 00 00 
> > > 90 83 3d 29 0a 2d 00 00 75 13 49 89 ca b8 11 00 00 00 0f 05 <48> 3d 01 f0 
> > > ff ff 73 34 c3 48 83 ec 08 e8 db d3

Re: [PATCH v2] x86/e820: put !E820_TYPE_RAM regions into memblock.reserved

2018-06-14 Thread Naoya Horiguchi

On Thu, Jun 14, 2018 at 01:24:37PM +0200, Oscar Salvador wrote:
> On Thu, Jun 14, 2018 at 09:21:03AM +0200, Oscar Salvador wrote:
> > On Thu, Jun 14, 2018 at 06:34:55AM +0000, Naoya Horiguchi wrote:
> > > On Thu, Jun 14, 2018 at 07:38:59AM +0200, Oscar Salvador wrote:
> > > > On Thu, Jun 14, 2018 at 05:16:18AM +0000, Naoya Horiguchi wrote:
> > > ...
> > > > > 
> > > > > My concern is that there are a few E820 memory types rather than
> > > > > E820_TYPE_RAM and E820_TYPE_RESERVED, and I'm not sure that putting 
> > > > > them
> > > > > all into memblock.reserved is really acceptable.
> > > > 
> > > > Hi Naoya,
> > > > 
> > > > Maybe you could just add to memblock.reserved, all unavailable ranges 
> > > > within
> > > > E820_TYPE_RAM.
> > > > Actually, in your original patch, you are walking memblock.memory, 
> > > > which should
> > > > only contain E820_TYPE_RAM ranges (talking about x86).
> > > > 
> > > > So I think the below would do the trick as well?
> > > > 
> > > > @@ -1248,6 +1276,7 @@ void __init e820__memblock_setup(void)
> > > >  {
> > > > int i;
> > > > u64 end;
> > > > +   u64 next = 0;
> > > >  
> > > > /*
> > > >  * The bootstrap memblock region count maximum is 128 entries
> > > >  
> > > > @@ -1269,6 +1299,14 @@ void __init e820__memblock_setup(void)
> > > >  
> > > > if (entry->type != E820_TYPE_RAM && entry->type != 
> > > > E820_TYPE_RESERVED_KERN)
> > > > continue;
> > > >
> > > > +   
> > > > +   if (entry->type == E820_TYPE_RAM)
> > > > +   if (next < entry->addr) {
> > > > +   memblock_reserve (next, next + 
> > > > (entry->addr - next));
> > > > +   next = end;
> > > > +   }
> > > > 
> > > > With the above patch, I can no longer see the issues either.
> > > 
> > > I double-checked and this change looks good to me.
> > > 
> > > > 
> > > > Although, there is a difference between this and your original patch.
> > > > In your original patch, you are just zeroing the pages, while with this 
> > > > one (or with your second patch),
> > > > we will zero the page in reserve_bootmem_region(), but that function 
> > > > also init
> > > > some other fields of the struct page:
> > > > 
> > > > mm_zero_struct_page(page);
> > > > set_page_links(page, zone, nid, pfn);
> > > > init_page_count(page);
> > > > page_mapcount_reset(page);
> > > > page_cpupid_reset_last(page);
> > > > 
> > > > So I am not sure we want to bother doing that for pages that are really 
> > > > unreachable.
> > > 
> > > I think that considering that /proc/kpageflags can check them, some data
> > > (even if it's trivial) might be better than just zeros.
> > > 
> > > Here's the updated patch.
> > > Thanks for the suggestion and testing!
> > > 
> > > ---
> > > From: Naoya Horiguchi 
> > > Date: Thu, 14 Jun 2018 14:44:36 +0900
> > > Subject: [PATCH] x86/e820: put !E820_TYPE_RAM regions into 
> > > memblock.reserved
> > > 
> > > There is a kernel panic that is triggered when reading /proc/kpageflags
> > > on the kernel booted with kernel parameter 'memmap=nn[KMG]!ss[KMG]':
> > > 
> > >   BUG: unable to handle kernel paging request at fffe
> > >   PGD 9b20e067 P4D 9b20e067 PUD 9b210067 PMD 0
> > >   Oops:  [#1] SMP PTI
> > >   CPU: 2 PID: 1728 Comm: page-types Not tainted 
> > > 4.17.0-rc6-mm1-v4.17-rc6-180605-0816-00236-g2dfb086ef02c+ #160
> > >   Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
> > > 1.11.0-2.fc28 04/01/2014
> > >   RIP: 0010:stable_page_flags+0x27/0x3c0
> > >   Code: 00 00 00 0f 1f 44 00 00 48 85 ff 0f 84 a0 03 00 00 41 54 55 49 89 
> > > fc 53 48 8b 57 08 48 8b 2f 48 8d 42 ff 83 e2 01 48 0f 44 c7 <48> 8b 00 f6 
> > > c4 01 0f 84 10 03 00 00 31 db 49 8b 54 24 08 4c 89 e7
> > >   RSP: 0018:bbd44111fde0 EFLAGS: 00010202
> > >   RAX: fffe RBX: 7fffeff9 RCX: 
> > >   RDX: 0001 RSI: 0202 RDI: ed1182fff5c0
> > >   RBP:  R08: 0001 R09: 0001
> > >   R10: bbd44111fed8 R11:  R12: ed1182fff5c0
> > >   R13: 000bffd7 R14: 02fff5c0 R15: bbd44111ff10
> > >   FS:  7efc4335a500() GS:93a5bfc0() 
> > > knlGS:
> > >   CS:  0010 DS:  ES:  CR0: 80050033
> > >   CR2: fffe CR3: b2a58000 CR4: 001406e0
> > >   Call Trace:
> > >kpageflags_read+0xc7/0x120
> > >proc_reg_read+0x3c/0x60
> > >__vfs_read+0x36/0x170
> > >vfs_read+0x89/0x130
> > >ksys_pread64+0x71/0x90
> > >do_syscall_64+0x5b/0x160
> > >entry_SYSCALL_64_after_hwframe+0x44/0xa9
> > >   RIP: 0033:0x7efc42e75e23
> > >   Code: 09 00 ba 9f 01 00 00 e8 ab 81 f4 ff 66 2e 0f 1f 84 00 00 00 00 00 
> > > 90 83 3d 29 0a 2d 00 00 75 13 49 89 ca b8 11 00 00 00 0f 05 <48> 3d 01 f0 
> > > ff ff 73 34 c3 48 83 ec 08 e8 db d3

Re: [PATCH v1] mm: zero remaining unavailable struct pages (Re: kernel panic in reading /proc/kpageflags when enabling RAM-simulated PMEM)

2018-06-14 Thread Naoya Horiguchi

On Thu, Jun 14, 2018 at 09:00:50AM +0200, Michal Hocko wrote:
> On Thu 14-06-18 05:16:18, Naoya Horiguchi wrote:
> > On Wed, Jun 13, 2018 at 11:07:00AM +0200, Michal Hocko wrote:
> > > On Wed 13-06-18 05:41:08, Naoya Horiguchi wrote:
> > > [...]
> > > > From: Naoya Horiguchi 
> > > > Date: Wed, 13 Jun 2018 12:43:27 +0900
> > > > Subject: [PATCH] mm: zero remaining unavailable struct pages
> > > >
> > > > There is a kernel panic that is triggered when reading /proc/kpageflags
> > > > on the kernel booted with kernel parameter 'memmap=nn[KMG]!ss[KMG]':
> > > >
> > > >   BUG: unable to handle kernel paging request at fffe
> > > >   PGD 9b20e067 P4D 9b20e067 PUD 9b210067 PMD 0
> > > >   Oops:  [#1] SMP PTI
> > > >   CPU: 2 PID: 1728 Comm: page-types Not tainted 
> > > > 4.17.0-rc6-mm1-v4.17-rc6-180605-0816-00236-g2dfb086ef02c+ #160
> > > >   Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
> > > > 1.11.0-2.fc28 04/01/2014
> > > >   RIP: 0010:stable_page_flags+0x27/0x3c0
> > > >   Code: 00 00 00 0f 1f 44 00 00 48 85 ff 0f 84 a0 03 00 00 41 54 55 49 
> > > > 89 fc 53 48 8b 57 08 48 8b 2f 48 8d 42 ff 83 e2 01 48 0f 44 c7 <48> 8b 
> > > > 00 f6 c4 01 0f 84 10 03 00 00 31 db 49 8b 54 24 08 4c 89 e7
> > > >   RSP: 0018:bbd44111fde0 EFLAGS: 00010202
> > > >   RAX: fffe RBX: 7fffeff9 RCX: 
> > > >   RDX: 0001 RSI: 0202 RDI: ed1182fff5c0
> > > >   RBP:  R08: 0001 R09: 0001
> > > >   R10: bbd44111fed8 R11:  R12: ed1182fff5c0
> > > >   R13: 000bffd7 R14: 02fff5c0 R15: bbd44111ff10
> > > >   FS:  7efc4335a500() GS:93a5bfc0() 
> > > > knlGS:
> > > >   CS:  0010 DS:  ES:  CR0: 80050033
> > > >   CR2: fffe CR3: b2a58000 CR4: 001406e0
> > > >   Call Trace:
> > > >kpageflags_read+0xc7/0x120
> > > >proc_reg_read+0x3c/0x60
> > > >__vfs_read+0x36/0x170
> > > >vfs_read+0x89/0x130
> > > >ksys_pread64+0x71/0x90
> > > >do_syscall_64+0x5b/0x160
> > > >entry_SYSCALL_64_after_hwframe+0x44/0xa9
> > > >   RIP: 0033:0x7efc42e75e23
> > > >   Code: 09 00 ba 9f 01 00 00 e8 ab 81 f4 ff 66 2e 0f 1f 84 00 00 00 00 
> > > > 00 90 83 3d 29 0a 2d 00 00 75 13 49 89 ca b8 11 00 00 00 0f 05 <48> 3d 
> > > > 01 f0 ff ff 73 34 c3 48 83 ec 08 e8 db d3 01 00 48 89 04 24
> > > >
> > > > According to kernel bisection, this problem became visible due to commit
> > > > f7f99100d8d9 which changes how struct pages are initialized.
> > > >
> > > > Memblock layout affects the pfn ranges covered by node/zone. Consider
> > > > that we have a VM with 2 NUMA nodes and each node has 4GB memory, and
> > > > the default (no memmap= given) memblock layout is like below:
> > > >
> > > >   MEMBLOCK configuration:
> > > >memory size = 0x0001fff75c00 reserved size = 0x0300c000
> > > >memory.cnt  = 0x4
> > > >memory[0x0] [0x1000-0x0009efff], 
> > > > 0x0009e000 bytes on node 0 flags: 0x0
> > > >memory[0x1] [0x0010-0xbffd6fff], 
> > > > 0xbfed7000 bytes on node 0 flags: 0x0
> > > >memory[0x2] [0x0001-0x00013fff], 
> > > > 0x4000 bytes on node 0 flags: 0x0
> > > >memory[0x3] [0x00014000-0x00023fff], 
> > > > 0x0001 bytes on node 1 flags: 0x0
> > > >...
> > > >
> > > > If you give memmap=1G!4G (so it just covers memory[0x2]),
> > > > the range [0x1-0x13fff] is gone:
> > > >
> > > >   MEMBLOCK configuration:
> > > >memory size = 0x0001bff75c00 reserved size = 0x0300c000
> > > >memory.cnt  = 0x3
> > > >memory[0x0] [0x1000-0x0009efff], 
> > > > 0x0009e000 bytes on node 0 flags: 0x0
> > > >memory[0x1] [0x0010-0xbffd6fff], 
> > > > 0xbfed7000 bytes on node 0 flags: 0x0
> > > >memory[0x2] [0x00014000-0x00023fff], 
> > > > 0x0001 bytes on node 1 flags: 0x0
> > > >...
> > > >
> > > > This causes shrinking node 0's pfn range because it is calculated by
> > > > the address range of memblock.memory. So some of struct pages in the
> > > > gap range are left uninitialized.
> > > >
> > > > We have a function zero_resv_unavail() which does zeroing the struct
> > > > pages outside memblock.memory, but currently it covers only the reserved
> > > > unavailable range (i.e. memblock.memory && !memblock.reserved).
> > > > This patch extends it to cover all unavailable range, which fixes
> > > > the reported issue.
> > >
> > > Thanks for pin pointing this down Naoya! I am wondering why we cannot
> > > simply mark the excluded ranges to be reserved instead.
> > 
> > I tried your idea with the change below, and it also fixes the kernel panic.
> > 
> > ---
> > diff --git a/arch/x86/kernel/e820.c

Re: [PATCH v1] mm: zero remaining unavailable struct pages (Re: kernel panic in reading /proc/kpageflags when enabling RAM-simulated PMEM)

2018-06-14 Thread Naoya Horiguchi

On Thu, Jun 14, 2018 at 09:00:50AM +0200, Michal Hocko wrote:
> On Thu 14-06-18 05:16:18, Naoya Horiguchi wrote:
> > On Wed, Jun 13, 2018 at 11:07:00AM +0200, Michal Hocko wrote:
> > > On Wed 13-06-18 05:41:08, Naoya Horiguchi wrote:
> > > [...]
> > > > From: Naoya Horiguchi 
> > > > Date: Wed, 13 Jun 2018 12:43:27 +0900
> > > > Subject: [PATCH] mm: zero remaining unavailable struct pages
> > > >
> > > > There is a kernel panic that is triggered when reading /proc/kpageflags
> > > > on the kernel booted with kernel parameter 'memmap=nn[KMG]!ss[KMG]':
> > > >
> > > >   BUG: unable to handle kernel paging request at fffe
> > > >   PGD 9b20e067 P4D 9b20e067 PUD 9b210067 PMD 0
> > > >   Oops:  [#1] SMP PTI
> > > >   CPU: 2 PID: 1728 Comm: page-types Not tainted 
> > > > 4.17.0-rc6-mm1-v4.17-rc6-180605-0816-00236-g2dfb086ef02c+ #160
> > > >   Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
> > > > 1.11.0-2.fc28 04/01/2014
> > > >   RIP: 0010:stable_page_flags+0x27/0x3c0
> > > >   Code: 00 00 00 0f 1f 44 00 00 48 85 ff 0f 84 a0 03 00 00 41 54 55 49 
> > > > 89 fc 53 48 8b 57 08 48 8b 2f 48 8d 42 ff 83 e2 01 48 0f 44 c7 <48> 8b 
> > > > 00 f6 c4 01 0f 84 10 03 00 00 31 db 49 8b 54 24 08 4c 89 e7
> > > >   RSP: 0018:bbd44111fde0 EFLAGS: 00010202
> > > >   RAX: fffe RBX: 7fffeff9 RCX: 
> > > >   RDX: 0001 RSI: 0202 RDI: ed1182fff5c0
> > > >   RBP:  R08: 0001 R09: 0001
> > > >   R10: bbd44111fed8 R11:  R12: ed1182fff5c0
> > > >   R13: 000bffd7 R14: 02fff5c0 R15: bbd44111ff10
> > > >   FS:  7efc4335a500() GS:93a5bfc0() 
> > > > knlGS:
> > > >   CS:  0010 DS:  ES:  CR0: 80050033
> > > >   CR2: fffe CR3: b2a58000 CR4: 001406e0
> > > >   Call Trace:
> > > >kpageflags_read+0xc7/0x120
> > > >proc_reg_read+0x3c/0x60
> > > >__vfs_read+0x36/0x170
> > > >vfs_read+0x89/0x130
> > > >ksys_pread64+0x71/0x90
> > > >do_syscall_64+0x5b/0x160
> > > >entry_SYSCALL_64_after_hwframe+0x44/0xa9
> > > >   RIP: 0033:0x7efc42e75e23
> > > >   Code: 09 00 ba 9f 01 00 00 e8 ab 81 f4 ff 66 2e 0f 1f 84 00 00 00 00 
> > > > 00 90 83 3d 29 0a 2d 00 00 75 13 49 89 ca b8 11 00 00 00 0f 05 <48> 3d 
> > > > 01 f0 ff ff 73 34 c3 48 83 ec 08 e8 db d3 01 00 48 89 04 24
> > > >
> > > > According to kernel bisection, this problem became visible due to commit
> > > > f7f99100d8d9 which changes how struct pages are initialized.
> > > >
> > > > Memblock layout affects the pfn ranges covered by node/zone. Consider
> > > > that we have a VM with 2 NUMA nodes and each node has 4GB memory, and
> > > > the default (no memmap= given) memblock layout is like below:
> > > >
> > > >   MEMBLOCK configuration:
> > > >memory size = 0x0001fff75c00 reserved size = 0x0300c000
> > > >memory.cnt  = 0x4
> > > >memory[0x0] [0x1000-0x0009efff], 
> > > > 0x0009e000 bytes on node 0 flags: 0x0
> > > >memory[0x1] [0x0010-0xbffd6fff], 
> > > > 0xbfed7000 bytes on node 0 flags: 0x0
> > > >memory[0x2] [0x0001-0x00013fff], 
> > > > 0x4000 bytes on node 0 flags: 0x0
> > > >memory[0x3] [0x00014000-0x00023fff], 
> > > > 0x0001 bytes on node 1 flags: 0x0
> > > >...
> > > >
> > > > If you give memmap=1G!4G (so it just covers memory[0x2]),
> > > > the range [0x1-0x13fff] is gone:
> > > >
> > > >   MEMBLOCK configuration:
> > > >memory size = 0x0001bff75c00 reserved size = 0x0300c000
> > > >memory.cnt  = 0x3
> > > >memory[0x0] [0x1000-0x0009efff], 
> > > > 0x0009e000 bytes on node 0 flags: 0x0
> > > >memory[0x1] [0x0010-0xbffd6fff], 
> > > > 0xbfed7000 bytes on node 0 flags: 0x0
> > > >memory[0x2] [0x00014000-0x00023fff], 
> > > > 0x0001 bytes on node 1 flags: 0x0
> > > >...
> > > >
> > > > This causes shrinking node 0's pfn range because it is calculated by
> > > > the address range of memblock.memory. So some of struct pages in the
> > > > gap range are left uninitialized.
> > > >
> > > > We have a function zero_resv_unavail() which does zeroing the struct
> > > > pages outside memblock.memory, but currently it covers only the reserved
> > > > unavailable range (i.e. memblock.memory && !memblock.reserved).
> > > > This patch extends it to cover all unavailable range, which fixes
> > > > the reported issue.
> > >
> > > Thanks for pin pointing this down Naoya! I am wondering why we cannot
> > > simply mark the excluded ranges to be reserved instead.
> > 
> > I tried your idea with the change below, and it also fixes the kernel panic.
> > 
> > ---
> > diff --git a/arch/x86/kernel/e820.c

[PATCH RFC] tracing: Call triggers only if event passes filter checks

2018-06-14 Thread Joel Fernandes

From: "Joel Fernandes (Google)" 

Currently, trace event triggers are called regardless of whether the event
filter checks pass or fail. Thus if one were to enable event triggers
and filters at the same time, then the triggers will always be called
even if the filter checks didn't pass.

This is a problem for a usecase I was experimenting with: measuring the
time preemption is disabled using synthetic events and dump the stack
using the stacktrace trigger if the total preempt off time was greater
than a threshold. Following are the commands for the same:

Create synthetic event:

echo 'preemptdisable u64 lat' >> \
  /sys/kernel/debug/tracing/synthetic_events

echo 'hist:keys=cpu:ts0=common_timestamp.usecs:scpu=cpu' >> \
  
/sys/kernel/debug/tracing/events/preemptirq/preempt_disable/trigger

echo 'hist:keys=cpu:wakeup_lat=common_timestamp.usecs-$ts0:\
onmatch(preemptirq.preempt_disable).preemptdisable($wakeup_lat)' >> \

/sys/kernel/debug/tracing/events/preemptirq/preempt_enable/trigger

Enable synthetic event:

echo stacktrace > 
/sys/kernel/debug/tracing/events/synthetic/preemptdisable/trigger
echo 'lat > 400' > 
/sys/kernel/debug/tracing/events/synthetic/preemptdisable/filter
echo 1 > /sys/kernel/debug/tracing/events/synthetic/preemptdisable/enable

With the patch, the stacktrace is triggered only if the filter checks
passed. It also seems sensible to call triggers only if filter checks
pass so lets do that.

Signed-off-by: Joel Fernandes (Google) 
---
 kernel/trace/trace.h | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 630c5a24b2b2..b0b5df288924 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1296,13 +1296,14 @@ __event_trigger_test_discard(struct trace_event_file 
*file,
 enum event_trigger_type *tt)
 {
unsigned long eflags = file->flags;
+   bool filtered = (file->flags & EVENT_FILE_FL_FILTERED) &&
+!filter_match_preds(file->filter, entry);
 
-   if (eflags & EVENT_FILE_FL_TRIGGER_COND)
+   if (!filtered && (eflags & EVENT_FILE_FL_TRIGGER_COND))
*tt = event_triggers_call(file, entry, event);
 
if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
-   (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
-!filter_match_preds(file->filter, entry))) {
+   filtered) {
__trace_event_discard_commit(buffer, event);
return true;
}
-- 
2.18.0.rc1.244.gcf134e6275-goog

[PATCH RFC] tracing: Call triggers only if event passes filter checks

2018-06-14 Thread Joel Fernandes

From: "Joel Fernandes (Google)" 

Currently, trace event triggers are called regardless of if the event
filter checks pass or fail. Thus if one were to enable event triggers
and filters at the same time, then the triggers will always be called
even if the filter checks didn't pass.

This is a problem for a usecase I was experimenting with: measuring the
time preemption is disabled using synthetic events and dump the stack
using the stacktrace trigger if the total preempt off time was greater
than a threshold. Following are the commands for the same:

Create synthetic event:

echo 'preemptdisable u64 lat' >> \
  /sys/kernel/debug/tracing/synthetic_events

echo 'hist:keys=cpu:ts0=common_timestamp.usecs:scpu=cpu' >> \
  
/sys/kernel/debug/tracing/events/preemptirq/preempt_disable/trigger

echo 'hist:keys=cpu:wakeup_lat=common_timestamp.usecs-$ts0:\
onmatch(preemptirq.preempt_disable).preemptdisable($wakeup_lat)' >> \

/sys/kernel/debug/tracing/events/preemptirq/preempt_enable/trigger

Enable synthetic event:

echo stacktrace > 
/sys/kernel/debug/tracing/events/synthetic/preemptdisable/trigger
echo 'lat > 400' > 
/sys/kernel/debug/tracing/events/synthetic/preemptdisable/filter
echo 1 > /sys/kernel/debug/tracing/events/synthetic/preemptdisable/enable

With the patch, the stacktrace is triggered only if the filter checks
passed. It also seems sensible to call triggers only if filter checks
pass so lets do that.

Signed-off-by: Joel Fernandes (Google) 
---
 kernel/trace/trace.h | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 630c5a24b2b2..b0b5df288924 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1296,13 +1296,14 @@ __event_trigger_test_discard(struct trace_event_file 
*file,
 enum event_trigger_type *tt)
 {
unsigned long eflags = file->flags;
+   bool filtered = (file->flags & EVENT_FILE_FL_FILTERED) &&
+!filter_match_preds(file->filter, entry);
 
-   if (eflags & EVENT_FILE_FL_TRIGGER_COND)
+   if (!filtered && (eflags & EVENT_FILE_FL_TRIGGER_COND))
*tt = event_triggers_call(file, entry, event);
 
if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
-   (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
-!filter_match_preds(file->filter, entry))) {
+   filtered) {
__trace_event_discard_commit(buffer, event);
return true;
}
-- 
2.18.0.rc1.244.gcf134e6275-goog

Re: [PATCH] extcon: Release locking when sending the notification of connector state

2018-06-14 Thread Chanwoo Choi

Hi Roger,

If possible, Could you please review this patch? 

Regards,
Chanwoo Choi

On 2018년 06월 14일 20:33, H. Nikolaus Schaller wrote:
> 
>> Am 14.06.2018 um 12:39 schrieb H. Nikolaus Schaller :
>>
>> Hi Roger and Chanwoo,
>>
>>> Am 14.06.2018 um 12:18 schrieb Chanwoo Choi :
>>>
>>> + H. Nikolaus Schaller 
>>>
>>> On 2018년 06월 14일 13:14, Chanwoo Choi wrote:
 Previously, extcon used the spinlock before calling the notifier_call_chain
 to prevent the scheduled out of task and to prevent the notification delay.
 When spinlock is locked for sending the notification, deadlock issue
 occurred on the side of extcon consumer device. To fix this issue,
 extcon consumer device should always use the work. it is always not
 reasonable to use work.

 To fix this issue on extcon consumer device, release locking when sending
 the notification of connector state.

 Fixes: ab11af049f88 ("extcon: Add the synchronization extcon APIs to 
 support the notification")
 Cc: sta...@vger.kernel.org
 Cc: Roger Quadros 
 Cc: Kishon Vijay Abraham I 
 Signed-off-by: Chanwoo Choi 
 ---
 drivers/extcon/extcon.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

 diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c
 index 8bff5fd18185..f75b08a45d4e 100644
 --- a/drivers/extcon/extcon.c
 +++ b/drivers/extcon/extcon.c
 @@ -433,8 +433,8 @@ int extcon_sync(struct extcon_dev *edev, unsigned int 
 id)
return index;

spin_lock_irqsave(&edev->lock, flags);
 -
state = !!(edev->state & BIT(index));
 +  spin_unlock_irqrestore(&edev->lock, flags);

/*
 * Call functions in a raw notifier chain for the specific one
 @@ -448,6 +448,7 @@ int extcon_sync(struct extcon_dev *edev, unsigned int 
 id)
 */
raw_notifier_call_chain(&edev->nh_all, state, edev);

 +  spin_lock_irqsave(&edev->lock, flags);
/* This could be in interrupt handler */
prop_buf = (char *)get_zeroed_page(GFP_ATOMIC);
if (!prop_buf) {

>>
>> I have tested on the Pyra handheld prototype and now it works. Plugging in 
>> an OTG cable
>> enables/disables OTG power as expected and there are no kernel oops any more.
> 
> I did take some minutes to check and it now also works again on the OMAP5EVM.
> 
> BR,
> Nikolaus
> 
>

Re: [PATCH] extcon: Release locking when sending the notification of connector state

2018-06-14 Thread Chanwoo Choi

Hi Roger,

If possible, Could you please review this patch? 

Regards,
Chanwoo Choi

On 2018년 06월 14일 20:33, H. Nikolaus Schaller wrote:
> 
>> Am 14.06.2018 um 12:39 schrieb H. Nikolaus Schaller :
>>
>> Hi Roger and Chanwoo,
>>
>>> Am 14.06.2018 um 12:18 schrieb Chanwoo Choi :
>>>
>>> + H. Nikolaus Schaller 
>>>
>>> On 2018년 06월 14일 13:14, Chanwoo Choi wrote:
 Previously, extcon used the spinlock before calling the notifier_call_chain
 to prevent the scheduled out of task and to prevent the notification delay.
 When spinlock is locked for sending the notification, deadlock issue
 occurred on the side of extcon consumer device. To fix this issue,
 extcon consumer device should always use the work. it is always not
 reasonable to use work.

 To fix this issue on extcon consumer device, release locking when sending
 the notification of connector state.

 Fixes: ab11af049f88 ("extcon: Add the synchronization extcon APIs to 
 support the notification")
 Cc: sta...@vger.kernel.org
 Cc: Roger Quadros 
 Cc: Kishon Vijay Abraham I 
 Signed-off-by: Chanwoo Choi 
 ---
 drivers/extcon/extcon.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

 diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c
 index 8bff5fd18185..f75b08a45d4e 100644
 --- a/drivers/extcon/extcon.c
 +++ b/drivers/extcon/extcon.c
 @@ -433,8 +433,8 @@ int extcon_sync(struct extcon_dev *edev, unsigned int 
 id)
return index;

spin_lock_irqsave(&edev->lock, flags);
 -
state = !!(edev->state & BIT(index));
 +  spin_unlock_irqrestore(&edev->lock, flags);

/*
 * Call functions in a raw notifier chain for the specific one
 @@ -448,6 +448,7 @@ int extcon_sync(struct extcon_dev *edev, unsigned int 
 id)
 */
raw_notifier_call_chain(&edev->nh_all, state, edev);

 +  spin_lock_irqsave(&edev->lock, flags);
/* This could be in interrupt handler */
prop_buf = (char *)get_zeroed_page(GFP_ATOMIC);
if (!prop_buf) {

>>
>> I have tested on the Pyra handheld prototype and now it works. Plugging in 
>> an OTG cable
>> enables/disables OTG power as expected and there are no kernel oops any more.
> 
> I did take some minutes to check and it now also works again on the OMAP5EVM.
> 
> BR,
> Nikolaus
> 
>

Re: [PATCH 4.16 00/43] 4.16.16-stable review

2018-06-14 Thread Naresh Kamboju

On 14 June 2018 at 19:34, Greg Kroah-Hartman  wrote:
> This is the start of the stable review cycle for the 4.16.16 release.
> There are 43 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
>
> Responses should be made by Sat Jun 16 13:21:17 UTC 2018.
> Anything received after that time might be too late.
>
> The whole patch series can be found in one patch at:
> 
> https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.16.16-rc1.gz
> or in the git tree and branch at:
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.16.y
> and the diffstat can be found below.
>
> thanks,
>
> greg k-h


Results from Linaro’s test farm.
No regressions on arm64, arm and x86_64.

Summary


kernel: 4.16.16-rc1
git repo:
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
git branch: linux-4.16.y
git commit: c61f86d679044facaf48c475eb4edb7e6c2d8d22
git describe: v4.16.15-44-gc61f86d67904
Test details: https://qa-reports.linaro.org/lkft/linux-stable-rc-4.16-oe/build\
/v4.16.15-44-gc61f86d67904
^ Please join URL

No regressions (compared to build v4.16.15)


Ran 11513 total tests in the following environments and test suites.

Environments
--
- dragonboard-410c - arm64
- hi6220-hikey - arm64
- juno-r2 - arm64
- qemu_arm
- qemu_arm64
- qemu_x86_64
- x15 - arm
- x86_64

Test Suites
---
* boot
* kselftest
* libhugetlbfs
* ltp-cap_bounds-tests
* ltp-containers-tests
* ltp-cve-tests
* ltp-fcntl-locktests-tests
* ltp-filecaps-tests
* ltp-fs-tests
* ltp-fs_bind-tests
* ltp-fs_perms_simple-tests
* ltp-fsx-tests
* ltp-hugetlb-tests
* ltp-io-tests
* ltp-ipc-tests
* ltp-math-tests
* ltp-nptl-tests
* ltp-pty-tests
* ltp-sched-tests
* ltp-securebits-tests
* ltp-syscalls-tests
* ltp-timers-tests
* kselftest-vsyscall-mode-native
* kselftest-vsyscall-mode-none

-- 
Linaro LKFT
https://lkft.linaro.org

Re: [PATCH 4.16 00/43] 4.16.16-stable review

2018-06-14 Thread Naresh Kamboju

On 14 June 2018 at 19:34, Greg Kroah-Hartman  wrote:
> This is the start of the stable review cycle for the 4.16.16 release.
> There are 43 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
>
> Responses should be made by Sat Jun 16 13:21:17 UTC 2018.
> Anything received after that time might be too late.
>
> The whole patch series can be found in one patch at:
> 
> https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.16.16-rc1.gz
> or in the git tree and branch at:
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.16.y
> and the diffstat can be found below.
>
> thanks,
>
> greg k-h


Results from Linaro’s test farm.
No regressions on arm64, arm and x86_64.

Summary


kernel: 4.16.16-rc1
git repo:
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
git branch: linux-4.16.y
git commit: c61f86d679044facaf48c475eb4edb7e6c2d8d22
git describe: v4.16.15-44-gc61f86d67904
Test details: https://qa-reports.linaro.org/lkft/linux-stable-rc-4.16-oe/build\
/v4.16.15-44-gc61f86d67904
^ Please join URL

No regressions (compared to build v4.16.15)


Ran 11513 total tests in the following environments and test suites.

Environments
--
- dragonboard-410c - arm64
- hi6220-hikey - arm64
- juno-r2 - arm64
- qemu_arm
- qemu_arm64
- qemu_x86_64
- x15 - arm
- x86_64

Test Suites
---
* boot
* kselftest
* libhugetlbfs
* ltp-cap_bounds-tests
* ltp-containers-tests
* ltp-cve-tests
* ltp-fcntl-locktests-tests
* ltp-filecaps-tests
* ltp-fs-tests
* ltp-fs_bind-tests
* ltp-fs_perms_simple-tests
* ltp-fsx-tests
* ltp-hugetlb-tests
* ltp-io-tests
* ltp-ipc-tests
* ltp-math-tests
* ltp-nptl-tests
* ltp-pty-tests
* ltp-sched-tests
* ltp-securebits-tests
* ltp-syscalls-tests
* ltp-timers-tests
* kselftest-vsyscall-mode-native
* kselftest-vsyscall-mode-none

-- 
Linaro LKFT
https://lkft.linaro.org

Re: [PATCH 4.17 00/45] 4.17.2-stable review

2018-06-14 Thread Naresh Kamboju

On 14 June 2018 at 19:33, Greg Kroah-Hartman  wrote:
> This is the start of the stable review cycle for the 4.17.2 release.
> There are 45 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
>
> Responses should be made by Sat Jun 16 13:21:05 UTC 2018.
> Anything received after that time might be too late.
>
> The whole patch series can be found in one patch at:
> 
> https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.17.2-rc1.gz
> or in the git tree and branch at:
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.17.y
> and the diffstat can be found below.
>
> thanks,
>
> greg k-h

Results from Linaro’s test farm.
No regressions on arm64, arm and x86_64.

NOTE:
fcntl36 is an intermittent failure on qemu arm32 for all branches.
fcntl36.c:205: FAIL: Unexpected data offset 12304 value 9
Old open bug to investigate, https://bugs.linaro.org/show_bug.cgi?id=3339

Summary


kernel: 4.17.2-rc1
git repo:
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
git branch: linux-4.17.y
git commit: f11fc39630770031c4dd042acc5ad98adc5de301
git describe: v4.17.1-46-gf11fc3963077
Test details: https://qa-reports.linaro.org/lkft/linux-stable-rc-4.17-oe/build\
/v4.17.1-46-gf11fc3963077
^ Please join URL

No regressions (compared to build v4.17.1)


Boards, architectures and test suites:
-

dragonboard-410c - arm64
* boot - pass: 21,
* kselftest - skip: 28, pass: 46, fail: 6
* libhugetlbfs - skip: 1, pass: 89, fail: 1
* ltp-cap_bounds-tests - pass: 2,
* ltp-containers-tests - skip: 17, pass: 64,
* ltp-cve-tests - skip: 8, pass: 27,
* ltp-fcntl-locktests-tests - pass: 2,
* ltp-filecaps-tests - pass: 2,
* ltp-fs-tests - skip: 6, pass: 60,
* ltp-fs_bind-tests - pass: 2,
* ltp-fs_perms_simple-tests - pass: 19,
* ltp-fsx-tests - pass: 2,
* ltp-hugetlb-tests - skip: 1, pass: 21,
* ltp-io-tests - pass: 3,
* ltp-ipc-tests - pass: 9,
* ltp-math-tests - pass: 11,
* ltp-nptl-tests - pass: 2,
* ltp-pty-tests - pass: 4,
* ltp-sched-tests - pass: 14,
* ltp-securebits-tests - pass: 4,
* ltp-syscalls-tests - skip: 131, pass: 1018,
* ltp-timers-tests - pass: 13,

hi6220-hikey - arm64
* boot - pass: 21,
* kselftest - skip: 22, pass: 53, fail: 4
* libhugetlbfs - skip: 1, pass: 90,
* ltp-cap_bounds-tests - pass: 2,
* ltp-containers-tests - skip: 17, pass: 64,
* ltp-cve-tests - skip: 8, pass: 27,
* ltp-fcntl-locktests-tests - pass: 2,
* ltp-filecaps-tests - pass: 2,
* ltp-fs_bind-tests - pass: 2,
* ltp-fs_perms_simple-tests - pass: 19,
* ltp-fsx-tests - pass: 2,
* ltp-hugetlb-tests - skip: 1, pass: 21,
* ltp-io-tests - pass: 3,
* ltp-ipc-tests - pass: 9,
* ltp-math-tests - pass: 11,
* ltp-nptl-tests - pass: 2,
* ltp-pty-tests - pass: 4,
* ltp-sched-tests - skip: 4, pass: 10,
* ltp-securebits-tests - pass: 4,
* ltp-syscalls-tests - skip: 132, pass: 1017,
* ltp-timers-tests - pass: 13,

juno-r2 - arm64
* boot - pass: 21,
* kselftest - skip: 24, pass: 51, fail: 5
* libhugetlbfs - skip: 1, pass: 90,
* ltp-cap_bounds-tests - pass: 2,
* ltp-containers-tests - skip: 17, pass: 64,
* ltp-cve-tests - skip: 9, pass: 26,
* ltp-fcntl-locktests-tests - pass: 2,
* ltp-filecaps-tests - pass: 2,
* ltp-fs-tests - skip: 6, pass: 60,
* ltp-fs_bind-tests - pass: 2,
* ltp-fs_perms_simple-tests - pass: 19,
* ltp-fsx-tests - pass: 2,
* ltp-hugetlb-tests - pass: 22,
* ltp-io-tests - pass: 3,
* ltp-ipc-tests - pass: 9,
* ltp-math-tests - pass: 11,
* ltp-nptl-tests - pass: 2,
* ltp-pty-tests - pass: 4,
* ltp-sched-tests - skip: 4, pass: 10,
* ltp-securebits-tests - pass: 4,
* ltp-syscalls-tests - skip: 131, pass: 1018,
* ltp-timers-tests - pass: 13,

qemu_arm
* boot - pass: 21,
* kselftest - skip: 32, pass: 45, fail: 8
* libhugetlbfs - skip: 1, pass: 87,
* ltp-cap_bounds-tests - pass: 2,
* ltp-containers-tests - skip: 18, pass: 63,
* ltp-cve-tests - skip: 12, pass: 23,
* ltp-fcntl-locktests-tests - pass: 2,
* ltp-filecaps-tests - pass: 2,
* ltp-fs-tests - skip: 5, pass: 61,
* ltp-fs_bind-tests - pass: 2,
* ltp-fs_perms_simple-tests - pass: 19,
* ltp-fsx-tests - pass: 2,
* ltp-hugetlb-tests - skip: 1, pass: 21,
* ltp-io-tests - pass: 3,
* ltp-ipc-tests - pass: 9,
* ltp-math-tests - pass: 11,
* ltp-nptl-tests - pass: 2,
* ltp-pty-tests - pass: 4,
* ltp-sched-tests - skip: 7, pass: 7,
* ltp-securebits-tests - pass: 4,
* ltp-syscalls-tests - skip: 94, pass: 1052, fail: 3
* ltp-timers-tests - pass: 13,

qemu_arm64
* boot - pass: 20, fail: 1
* kselftest - skip: 30, pass: 52, fail: 5
* libhugetlbfs - skip: 1, pass: 90,
* ltp-cap_bounds-tests - pass: 2,
* ltp-containers-tests - skip: 17, pass: 64,
* ltp-cve-tests - skip: 10, pass: 25,
* ltp-filecaps-tests - pass: 2,
* ltp-fs-tests - skip: 6, pass: 60,
* ltp-fs_bind-tests

Re: [PATCH 4.17 00/45] 4.17.2-stable review

2018-06-14 Thread Naresh Kamboju

On 14 June 2018 at 19:33, Greg Kroah-Hartman  wrote:
> This is the start of the stable review cycle for the 4.17.2 release.
> There are 45 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
>
> Responses should be made by Sat Jun 16 13:21:05 UTC 2018.
> Anything received after that time might be too late.
>
> The whole patch series can be found in one patch at:
> 
> https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.17.2-rc1.gz
> or in the git tree and branch at:
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.17.y
> and the diffstat can be found below.
>
> thanks,
>
> greg k-h

Results from Linaro’s test farm.
No regressions on arm64, arm and x86_64.

NOTE:
fcntl36 is an intermittent failure on qemu arm32 for all branches.
fcntl36.c:205: FAIL: Unexpected data offset 12304 value 9
Old open bug to investigate, https://bugs.linaro.org/show_bug.cgi?id=3339

Summary


kernel: 4.17.2-rc1
git repo:
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
git branch: linux-4.17.y
git commit: f11fc39630770031c4dd042acc5ad98adc5de301
git describe: v4.17.1-46-gf11fc3963077
Test details: https://qa-reports.linaro.org/lkft/linux-stable-rc-4.17-oe/build\
/v4.17.1-46-gf11fc3963077
^ Please join URL

No regressions (compared to build v4.17.1)


Boards, architectures and test suites:
-

dragonboard-410c - arm64
* boot - pass: 21,
* kselftest - skip: 28, pass: 46, fail: 6
* libhugetlbfs - skip: 1, pass: 89, fail: 1
* ltp-cap_bounds-tests - pass: 2,
* ltp-containers-tests - skip: 17, pass: 64,
* ltp-cve-tests - skip: 8, pass: 27,
* ltp-fcntl-locktests-tests - pass: 2,
* ltp-filecaps-tests - pass: 2,
* ltp-fs-tests - skip: 6, pass: 60,
* ltp-fs_bind-tests - pass: 2,
* ltp-fs_perms_simple-tests - pass: 19,
* ltp-fsx-tests - pass: 2,
* ltp-hugetlb-tests - skip: 1, pass: 21,
* ltp-io-tests - pass: 3,
* ltp-ipc-tests - pass: 9,
* ltp-math-tests - pass: 11,
* ltp-nptl-tests - pass: 2,
* ltp-pty-tests - pass: 4,
* ltp-sched-tests - pass: 14,
* ltp-securebits-tests - pass: 4,
* ltp-syscalls-tests - skip: 131, pass: 1018,
* ltp-timers-tests - pass: 13,

hi6220-hikey - arm64
* boot - pass: 21,
* kselftest - skip: 22, pass: 53, fail: 4
* libhugetlbfs - skip: 1, pass: 90,
* ltp-cap_bounds-tests - pass: 2,
* ltp-containers-tests - skip: 17, pass: 64,
* ltp-cve-tests - skip: 8, pass: 27,
* ltp-fcntl-locktests-tests - pass: 2,
* ltp-filecaps-tests - pass: 2,
* ltp-fs_bind-tests - pass: 2,
* ltp-fs_perms_simple-tests - pass: 19,
* ltp-fsx-tests - pass: 2,
* ltp-hugetlb-tests - skip: 1, pass: 21,
* ltp-io-tests - pass: 3,
* ltp-ipc-tests - pass: 9,
* ltp-math-tests - pass: 11,
* ltp-nptl-tests - pass: 2,
* ltp-pty-tests - pass: 4,
* ltp-sched-tests - skip: 4, pass: 10,
* ltp-securebits-tests - pass: 4,
* ltp-syscalls-tests - skip: 132, pass: 1017,
* ltp-timers-tests - pass: 13,

juno-r2 - arm64
* boot - pass: 21,
* kselftest - skip: 24, pass: 51, fail: 5
* libhugetlbfs - skip: 1, pass: 90,
* ltp-cap_bounds-tests - pass: 2,
* ltp-containers-tests - skip: 17, pass: 64,
* ltp-cve-tests - skip: 9, pass: 26,
* ltp-fcntl-locktests-tests - pass: 2,
* ltp-filecaps-tests - pass: 2,
* ltp-fs-tests - skip: 6, pass: 60,
* ltp-fs_bind-tests - pass: 2,
* ltp-fs_perms_simple-tests - pass: 19,
* ltp-fsx-tests - pass: 2,
* ltp-hugetlb-tests - pass: 22,
* ltp-io-tests - pass: 3,
* ltp-ipc-tests - pass: 9,
* ltp-math-tests - pass: 11,
* ltp-nptl-tests - pass: 2,
* ltp-pty-tests - pass: 4,
* ltp-sched-tests - skip: 4, pass: 10,
* ltp-securebits-tests - pass: 4,
* ltp-syscalls-tests - skip: 131, pass: 1018,
* ltp-timers-tests - pass: 13,

qemu_arm
* boot - pass: 21,
* kselftest - skip: 32, pass: 45, fail: 8
* libhugetlbfs - skip: 1, pass: 87,
* ltp-cap_bounds-tests - pass: 2,
* ltp-containers-tests - skip: 18, pass: 63,
* ltp-cve-tests - skip: 12, pass: 23,
* ltp-fcntl-locktests-tests - pass: 2,
* ltp-filecaps-tests - pass: 2,
* ltp-fs-tests - skip: 5, pass: 61,
* ltp-fs_bind-tests - pass: 2,
* ltp-fs_perms_simple-tests - pass: 19,
* ltp-fsx-tests - pass: 2,
* ltp-hugetlb-tests - skip: 1, pass: 21,
* ltp-io-tests - pass: 3,
* ltp-ipc-tests - pass: 9,
* ltp-math-tests - pass: 11,
* ltp-nptl-tests - pass: 2,
* ltp-pty-tests - pass: 4,
* ltp-sched-tests - skip: 7, pass: 7,
* ltp-securebits-tests - pass: 4,
* ltp-syscalls-tests - skip: 94, pass: 1052, fail: 3
* ltp-timers-tests - pass: 13,

qemu_arm64
* boot - pass: 20, fail: 1
* kselftest - skip: 30, pass: 52, fail: 5
* libhugetlbfs - skip: 1, pass: 90,
* ltp-cap_bounds-tests - pass: 2,
* ltp-containers-tests - skip: 17, pass: 64,
* ltp-cve-tests - skip: 10, pass: 25,
* ltp-filecaps-tests - pass: 2,
* ltp-fs-tests - skip: 6, pass: 60,
* ltp-fs_bind-tests

linux-next: manual merge of the y2038 tree with the overlayfs tree

2018-06-14 Thread Stephen Rothwell

Hi Arnd,

Today's linux-next merge of the y2038 tree got conflicts in:

  fs/inode.c
  fs/overlayfs/inode.c
  fs/overlayfs/overlayfs.h

between various commits from the overlayfs tree and commits:

  8efd6894ff08 ("fs: add timespec64_truncate()")
  95582b008388 ("vfs: change inode times to use struct timespec64")

from the y2038 tree.

I fixed it up (I copied the resolutions that used to be in the merge of
the overlayfs into the y2038 tree - see below) and can carry the fix as
necessary. This is now fixed as far as linux-next is concerned, but any
non trivial conflicts should be mentioned to your upstream maintainer
when your tree is submitted for merging.  You may also want to consider
cooperating with the maintainer of the conflicting tree to minimise any
particularly complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc fs/inode.c
index 9a6fc2f2d220,9fe1f941be02..55373fcba3a5
--- a/fs/inode.c
+++ b/fs/inode.c
@@@ -1635,10 -1681,11 +1635,10 @@@ static int update_time(struct inode *in
   *This function automatically handles read only file systems and media,
   *as well as the "noatime" flag and inode specific "noatime" markers.
   */
 -bool __atime_needs_update(const struct path *path, struct inode *inode,
 -bool rcu)
 +bool atime_needs_update(const struct path *path, struct inode *inode)
  {
struct vfsmount *mnt = path->mnt;
-   struct timespec now;
+   struct timespec64 now;
  
if (inode->i_flags & S_NOATIME)
return false;
@@@ -1661,10 -1708,10 +1661,10 @@@
  
now = current_time(inode);
  
-   if (!relatime_need_update(mnt, inode, now))
 -  if (!relatime_need_update(path, inode, timespec64_to_timespec(now), 
rcu))
++  if (!relatime_need_update(mnt, inode, timespec64_to_timespec(now)))
return false;
  
-   if (timespec_equal(&inode->i_atime, &now))
+   if (timespec64_equal(&inode->i_atime, &now))
return false;
  
return true;
@@@ -1674,9 -1721,9 +1674,9 @@@ void touch_atime(const struct path *pat
  {
struct vfsmount *mnt = path->mnt;
struct inode *inode = d_inode(path->dentry);
-   struct timespec now;
+   struct timespec64 now;
  
 -  if (!__atime_needs_update(path, inode, false))
 +  if (!atime_needs_update(path, inode))
return;
  
if (!sb_start_write_trylock(inode->i_sb))
diff --cc fs/overlayfs/file.c
index f801e1175a0b,..c6bce11ac6d3
mode 100644,00..100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@@ -1,508 -1,0 +1,508 @@@
 +/*
 + * Copyright (C) 2017 Red Hat, Inc.
 + *
 + * This program is free software; you can redistribute it and/or modify it
 + * under the terms of the GNU General Public License version 2 as published by
 + * the Free Software Foundation.
 + */
 +
 +#include 
 +#include 
 +#include 
 +#include 
 +#include 
 +#include "overlayfs.h"
 +
 +static char ovl_whatisit(struct inode *inode, struct inode *realinode)
 +{
 +  if (realinode != ovl_inode_upper(inode))
 +  return 'l';
 +  if (ovl_has_upperdata(inode))
 +  return 'u';
 +  else
 +  return 'm';
 +}
 +
 +static struct file *ovl_open_realfile(const struct file *file,
 +struct inode *realinode)
 +{
 +  struct inode *inode = file_inode(file);
 +  struct file *realfile;
 +  const struct cred *old_cred;
 +
 +  old_cred = ovl_override_creds(inode->i_sb);
 +  realfile = path_open(&file->f_path, file->f_flags | O_NOATIME,
 +   realinode, current_cred(), false);
 +  revert_creds(old_cred);
 +
 +  pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
 +   file, file, ovl_whatisit(inode, realinode), file->f_flags,
 +   realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
 +
 +  return realfile;
 +}
 +
 +#define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
 +
 +static int ovl_change_flags(struct file *file, unsigned int flags)
 +{
 +  struct inode *inode = file_inode(file);
 +  int err;
 +
 +  /* No atime modificaton on underlying */
 +  flags |= O_NOATIME;
 +
 +  /* If some flag changed that cannot be changed then something's amiss */
 +  if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK))
 +  return -EIO;
 +
 +  flags &= OVL_SETFL_MASK;
 +
 +  if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
 +  return -EPERM;
 +
 +  if (flags & O_DIRECT) {
 +  if (!file->f_mapping->a_ops ||
 +  !file->f_mapping->a_ops->direct_IO)
 +  return -EINVAL;
 +  }
 +
 +  if (file->f_op->check_flags) {
 +  err = file->f_op->check_flags(flags);
 +  if (err)
 +  return err;
 +  }
 +
 +  spin_lock(&file->f_lock);
 +  file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
 +  spin_unlock(&file->f_lock);
 +
 +

linux-next: manual merge of the y2038 tree with the overlayfs tree

2018-06-14 Thread Stephen Rothwell

Hi Arnd,

Today's linux-next merge of the y2038 tree got conflicts in:

  fs/inode.c
  fs/overlayfs/inode.c
  fs/overlayfs/overlayfs.h

between various commits from the overlayfs tree and commits:

  8efd6894ff08 ("fs: add timespec64_truncate()")
  95582b008388 ("vfs: change inode times to use struct timespec64")

from the y2038 tree.

I fixed it up (I copied the resolutions that used to be in the merge of
the overlayfs into the y2038 tree - see below) and can carry the fix as
necessary. This is now fixed as far as linux-next is concerned, but any
non trivial conflicts should be mentioned to your upstream maintainer
when your tree is submitted for merging.  You may also want to consider
cooperating with the maintainer of the conflicting tree to minimise any
particularly complex conflicts.

-- 
Cheers,
Stephen Rothwell

diff --cc fs/inode.c
index 9a6fc2f2d220,9fe1f941be02..55373fcba3a5
--- a/fs/inode.c
+++ b/fs/inode.c
@@@ -1635,10 -1681,11 +1635,10 @@@ static int update_time(struct inode *in
   *This function automatically handles read only file systems and media,
   *as well as the "noatime" flag and inode specific "noatime" markers.
   */
 -bool __atime_needs_update(const struct path *path, struct inode *inode,
 -bool rcu)
 +bool atime_needs_update(const struct path *path, struct inode *inode)
  {
struct vfsmount *mnt = path->mnt;
-   struct timespec now;
+   struct timespec64 now;
  
if (inode->i_flags & S_NOATIME)
return false;
@@@ -1661,10 -1708,10 +1661,10 @@@
  
now = current_time(inode);
  
-   if (!relatime_need_update(mnt, inode, now))
 -  if (!relatime_need_update(path, inode, timespec64_to_timespec(now), 
rcu))
++  if (!relatime_need_update(mnt, inode, timespec64_to_timespec(now)))
return false;
  
-   if (timespec_equal(&inode->i_atime, &now))
+   if (timespec64_equal(&inode->i_atime, &now))
return false;
  
return true;
@@@ -1674,9 -1721,9 +1674,9 @@@ void touch_atime(const struct path *pat
  {
struct vfsmount *mnt = path->mnt;
struct inode *inode = d_inode(path->dentry);
-   struct timespec now;
+   struct timespec64 now;
  
 -  if (!__atime_needs_update(path, inode, false))
 +  if (!atime_needs_update(path, inode))
return;
  
if (!sb_start_write_trylock(inode->i_sb))
diff --cc fs/overlayfs/file.c
index f801e1175a0b,..c6bce11ac6d3
mode 100644,00..100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@@ -1,508 -1,0 +1,508 @@@
 +/*
 + * Copyright (C) 2017 Red Hat, Inc.
 + *
 + * This program is free software; you can redistribute it and/or modify it
 + * under the terms of the GNU General Public License version 2 as published by
 + * the Free Software Foundation.
 + */
 +
 +#include 
 +#include 
 +#include 
 +#include 
 +#include 
 +#include "overlayfs.h"
 +
 +static char ovl_whatisit(struct inode *inode, struct inode *realinode)
 +{
 +  if (realinode != ovl_inode_upper(inode))
 +  return 'l';
 +  if (ovl_has_upperdata(inode))
 +  return 'u';
 +  else
 +  return 'm';
 +}
 +
 +static struct file *ovl_open_realfile(const struct file *file,
 +struct inode *realinode)
 +{
 +  struct inode *inode = file_inode(file);
 +  struct file *realfile;
 +  const struct cred *old_cred;
 +
 +  old_cred = ovl_override_creds(inode->i_sb);
 +  realfile = path_open(&file->f_path, file->f_flags | O_NOATIME,
 +   realinode, current_cred(), false);
 +  revert_creds(old_cred);
 +
 +  pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
 +   file, file, ovl_whatisit(inode, realinode), file->f_flags,
 +   realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
 +
 +  return realfile;
 +}
 +
 +#define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
 +
 +static int ovl_change_flags(struct file *file, unsigned int flags)
 +{
 +  struct inode *inode = file_inode(file);
 +  int err;
 +
 +  /* No atime modification on underlying */
 +  flags |= O_NOATIME;
 +
 +  /* If some flag changed that cannot be changed then something's amiss */
 +  if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK))
 +  return -EIO;
 +
 +  flags &= OVL_SETFL_MASK;
 +
 +  if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
 +  return -EPERM;
 +
 +  if (flags & O_DIRECT) {
 +  if (!file->f_mapping->a_ops ||
 +  !file->f_mapping->a_ops->direct_IO)
 +  return -EINVAL;
 +  }
 +
 +  if (file->f_op->check_flags) {
 +  err = file->f_op->check_flags(flags);
 +  if (err)
 +  return err;
 +  }
 +
 +  spin_lock(&file->f_lock);
 +  file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
 +  spin_unlock(&file->f_lock);
 +
 +

Re: [PATCH 4.14 00/36] 4.14.50-stable review

2018-06-14 Thread Naresh Kamboju

On 14 June 2018 at 19:34, Greg Kroah-Hartman  wrote:
> This is the start of the stable review cycle for the 4.14.50 release.
> There are 36 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
>
> Responses should be made by Sat Jun 16 13:21:44 UTC 2018.
> Anything received after that time might be too late.
>
> The whole patch series can be found in one patch at:
> 
> https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.14.50-rc1.gz
> or in the git tree and branch at:
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.14.y
> and the diffstat can be found below.
>
> thanks,
>
> greg k-h

Results from Linaro’s test farm.
No regressions on arm64, arm and x86_64.

NOTE:
fcntl36 is an intermittent failure on qemu arm32 for all stable branches.
fcntl36.c:205: FAIL: Unexpected data offset 12304 value 9
Old open bug to investigate, https://bugs.linaro.org/show_bug.cgi?id=3339

Summary


kernel: 4.14.50-rc1
git repo:
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
git branch: linux-4.14.y
git commit: 3fb86a556ae52204eac4b69373ce236510a9a91b
git describe: v4.14.49-37-g3fb86a556ae5
Test details: https://qa-reports.linaro.org/lkft/linux-stable-rc-4.14-oe/build \
/v4.14.49-37-g3fb86a556ae5
^ Please join URL

No regressions (compared to build v4.14.49)


Ran 11475 total tests in the following environments and test suites.

Environments
--
- dragonboard-410c - arm64
- hi6220-hikey - arm64
- juno-r2 - arm64
- qemu_arm
- qemu_arm64
- qemu_x86_64
- x15 - arm
- x86_64

Test Suites
---
* boot
* kselftest
* libhugetlbfs
* ltp-cap_bounds-tests
* ltp-containers-tests
* ltp-cve-tests
* ltp-fcntl-locktests-tests
* ltp-filecaps-tests
* ltp-fs-tests
* ltp-fs_bind-tests
* ltp-fs_perms_simple-tests
* ltp-fsx-tests
* ltp-hugetlb-tests
* ltp-io-tests
* ltp-ipc-tests
* ltp-math-tests
* ltp-nptl-tests
* ltp-pty-tests
* ltp-securebits-tests
* ltp-syscalls-tests
* ltp-timers-tests
* ltp-sched-tests
* kselftest-vsyscall-mode-native
* kselftest-vsyscall-mode-none

-- 
Linaro LKFT
https://lkft.linaro.org

Re: [PATCH 4.14 00/36] 4.14.50-stable review

2018-06-14 Thread Naresh Kamboju

On 14 June 2018 at 19:34, Greg Kroah-Hartman  wrote:
> This is the start of the stable review cycle for the 4.14.50 release.
> There are 36 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
>
> Responses should be made by Sat Jun 16 13:21:44 UTC 2018.
> Anything received after that time might be too late.
>
> The whole patch series can be found in one patch at:
> 
> https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.14.50-rc1.gz
> or in the git tree and branch at:
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.14.y
> and the diffstat can be found below.
>
> thanks,
>
> greg k-h

Results from Linaro’s test farm.
No regressions on arm64, arm and x86_64.

NOTE:
fcntl36 is an intermittent failure on qemu arm32 for all stable branches.
fcntl36.c:205: FAIL: Unexpected data offset 12304 value 9
Old open bug to investigate, https://bugs.linaro.org/show_bug.cgi?id=3339

Summary


kernel: 4.14.50-rc1
git repo:
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
git branch: linux-4.14.y
git commit: 3fb86a556ae52204eac4b69373ce236510a9a91b
git describe: v4.14.49-37-g3fb86a556ae5
Test details: https://qa-reports.linaro.org/lkft/linux-stable-rc-4.14-oe/build \
/v4.14.49-37-g3fb86a556ae5
^ Please join URL

No regressions (compared to build v4.14.49)


Ran 11475 total tests in the following environments and test suites.

Environments
--
- dragonboard-410c - arm64
- hi6220-hikey - arm64
- juno-r2 - arm64
- qemu_arm
- qemu_arm64
- qemu_x86_64
- x15 - arm
- x86_64

Test Suites
---
* boot
* kselftest
* libhugetlbfs
* ltp-cap_bounds-tests
* ltp-containers-tests
* ltp-cve-tests
* ltp-fcntl-locktests-tests
* ltp-filecaps-tests
* ltp-fs-tests
* ltp-fs_bind-tests
* ltp-fs_perms_simple-tests
* ltp-fsx-tests
* ltp-hugetlb-tests
* ltp-io-tests
* ltp-ipc-tests
* ltp-math-tests
* ltp-nptl-tests
* ltp-pty-tests
* ltp-securebits-tests
* ltp-syscalls-tests
* ltp-timers-tests
* ltp-sched-tests
* kselftest-vsyscall-mode-native
* kselftest-vsyscall-mode-none

-- 
Linaro LKFT
https://lkft.linaro.org

Re: [PATCH 4.9 00/30] 4.9.109-stable review

2018-06-14 Thread Naresh Kamboju

On 14 June 2018 at 19:34, Greg Kroah-Hartman  wrote:
> This is the start of the stable review cycle for the 4.9.109 release.
> There are 30 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
>
> Responses should be made by Sat Jun 16 13:25:48 UTC 2018.
> Anything received after that time might be too late.
>
> The whole patch series can be found in one patch at:
> 
> https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.9.109-rc1.gz
> or in the git tree and branch at:
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.9.y
> and the diffstat can be found below.
>
> thanks,
>
> greg k-h

Results from Linaro’s test farm.
No regressions on arm64, arm and x86_64.

Summary


kernel: 4.9.109-rc1
git repo:
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
git branch: linux-4.9.y
git commit: 9778721e03d9551ee231bf73f4b4fc418e1ad158
git describe: v4.9.108-31-g9778721e03d9
Test details: https://qa-reports.linaro.org/lkft/linux-stable-rc-4.9-oe/build \
/v4.9.108-31-g9778721e03d9
^ please join URL

No regressions (compared to build v4.9.108-12-g311d47aaa645)


Ran 11388 total tests in the following environments and test suites.

Environments
--
- dragonboard-410c - arm64
- hi6220-hikey - arm64
- juno-r2 - arm64
- qemu_arm
- qemu_arm64
- qemu_x86_64
- x15 - arm
- x86_64

Test Suites
---
* boot
* kselftest
* libhugetlbfs
* ltp-cap_bounds-tests
* ltp-containers-tests
* ltp-cve-tests
* ltp-fcntl-locktests-tests
* ltp-filecaps-tests
* ltp-fs-tests
* ltp-fs_bind-tests
* ltp-fs_perms_simple-tests
* ltp-fsx-tests
* ltp-hugetlb-tests
* ltp-io-tests
* ltp-ipc-tests
* ltp-math-tests
* ltp-nptl-tests
* ltp-pty-tests
* ltp-sched-tests
* ltp-securebits-tests
* ltp-syscalls-tests
* ltp-timers-tests
* kselftest-vsyscall-mode-native
* kselftest-vsyscall-mode-none

-- 
Linaro LKFT
https://lkft.linaro.org

Re: [PATCH 4.9 00/30] 4.9.109-stable review

2018-06-14 Thread Naresh Kamboju

On 14 June 2018 at 19:34, Greg Kroah-Hartman  wrote:
> This is the start of the stable review cycle for the 4.9.109 release.
> There are 30 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
>
> Responses should be made by Sat Jun 16 13:25:48 UTC 2018.
> Anything received after that time might be too late.
>
> The whole patch series can be found in one patch at:
> 
> https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.9.109-rc1.gz
> or in the git tree and branch at:
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.9.y
> and the diffstat can be found below.
>
> thanks,
>
> greg k-h

Results from Linaro’s test farm.
No regressions on arm64, arm and x86_64.

Summary


kernel: 4.9.109-rc1
git repo:
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
git branch: linux-4.9.y
git commit: 9778721e03d9551ee231bf73f4b4fc418e1ad158
git describe: v4.9.108-31-g9778721e03d9
Test details: https://qa-reports.linaro.org/lkft/linux-stable-rc-4.9-oe/build \
/v4.9.108-31-g9778721e03d9
^ please join URL

No regressions (compared to build v4.9.108-12-g311d47aaa645)


Ran 11388 total tests in the following environments and test suites.

Environments
--
- dragonboard-410c - arm64
- hi6220-hikey - arm64
- juno-r2 - arm64
- qemu_arm
- qemu_arm64
- qemu_x86_64
- x15 - arm
- x86_64

Test Suites
---
* boot
* kselftest
* libhugetlbfs
* ltp-cap_bounds-tests
* ltp-containers-tests
* ltp-cve-tests
* ltp-fcntl-locktests-tests
* ltp-filecaps-tests
* ltp-fs-tests
* ltp-fs_bind-tests
* ltp-fs_perms_simple-tests
* ltp-fsx-tests
* ltp-hugetlb-tests
* ltp-io-tests
* ltp-ipc-tests
* ltp-math-tests
* ltp-nptl-tests
* ltp-pty-tests
* ltp-sched-tests
* ltp-securebits-tests
* ltp-syscalls-tests
* ltp-timers-tests
* kselftest-vsyscall-mode-native
* kselftest-vsyscall-mode-none

-- 
Linaro LKFT
https://lkft.linaro.org

Re: [PATCH 4.4 00/24] 4.4.138-stable review

2018-06-14 Thread Naresh Kamboju

On 14 June 2018 at 19:34, Greg Kroah-Hartman  wrote:
> This is the start of the stable review cycle for the 4.4.138 release.
> There are 24 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
>
> Responses should be made by Sat Jun 16 13:27:15 UTC 2018.
> Anything received after that time might be too late.
>
> The whole patch series can be found in one patch at:
> 
> https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.4.138-rc1.gz
> or in the git tree and branch at:
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.4.y
> and the diffstat can be found below.
>
> thanks,
>
> greg k-h

Results from Linaro’s test farm.
No regressions on arm64, arm and x86_64.

NOTE:
1) The earlier reported CVE-2011-2496 test failure now passes.
vma03   1  TPASS  :  mremap failed as expected

2) LTP: cve-2015-3290 failed intermittently on qemu_x86_64
https://bugs.linaro.org/show_bug.cgi?id=3910
We will investigate.

Summary


kernel: 4.4.138-rc1
git repo:
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
git branch: linux-4.4.y
git commit: 64f298340be794f1500c602285b542c4bfd3eb21
git describe: v4.4.137-25-g64f298340be7
Test details: https://qa-reports.linaro.org/lkft/linux-stable-rc-4.4-oe/build \
/v4.4.137-25-g64f298340be7
^ Please join URL

No regressions (compared to build v4.4.137-15-g7d690c56754e)


Ran 7174 total tests in the following environments and test suites.

Environments
--
- juno-r2 - arm64
- qemu_arm
- qemu_x86_64
- x15 - arm
- x86_64

Test Suites
---
* boot
* kselftest
* libhugetlbfs
* ltp-cap_bounds-tests
* ltp-containers-tests
* ltp-cve-tests
* ltp-fcntl-locktests-tests
* ltp-filecaps-tests
* ltp-fs-tests
* ltp-fs_bind-tests
* ltp-fs_perms_simple-tests
* ltp-fsx-tests
* ltp-hugetlb-tests
* ltp-io-tests
* ltp-ipc-tests
* ltp-math-tests
* ltp-nptl-tests
* ltp-pty-tests
* ltp-sched-tests
* ltp-securebits-tests
* ltp-syscalls-tests
* ltp-timers-tests
* kselftest-vsyscall-mode-native
* kselftest-vsyscall-mode-none
Summary


kernel: 4.4.138-rc1
git repo: https://git.linaro.org/lkft/arm64-stable-rc.git
git branch: 4.4.138-rc1-hikey-20180614-218
git commit: 55a4e4dfb0ebf4bbc212a778883e72f06d3735b7
git describe: 4.4.138-rc1-hikey-20180614-218
Test details: https://qa-reports.linaro.org/lkft/ \
linaro-hikey-stable-rc-4.4-oe/build/4.4.138-rc1-hikey-20180614-218
^ Please join URL

No regressions (compared to build 4.4.138-rc1-hikey-20180613-217)


Ran 2629 total tests in the following environments and test suites.

Environments
--
- hi6220-hikey - arm64
- qemu_arm64

Test Suites
---
* boot
* kselftest
* libhugetlbfs
* ltp-cap_bounds-tests
* ltp-containers-tests
* ltp-cve-tests
* ltp-fcntl-locktests-tests
* ltp-filecaps-tests
* ltp-fs_bind-tests
* ltp-fs_perms_simple-tests
* ltp-fsx-tests
* ltp-hugetlb-tests
* ltp-io-tests
* ltp-ipc-tests
* ltp-math-tests
* ltp-nptl-tests
* ltp-pty-tests
* ltp-sched-tests
* ltp-securebits-tests
* ltp-syscalls-tests
* ltp-timers-tests
* ltp-fs-tests

-- 
Linaro LKFT
https://lkft.linaro.org

Re: [PATCH v2] dmaengine: rcar-dmac: don't use DMAC error interrupt

2018-06-14 Thread Kuninori Morimoto



Hi Vinod

Thank you for your feedback

> > From: Kuninori Morimoto 
> > 
> > Current rcar-dmac is using DMAC error interrupt which will handle all
> > channel's error. But in this design, error handling itself will be
> > issue if user want to use virtualization, multi OS, etc.
> > This patch removes current DMAC error interrupt handling, and handle it
> > on each channel interrupt handler.
> 
> So what happens when you are not in virtualization, multi OS..? Who
> handles this interrupt?

Sorry for my unclear English.
It has 2 types of interrupt.
 1) IRQ for Error
 2) IRQ for each channels

If error happens on some channels,
the error IRQ was handled by 1), and "all" channels will be restarted.
Thus, this patch removes 1), and handles each error on 2).

Best regards
---
Kuninori Morimoto

Re: [PATCH 4.4 00/24] 4.4.138-stable review

2018-06-14 Thread Naresh Kamboju

On 14 June 2018 at 19:34, Greg Kroah-Hartman  wrote:
> This is the start of the stable review cycle for the 4.4.138 release.
> There are 24 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
>
> Responses should be made by Sat Jun 16 13:27:15 UTC 2018.
> Anything received after that time might be too late.
>
> The whole patch series can be found in one patch at:
> 
> https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.4.138-rc1.gz
> or in the git tree and branch at:
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.4.y
> and the diffstat can be found below.
>
> thanks,
>
> greg k-h

Results from Linaro’s test farm.
No regressions on arm64, arm and x86_64.

NOTE:
1) The earlier reported CVE-2011-2496 test failure now passes.
vma03   1  TPASS  :  mremap failed as expected

2) LTP: cve-2015-3290 failed intermittently on qemu_x86_64
https://bugs.linaro.org/show_bug.cgi?id=3910
We will investigate.

Summary


kernel: 4.4.138-rc1
git repo:
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git
git branch: linux-4.4.y
git commit: 64f298340be794f1500c602285b542c4bfd3eb21
git describe: v4.4.137-25-g64f298340be7
Test details: https://qa-reports.linaro.org/lkft/linux-stable-rc-4.4-oe/build \
/v4.4.137-25-g64f298340be7
^ Please join URL

No regressions (compared to build v4.4.137-15-g7d690c56754e)


Ran 7174 total tests in the following environments and test suites.

Environments
--
- juno-r2 - arm64
- qemu_arm
- qemu_x86_64
- x15 - arm
- x86_64

Test Suites
---
* boot
* kselftest
* libhugetlbfs
* ltp-cap_bounds-tests
* ltp-containers-tests
* ltp-cve-tests
* ltp-fcntl-locktests-tests
* ltp-filecaps-tests
* ltp-fs-tests
* ltp-fs_bind-tests
* ltp-fs_perms_simple-tests
* ltp-fsx-tests
* ltp-hugetlb-tests
* ltp-io-tests
* ltp-ipc-tests
* ltp-math-tests
* ltp-nptl-tests
* ltp-pty-tests
* ltp-sched-tests
* ltp-securebits-tests
* ltp-syscalls-tests
* ltp-timers-tests
* kselftest-vsyscall-mode-native
* kselftest-vsyscall-mode-none
Summary


kernel: 4.4.138-rc1
git repo: https://git.linaro.org/lkft/arm64-stable-rc.git
git branch: 4.4.138-rc1-hikey-20180614-218
git commit: 55a4e4dfb0ebf4bbc212a778883e72f06d3735b7
git describe: 4.4.138-rc1-hikey-20180614-218
Test details: https://qa-reports.linaro.org/lkft/ \
linaro-hikey-stable-rc-4.4-oe/build/4.4.138-rc1-hikey-20180614-218
^ Please join URL

No regressions (compared to build 4.4.138-rc1-hikey-20180613-217)


Ran 2629 total tests in the following environments and test suites.

Environments
--
- hi6220-hikey - arm64
- qemu_arm64

Test Suites
---
* boot
* kselftest
* libhugetlbfs
* ltp-cap_bounds-tests
* ltp-containers-tests
* ltp-cve-tests
* ltp-fcntl-locktests-tests
* ltp-filecaps-tests
* ltp-fs_bind-tests
* ltp-fs_perms_simple-tests
* ltp-fsx-tests
* ltp-hugetlb-tests
* ltp-io-tests
* ltp-ipc-tests
* ltp-math-tests
* ltp-nptl-tests
* ltp-pty-tests
* ltp-sched-tests
* ltp-securebits-tests
* ltp-syscalls-tests
* ltp-timers-tests
* ltp-fs-tests

-- 
Linaro LKFT
https://lkft.linaro.org

Re: [PATCH v2] dmaengine: rcar-dmac: don't use DMAC error interrupt

2018-06-14 Thread Kuninori Morimoto



Hi Vinod

Thank you for your feedback

> > From: Kuninori Morimoto 
> > 
> > Current rcar-dmac is using DMAC error interrupt which will handle all
> > channel's error. But in this design, error handling itself will be
> > issue if user want to use virtualization, multi OS, etc.
> > This patch removes current DMAC error interrupt handling, and handle it
> > on each channel interrupt handler.
> 
> So what happens when you are not in virtualization, multi OS..? Who
> handles this interrupt?

Sorry for my unclear English.
It has 2 types of interrupt.
 1) IRQ for Error
 2) IRQ for each channels

If error happens on some channels,
the error IRQ was handled by 1), and "all" channels will be restarted.
Thus, this patch removes 1), and handles each error on 2).

Best regards
---
Kuninori Morimoto

Re: [PATCH] doc: add description to dirtytime_expire_seconds

2018-06-14 Thread Yang Shi


ping


Ted,


Any comment is appreciated.


Regards,

Yang



On 5/30/18 4:56 PM, Yang Shi wrote:

commit 1efff914afac8a965ad63817ecf8861a927c2ace ("fs: add
dirtytime_expire_seconds sysctl") introduced dirtytime_expire_seconds
knob, but there is no description of it in
Documentation/sysctl/vm.txt.

Add the description for it.

Cc: Theodore Ts'o 
Signed-off-by: Yang Shi 
---
I didn't dig into the old review discussion about why the description
was not added in the first place. I suppose every knob under /proc/sys
should have a brief description.

  Documentation/sysctl/vm.txt | 11 +++
  1 file changed, 11 insertions(+)

diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 17256f2..f4f4f9c 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -27,6 +27,7 @@ Currently, these files are in /proc/sys/vm:
  - dirty_bytes
  - dirty_expire_centisecs
  - dirty_ratio
+- dirtytime_expire_seconds
  - dirty_writeback_centisecs
  - drop_caches
  - extfrag_threshold
@@ -178,6 +179,16 @@ The total available memory is not equal to total system 
memory.
  
  ==
  
+dirtytime_expire_seconds

+
+When a lazytime inode is constantly having its pages dirtied, it with an
+updated timestamp will never get chance to be written out.  This tunable
+is used to define when dirty inode is old enough to be eligible for
+writeback by the kernel flusher threads. And, it is also used as the
+interval to wakeup dirtytime_writeback thread. It is expressed in seconds.
+
+==
+
  dirty_writeback_centisecs
  
  The kernel flusher threads will periodically wake up and write `old' data

Re: [PATCH] doc: add description to dirtytime_expire_seconds

2018-06-14 Thread Yang Shi


ping


Ted,


Any comment is appreciated.


Regards,

Yang



On 5/30/18 4:56 PM, Yang Shi wrote:

commit 1efff914afac8a965ad63817ecf8861a927c2ace ("fs: add
dirtytime_expire_seconds sysctl") introduced dirtytime_expire_seconds
knob, but there is no description of it in
Documentation/sysctl/vm.txt.

Add the description for it.

Cc: Theodore Ts'o 
Signed-off-by: Yang Shi 
---
I didn't dig into the old review discussion about why the description
was not added in the first place. I suppose every knob under /proc/sys
should have a brief description.

  Documentation/sysctl/vm.txt | 11 +++
  1 file changed, 11 insertions(+)

diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 17256f2..f4f4f9c 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -27,6 +27,7 @@ Currently, these files are in /proc/sys/vm:
  - dirty_bytes
  - dirty_expire_centisecs
  - dirty_ratio
+- dirtytime_expire_seconds
  - dirty_writeback_centisecs
  - drop_caches
  - extfrag_threshold
@@ -178,6 +179,16 @@ The total available memory is not equal to total system 
memory.
  
  ==
  
+dirtytime_expire_seconds

+
+When a lazytime inode is constantly having its pages dirtied, it with an
+updated timestamp will never get chance to be written out.  This tunable
+is used to define when dirty inode is old enough to be eligible for
+writeback by the kernel flusher threads. And, it is also used as the
+interval to wakeup dirtytime_writeback thread. It is expressed in seconds.
+
+==
+
  dirty_writeback_centisecs
  
  The kernel flusher threads will periodically wake up and write `old' data

[PATCH] autofs - fix directory and symlink access

2018-06-14 Thread Ian Kent

Depending on how it is configured the autofs user space daemon can
leave in use mounts mounted at exit and re-connect to them at start
up. But for this to work best the state of the autofs file system
needs to be left intact over the restart.

Also, at system shutdown, mounts in an autofs file system might be
umounted exposing a mount point trigger for which subsequent access
can lead to a hang. So recent versions of automount(8) now do their
best to set autofs file system mounts catatonic at shutdown.

When autofs file system mounts are catatonic it's currently possible
to create and remove directories and symlinks which can be a problem
at restart, as described above.

So return EACCES in the directory, symlink and unlink methods if the
autofs file system is catatonic.
---
 fs/autofs/root.c |   33 ++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index a3d414150578..782e57b911ab 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -559,6 +559,13 @@ static int autofs_dir_symlink(struct inode *dir,
if (!autofs_oz_mode(sbi))
return -EACCES;
 
+   /* autofs_oz_mode() needs to allow path walks when the
+* autofs mount is catatonic but the state of an autofs
+* file system needs to be preserved over restarts.
+*/
+   if (sbi->catatonic)
+   return -EACCES;
+
BUG_ON(!ino);
 
autofs_clean_ino(ino);
@@ -612,9 +619,15 @@ static int autofs_dir_unlink(struct inode *dir, struct 
dentry *dentry)
struct autofs_info *ino = autofs_dentry_ino(dentry);
struct autofs_info *p_ino;
 
-   /* This allows root to remove symlinks */
-   if (!autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN))
-   return -EPERM;
+   if (!autofs_oz_mode(sbi))
+   return -EACCES;
+
+   /* autofs_oz_mode() needs to allow path walks when the
+* autofs mount is catatonic but the state of an autofs
+* file system needs to be preserved over restarts.
+*/
+   if (sbi->catatonic)
+   return -EACCES;
 
if (atomic_dec_and_test(&ino->count)) {
p_ino = autofs_dentry_ino(dentry->d_parent);
@@ -697,6 +710,13 @@ static int autofs_dir_rmdir(struct inode *dir, struct 
dentry *dentry)
if (!autofs_oz_mode(sbi))
return -EACCES;
 
+   /* autofs_oz_mode() needs to allow path walks when the
+* autofs mount is catatonic but the state of an autofs
+* file system needs to be preserved over restarts.
+*/
+   if (sbi->catatonic)
+   return -EACCES;
+
spin_lock(&sbi->lookup_lock);
if (!simple_empty(dentry)) {
spin_unlock(&sbi->lookup_lock);
@@ -735,6 +755,13 @@ static int autofs_dir_mkdir(struct inode *dir,
if (!autofs_oz_mode(sbi))
return -EACCES;
 
+   /* autofs_oz_mode() needs to allow path walks when the
+* autofs mount is catatonic but the state of an autofs
+* file system needs to be preserved over restarts.
+*/
+   if (sbi->catatonic)
+   return -EACCES;
+
pr_debug("dentry %p, creating %pd\n", dentry, dentry);
 
BUG_ON(!ino);

[PATCH] autofs - fix directory and symlink access

2018-06-14 Thread Ian Kent

Depending on how it is configured the autofs user space daemon can
leave in use mounts mounted at exit and re-connect to them at start
up. But for this to work best the state of the autofs file system
needs to be left intact over the restart.

Also, at system shutdown, mounts in an autofs file system might be
umounted exposing a mount point trigger for which subsequent access
can lead to a hang. So recent versions of automount(8) now do their
best to set autofs file system mounts catatonic at shutdown.

When autofs file system mounts are catatonic it's currently possible
to create and remove directories and symlinks which can be a problem
at restart, as described above.

So return EACCES in the directory, symlink and unlink methods if the
autofs file system is catatonic.
---
 fs/autofs/root.c |   33 ++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index a3d414150578..782e57b911ab 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -559,6 +559,13 @@ static int autofs_dir_symlink(struct inode *dir,
if (!autofs_oz_mode(sbi))
return -EACCES;
 
+   /* autofs_oz_mode() needs to allow path walks when the
+* autofs mount is catatonic but the state of an autofs
+* file system needs to be preserved over restarts.
+*/
+   if (sbi->catatonic)
+   return -EACCES;
+
BUG_ON(!ino);
 
autofs_clean_ino(ino);
@@ -612,9 +619,15 @@ static int autofs_dir_unlink(struct inode *dir, struct 
dentry *dentry)
struct autofs_info *ino = autofs_dentry_ino(dentry);
struct autofs_info *p_ino;
 
-   /* This allows root to remove symlinks */
-   if (!autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN))
-   return -EPERM;
+   if (!autofs_oz_mode(sbi))
+   return -EACCES;
+
+   /* autofs_oz_mode() needs to allow path walks when the
+* autofs mount is catatonic but the state of an autofs
+* file system needs to be preserved over restarts.
+*/
+   if (sbi->catatonic)
+   return -EACCES;
 
if (atomic_dec_and_test(&ino->count)) {
p_ino = autofs_dentry_ino(dentry->d_parent);
@@ -697,6 +710,13 @@ static int autofs_dir_rmdir(struct inode *dir, struct 
dentry *dentry)
if (!autofs_oz_mode(sbi))
return -EACCES;
 
+   /* autofs_oz_mode() needs to allow path walks when the
+* autofs mount is catatonic but the state of an autofs
+* file system needs to be preserved over restarts.
+*/
+   if (sbi->catatonic)
+   return -EACCES;
+
spin_lock(&sbi->lookup_lock);
if (!simple_empty(dentry)) {
spin_unlock(&sbi->lookup_lock);
@@ -735,6 +755,13 @@ static int autofs_dir_mkdir(struct inode *dir,
if (!autofs_oz_mode(sbi))
return -EACCES;
 
+   /* autofs_oz_mode() needs to allow path walks when the
+* autofs mount is catatonic but the state of an autofs
+* file system needs to be preserved over restarts.
+*/
+   if (sbi->catatonic)
+   return -EACCES;
+
pr_debug("dentry %p, creating %pd\n", dentry, dentry);
 
BUG_ON(!ino);

[PATCH 3/4] MIPS: Wire up the restartable sequences (rseq) syscall

2018-06-14 Thread Paul Burton

Wire up the restartable sequences (rseq) syscall for MIPS. This was
introduced by commit d7822b1e24f2 ("rseq: Introduce restartable
sequences system call") & MIPS now supports the prerequisites.

Signed-off-by: Paul Burton 
Cc: James Hogan 
Cc: Ralf Baechle 
Cc: Mathieu Desnoyers 
Cc: Peter Zijlstra 
Cc: Paul E. McKenney 
Cc: Boqun Feng 
Cc: linux-m...@linux-mips.org
Cc: linux-kernel@vger.kernel.org
---

 arch/mips/include/uapi/asm/unistd.h | 15 +--
 arch/mips/kernel/scall32-o32.S  |  1 +
 arch/mips/kernel/scall64-64.S   |  1 +
 arch/mips/kernel/scall64-n32.S  |  1 +
 arch/mips/kernel/scall64-o32.S  |  1 +
 5 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/mips/include/uapi/asm/unistd.h 
b/arch/mips/include/uapi/asm/unistd.h
index bb05e9916a5f..170bf0b5b250 100644
--- a/arch/mips/include/uapi/asm/unistd.h
+++ b/arch/mips/include/uapi/asm/unistd.h
@@ -388,17 +388,18 @@
 #define __NR_pkey_alloc(__NR_Linux + 364)
 #define __NR_pkey_free (__NR_Linux + 365)
 #define __NR_statx (__NR_Linux + 366)
+#define __NR_rseq  (__NR_Linux + 367)
 
 
 /*
  * Offset of the last Linux o32 flavoured syscall
  */
-#define __NR_Linux_syscalls366
+#define __NR_Linux_syscalls367
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
 
 #define __NR_O32_Linux 4000
-#define __NR_O32_Linux_syscalls366
+#define __NR_O32_Linux_syscalls367
 
 #if _MIPS_SIM == _MIPS_SIM_ABI64
 
@@ -733,16 +734,17 @@
 #define __NR_pkey_alloc(__NR_Linux + 324)
 #define __NR_pkey_free (__NR_Linux + 325)
 #define __NR_statx (__NR_Linux + 326)
+#define __NR_rseq  (__NR_Linux + 327)
 
 /*
  * Offset of the last Linux 64-bit flavoured syscall
  */
-#define __NR_Linux_syscalls326
+#define __NR_Linux_syscalls327
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
 
 #define __NR_64_Linux  5000
-#define __NR_64_Linux_syscalls 326
+#define __NR_64_Linux_syscalls 327
 
 #if _MIPS_SIM == _MIPS_SIM_NABI32
 
@@ -1081,15 +1083,16 @@
 #define __NR_pkey_alloc(__NR_Linux + 328)
 #define __NR_pkey_free (__NR_Linux + 329)
 #define __NR_statx (__NR_Linux + 330)
+#define __NR_rseq  (__NR_Linux + 331)
 
 /*
  * Offset of the last N32 flavoured syscall
  */
-#define __NR_Linux_syscalls330
+#define __NR_Linux_syscalls331
 
 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
 
 #define __NR_N32_Linux 6000
-#define __NR_N32_Linux_syscalls330
+#define __NR_N32_Linux_syscalls331
 
 #endif /* _UAPI_ASM_UNISTD_H */
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index a9a7d78803cd..842ff1612893 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -590,3 +590,4 @@ EXPORT(sys_call_table)
PTR sys_pkey_alloc
PTR sys_pkey_free   /* 4365 */
PTR sys_statx
+   PTR sys_rseq
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index 65d5aeeb9bdb..558830d1e5ba 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -439,4 +439,5 @@ EXPORT(sys_call_table)
PTR sys_pkey_alloc
PTR sys_pkey_free   /* 5325 */
PTR sys_statx
+   PTR sys_rseq
.size   sys_call_table,.-sys_call_table
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index cbf190ef9e8a..293f0b0119f3 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -434,4 +434,5 @@ EXPORT(sysn32_call_table)
PTR sys_pkey_alloc
PTR sys_pkey_free
PTR sys_statx   /* 6330 */
+   PTR sys_rseq
.size   sysn32_call_table,.-sysn32_call_table
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 9ebe3e2403b1..f13a08de8078 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -583,4 +583,5 @@ EXPORT(sys32_call_table)
PTR sys_pkey_alloc
PTR sys_pkey_free   /* 4365 */
PTR sys_statx
+   PTR sys_rseq
.size   sys32_call_table,.-sys32_call_table
-- 
2.17.1

[PATCH 3/4] MIPS: Wire up the restartable sequences (rseq) syscall

2018-06-14 Thread Paul Burton

Wire up the restartable sequences (rseq) syscall for MIPS. This was
introduced by commit d7822b1e24f2 ("rseq: Introduce restartable
sequences system call") & MIPS now supports the prerequisites.

Signed-off-by: Paul Burton 
Cc: James Hogan 
Cc: Ralf Baechle 
Cc: Mathieu Desnoyers 
Cc: Peter Zijlstra 
Cc: Paul E. McKenney 
Cc: Boqun Feng 
Cc: linux-m...@linux-mips.org
Cc: linux-kernel@vger.kernel.org
---

 arch/mips/include/uapi/asm/unistd.h | 15 +--
 arch/mips/kernel/scall32-o32.S  |  1 +
 arch/mips/kernel/scall64-64.S   |  1 +
 arch/mips/kernel/scall64-n32.S  |  1 +
 arch/mips/kernel/scall64-o32.S  |  1 +
 5 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/mips/include/uapi/asm/unistd.h 
b/arch/mips/include/uapi/asm/unistd.h
index bb05e9916a5f..170bf0b5b250 100644
--- a/arch/mips/include/uapi/asm/unistd.h
+++ b/arch/mips/include/uapi/asm/unistd.h
@@ -388,17 +388,18 @@
 #define __NR_pkey_alloc(__NR_Linux + 364)
 #define __NR_pkey_free (__NR_Linux + 365)
 #define __NR_statx (__NR_Linux + 366)
+#define __NR_rseq  (__NR_Linux + 367)
 
 
 /*
  * Offset of the last Linux o32 flavoured syscall
  */
-#define __NR_Linux_syscalls366
+#define __NR_Linux_syscalls367
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
 
 #define __NR_O32_Linux 4000
-#define __NR_O32_Linux_syscalls366
+#define __NR_O32_Linux_syscalls367
 
 #if _MIPS_SIM == _MIPS_SIM_ABI64
 
@@ -733,16 +734,17 @@
 #define __NR_pkey_alloc(__NR_Linux + 324)
 #define __NR_pkey_free (__NR_Linux + 325)
 #define __NR_statx (__NR_Linux + 326)
+#define __NR_rseq  (__NR_Linux + 327)
 
 /*
  * Offset of the last Linux 64-bit flavoured syscall
  */
-#define __NR_Linux_syscalls326
+#define __NR_Linux_syscalls327
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
 
 #define __NR_64_Linux  5000
-#define __NR_64_Linux_syscalls 326
+#define __NR_64_Linux_syscalls 327
 
 #if _MIPS_SIM == _MIPS_SIM_NABI32
 
@@ -1081,15 +1083,16 @@
 #define __NR_pkey_alloc(__NR_Linux + 328)
 #define __NR_pkey_free (__NR_Linux + 329)
 #define __NR_statx (__NR_Linux + 330)
+#define __NR_rseq  (__NR_Linux + 331)
 
 /*
  * Offset of the last N32 flavoured syscall
  */
-#define __NR_Linux_syscalls330
+#define __NR_Linux_syscalls331
 
 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
 
 #define __NR_N32_Linux 6000
-#define __NR_N32_Linux_syscalls330
+#define __NR_N32_Linux_syscalls331
 
 #endif /* _UAPI_ASM_UNISTD_H */
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index a9a7d78803cd..842ff1612893 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -590,3 +590,4 @@ EXPORT(sys_call_table)
PTR sys_pkey_alloc
PTR sys_pkey_free   /* 4365 */
PTR sys_statx
+   PTR sys_rseq
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index 65d5aeeb9bdb..558830d1e5ba 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -439,4 +439,5 @@ EXPORT(sys_call_table)
PTR sys_pkey_alloc
PTR sys_pkey_free   /* 5325 */
PTR sys_statx
+   PTR sys_rseq
.size   sys_call_table,.-sys_call_table
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index cbf190ef9e8a..293f0b0119f3 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -434,4 +434,5 @@ EXPORT(sysn32_call_table)
PTR sys_pkey_alloc
PTR sys_pkey_free
PTR sys_statx   /* 6330 */
+   PTR sys_rseq
.size   sysn32_call_table,.-sysn32_call_table
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 9ebe3e2403b1..f13a08de8078 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -583,4 +583,5 @@ EXPORT(sys32_call_table)
PTR sys_pkey_alloc
PTR sys_pkey_free   /* 4365 */
PTR sys_statx
+   PTR sys_rseq
.size   sys32_call_table,.-sys32_call_table
-- 
2.17.1

[PATCH 4/4] rseq/selftests: Implement MIPS support

2018-06-14 Thread Paul Burton

Implement support for both MIPS32 & MIPS64 in the rseq selftests, in
order to sanity check the recently enabled rseq syscall.

The tests all pass on a MIPS Boston development board running either a
MIPS32r2 interAptiv CPU or a MIPS64r6 I6500 CPU, both of which were
configured with 2 cores, each of which has 2 hardware threads (VP(E)s) -
i.e. 4 CPUs.

Signed-off-by: Paul Burton 
Cc: James Hogan 
Cc: Ralf Baechle 
Cc: Mathieu Desnoyers 
Cc: Peter Zijlstra 
Cc: Paul E. McKenney 
Cc: Boqun Feng 
Cc: linux-m...@linux-mips.org
Cc: linux-kernel@vger.kernel.org

---

 tools/testing/selftests/rseq/param_test.c |  24 +
 tools/testing/selftests/rseq/rseq-mips.h  | 725 ++
 tools/testing/selftests/rseq/rseq.h   |   2 +
 3 files changed, 751 insertions(+)
 create mode 100644 tools/testing/selftests/rseq/rseq-mips.h

diff --git a/tools/testing/selftests/rseq/param_test.c 
b/tools/testing/selftests/rseq/param_test.c
index 6a9f602a8718..615252331813 100644
--- a/tools/testing/selftests/rseq/param_test.c
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -137,6 +137,30 @@ unsigned int yield_mod_cnt, nr_abort;
"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
"bne 222b\n\t" \
"333:\n\t"
+
+#elif defined(__mips__)
+
+#define RSEQ_INJECT_INPUT \
+   , [loop_cnt_1]"m"(loop_cnt[1]) \
+   , [loop_cnt_2]"m"(loop_cnt[2]) \
+   , [loop_cnt_3]"m"(loop_cnt[3]) \
+   , [loop_cnt_4]"m"(loop_cnt[4]) \
+   , [loop_cnt_5]"m"(loop_cnt[5]) \
+   , [loop_cnt_6]"m"(loop_cnt[6])
+
+#define INJECT_ASM_REG "$5"
+
+#define RSEQ_INJECT_CLOBBER \
+   , INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+   "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
+   "beqz " INJECT_ASM_REG ", 333f\n\t" \
+   "222:\n\t" \
+   "addiu " INJECT_ASM_REG ", -1\n\t" \
+   "bnez " INJECT_ASM_REG ", 222b\n\t" \
+   "333:\n\t"
+
 #else
 #error unsupported target
 #endif
diff --git a/tools/testing/selftests/rseq/rseq-mips.h 
b/tools/testing/selftests/rseq/rseq-mips.h
new file mode 100644
index ..63131352fbdd
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-mips.h
@@ -0,0 +1,725 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Author: Paul Burton 
+ * (C) Copyright 2018 MIPS Tech LLC
+ *
+ * Based on rseq-arm.h:
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers 
+ */
+
+#define RSEQ_SIG   0x53053053
+
+#define rseq_smp_mb()  __asm__ __volatile__ ("sync" ::: "memory")
+#define rseq_smp_rmb() rseq_smp_mb()
+#define rseq_smp_wmb() rseq_smp_mb()
+
+#define rseq_smp_load_acquire(p)   \
+__extension__ ({   \
+   __typeof(*p) p1 = RSEQ_READ_ONCE(*p);   \
+   rseq_smp_mb();  \
+   p1; \
+})
+
+#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v)   \
+do {   \
+   rseq_smp_mb();  \
+   RSEQ_WRITE_ONCE(*p, v); \
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+#if _MIPS_SZLONG == 64
+# define LONG  ".dword"
+# define LONG_LA   "dla"
+# define LONG_L"ld"
+# define LONG_S"sd"
+# define LONG_ADDI "daddiu"
+# define U32_U64_PAD(x)x
+#elif _MIPS_SZLONG == 32
+# define LONG  ".word"
+# define LONG_LA   "la"
+# define LONG_L"lw"
+# define LONG_S"sw"
+# define LONG_ADDI "addiu"
+# ifdef __BIG_ENDIAN
+#  define U32_U64_PAD(x)   "0x0, " x
+# else
+#  define U32_U64_PAD(x)   x ", 0x0"
+# endif
+#else
+# error unsupported _MIPS_SZLONG
+#endif
+
+#define __RSEQ_ASM_DEFINE_TABLE(version, flags,start_ip,   
\
+   post_commit_offset, abort_ip)   
\
+   ".pushsection __rseq_table, \"aw\"\n\t" 
\
+   ".balign 32\n\t"
\
+   ".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t"  
\
+   LONG " " U32_U64_PAD(__rseq_str(start_ip)) "\n\t"   
\
+   LONG " " U32_U64_PAD(__rseq_str(post_commit_offset)) "\n\t" 
\
+   LONG " " U32_U64_PAD(__rseq_str(abort_ip)) "\n\t"   
\
+   ".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_TABLE(start_ip, post_commit_ip, abort_ip)  
\
+   __RSEQ_ASM_DEFINE_TABLE(0x0, 0x0, start_ip, 
\
+

[PATCH 4/4] rseq/selftests: Implement MIPS support

2018-06-14 Thread Paul Burton

Implement support for both MIPS32 & MIPS64 in the rseq selftests, in
order to sanity check the recently enabled rseq syscall.

The tests all pass on a MIPS Boston development board running either a
MIPS32r2 interAptiv CPU or a MIPS64r6 I6500 CPU, both of which were
configured with 2 cores, each of which has 2 hardware threads (VP(E)s) -
i.e. 4 CPUs.

Signed-off-by: Paul Burton 
Cc: James Hogan 
Cc: Ralf Baechle 
Cc: Mathieu Desnoyers 
Cc: Peter Zijlstra 
Cc: Paul E. McKenney 
Cc: Boqun Feng 
Cc: linux-m...@linux-mips.org
Cc: linux-kernel@vger.kernel.org

---

 tools/testing/selftests/rseq/param_test.c |  24 +
 tools/testing/selftests/rseq/rseq-mips.h  | 725 ++
 tools/testing/selftests/rseq/rseq.h   |   2 +
 3 files changed, 751 insertions(+)
 create mode 100644 tools/testing/selftests/rseq/rseq-mips.h

diff --git a/tools/testing/selftests/rseq/param_test.c 
b/tools/testing/selftests/rseq/param_test.c
index 6a9f602a8718..615252331813 100644
--- a/tools/testing/selftests/rseq/param_test.c
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -137,6 +137,30 @@ unsigned int yield_mod_cnt, nr_abort;
"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
"bne 222b\n\t" \
"333:\n\t"
+
+#elif defined(__mips__)
+
+#define RSEQ_INJECT_INPUT \
+   , [loop_cnt_1]"m"(loop_cnt[1]) \
+   , [loop_cnt_2]"m"(loop_cnt[2]) \
+   , [loop_cnt_3]"m"(loop_cnt[3]) \
+   , [loop_cnt_4]"m"(loop_cnt[4]) \
+   , [loop_cnt_5]"m"(loop_cnt[5]) \
+   , [loop_cnt_6]"m"(loop_cnt[6])
+
+#define INJECT_ASM_REG "$5"
+
+#define RSEQ_INJECT_CLOBBER \
+   , INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+   "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
+   "beqz " INJECT_ASM_REG ", 333f\n\t" \
+   "222:\n\t" \
+   "addiu " INJECT_ASM_REG ", -1\n\t" \
+   "bnez " INJECT_ASM_REG ", 222b\n\t" \
+   "333:\n\t"
+
 #else
 #error unsupported target
 #endif
diff --git a/tools/testing/selftests/rseq/rseq-mips.h 
b/tools/testing/selftests/rseq/rseq-mips.h
new file mode 100644
index ..63131352fbdd
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-mips.h
@@ -0,0 +1,725 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Author: Paul Burton 
+ * (C) Copyright 2018 MIPS Tech LLC
+ *
+ * Based on rseq-arm.h:
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers 
+ */
+
+#define RSEQ_SIG   0x53053053
+
+#define rseq_smp_mb()  __asm__ __volatile__ ("sync" ::: "memory")
+#define rseq_smp_rmb() rseq_smp_mb()
+#define rseq_smp_wmb() rseq_smp_mb()
+
+#define rseq_smp_load_acquire(p)   \
+__extension__ ({   \
+   __typeof(*p) p1 = RSEQ_READ_ONCE(*p);   \
+   rseq_smp_mb();  \
+   p1; \
+})
+
+#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v)   \
+do {   \
+   rseq_smp_mb();  \
+   RSEQ_WRITE_ONCE(*p, v); \
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+#if _MIPS_SZLONG == 64
+# define LONG  ".dword"
+# define LONG_LA   "dla"
+# define LONG_L"ld"
+# define LONG_S"sd"
+# define LONG_ADDI "daddiu"
+# define U32_U64_PAD(x)x
+#elif _MIPS_SZLONG == 32
+# define LONG  ".word"
+# define LONG_LA   "la"
+# define LONG_L"lw"
+# define LONG_S"sw"
+# define LONG_ADDI "addiu"
+# ifdef __BIG_ENDIAN
+#  define U32_U64_PAD(x)   "0x0, " x
+# else
+#  define U32_U64_PAD(x)   x ", 0x0"
+# endif
+#else
+# error unsupported _MIPS_SZLONG
+#endif
+
+#define __RSEQ_ASM_DEFINE_TABLE(version, flags,start_ip,   
\
+   post_commit_offset, abort_ip)   
\
+   ".pushsection __rseq_table, \"aw\"\n\t" 
\
+   ".balign 32\n\t"
\
+   ".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t"  
\
+   LONG " " U32_U64_PAD(__rseq_str(start_ip)) "\n\t"   
\
+   LONG " " U32_U64_PAD(__rseq_str(post_commit_offset)) "\n\t" 
\
+   LONG " " U32_U64_PAD(__rseq_str(abort_ip)) "\n\t"   
\
+   ".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_TABLE(start_ip, post_commit_ip, abort_ip)  
\
+   __RSEQ_ASM_DEFINE_TABLE(0x0, 0x0, start_ip, 
\
+

[PATCH 1/4] MIPS: Add support for restartable sequences

2018-06-14 Thread Paul Burton

Implement support for restartable sequences on MIPS, which requires 3
simple things:

  - Call rseq_handle_notify_resume() on return to userspace if
TIF_NOTIFY_RESUME is set.

  - Call rseq_signal_deliver() to fixup the pre-signal stack frame when
a signal is delivered whilst executing a restartable sequence
critical section.

  - Select CONFIG_HAVE_RSEQ.

Signed-off-by: Paul Burton 
Cc: James Hogan 
Cc: Ralf Baechle 
Cc: Mathieu Desnoyers 
Cc: Peter Zijlstra 
Cc: Paul E. McKenney 
Cc: Boqun Feng 
Cc: linux-m...@linux-mips.org
Cc: linux-kernel@vger.kernel.org
---

 arch/mips/Kconfig | 1 +
 arch/mips/kernel/signal.c | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index fe98e459a416..afe2b0c867ac 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -66,6 +66,7 @@ config MIPS
select HAVE_OPROFILE
select HAVE_PERF_EVENTS
select HAVE_REGS_AND_STACK_ACCESS_API
+   select HAVE_RSEQ
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_VIRT_CPU_ACCOUNTING_GEN if 64BIT || !SMP
select IRQ_FORCED_THREADING
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index 9e224469c788..00f2535d2226 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -801,6 +801,8 @@ static void handle_signal(struct ksignal *ksig, struct 
pt_regs *regs)
regs->regs[0] = 0;  /* Don't deal with this again.  
*/
}
 
+   rseq_signal_deliver(regs);
+
if (sig_uses_siginfo(&ksig->ka, abi))
ret = abi->setup_rt_frame(vdso + abi->vdso->off_rt_sigreturn,
  ksig, regs, oldset);
@@ -868,6 +870,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, void 
*unused,
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
+   rseq_handle_notify_resume(regs);
}
 
user_enter();
-- 
2.17.1

[PATCH 1/4] MIPS: Add support for restartable sequences

2018-06-14 Thread Paul Burton

Implement support for restartable sequences on MIPS, which requires 3
simple things:

  - Call rseq_handle_notify_resume() on return to userspace if
TIF_NOTIFY_RESUME is set.

  - Call rseq_signal_deliver() to fixup the pre-signal stack frame when
a signal is delivered whilst executing a restartable sequence
critical section.

  - Select CONFIG_HAVE_RSEQ.

Signed-off-by: Paul Burton 
Cc: James Hogan 
Cc: Ralf Baechle 
Cc: Mathieu Desnoyers 
Cc: Peter Zijlstra 
Cc: Paul E. McKenney 
Cc: Boqun Feng 
Cc: linux-m...@linux-mips.org
Cc: linux-kernel@vger.kernel.org
---

 arch/mips/Kconfig | 1 +
 arch/mips/kernel/signal.c | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index fe98e459a416..afe2b0c867ac 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -66,6 +66,7 @@ config MIPS
select HAVE_OPROFILE
select HAVE_PERF_EVENTS
select HAVE_REGS_AND_STACK_ACCESS_API
+   select HAVE_RSEQ
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_VIRT_CPU_ACCOUNTING_GEN if 64BIT || !SMP
select IRQ_FORCED_THREADING
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index 9e224469c788..00f2535d2226 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -801,6 +801,8 @@ static void handle_signal(struct ksignal *ksig, struct 
pt_regs *regs)
regs->regs[0] = 0;  /* Don't deal with this again.  
*/
}
 
+   rseq_signal_deliver(regs);
+
if (sig_uses_siginfo(&ksig->ka, abi))
ret = abi->setup_rt_frame(vdso + abi->vdso->off_rt_sigreturn,
  ksig, regs, oldset);
@@ -868,6 +870,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, void 
*unused,
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
+   rseq_handle_notify_resume(regs);
}
 
user_enter();
-- 
2.17.1

[PATCH 2/4] MIPS: Add syscall detection for restartable sequences

2018-06-14 Thread Paul Burton

Syscalls are not allowed inside restartable sequences, so add a call to
rseq_syscall() at the very beginning of the system call exit path when
CONFIG_DEBUG_RSEQ=y. This will help us to detect whether there is a
syscall issued erroneously inside a restartable sequence.

Signed-off-by: Paul Burton 
Cc: James Hogan 
Cc: Ralf Baechle 
Cc: Mathieu Desnoyers 
Cc: Peter Zijlstra 
Cc: Paul E. McKenney 
Cc: Boqun Feng 
Cc: linux-m...@linux-mips.org
Cc: linux-kernel@vger.kernel.org
---

 arch/mips/kernel/entry.S | 8 
 1 file changed, 8 insertions(+)

diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index 38a302919e6b..d7de8adcfcc8 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S
@@ -79,6 +79,10 @@ FEXPORT(ret_from_fork)
jal schedule_tail   # a0 = struct task_struct *prev
 
 FEXPORT(syscall_exit)
+#ifdef CONFIG_DEBUG_RSEQ
+   move    a0, sp
+   jal rseq_syscall
+#endif
local_irq_disable   # make sure need_resched and
# signals dont change between
# sampling and return
@@ -141,6 +145,10 @@ work_notifysig:# deal with 
pending signals and
j   resume_userspace_check
 
 FEXPORT(syscall_exit_partial)
+#ifdef CONFIG_DEBUG_RSEQ
+   move    a0, sp
+   jal rseq_syscall
+#endif
local_irq_disable   # make sure need_resched doesn't
# change between and return
LONG_L  a2, TI_FLAGS($28)   # current->work
-- 
2.17.1

[PATCH 0/4] MIPS: Restartable sequences (rseq) support

2018-06-14 Thread Paul Burton

This series implements MIPS support for restartable sequences, hooks up
the rseq syscall & implements MIPS support in the rseq selftests.

Applies atop Linus' master as of 2837461dbe6f ("Merge tag 'scsi-fixes'
of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi").

Thanks,
Paul

Paul Burton (4):
  MIPS: Add support for restartable sequences
  MIPS: Add syscall detection for restartable sequences
  MIPS: Wire up the restartable sequences (rseq) syscall
  rseq/selftests: Implement MIPS support

 arch/mips/Kconfig |   1 +
 arch/mips/include/uapi/asm/unistd.h   |  15 +-
 arch/mips/kernel/entry.S  |   8 +
 arch/mips/kernel/scall32-o32.S|   1 +
 arch/mips/kernel/scall64-64.S |   1 +
 arch/mips/kernel/scall64-n32.S|   1 +
 arch/mips/kernel/scall64-o32.S|   1 +
 arch/mips/kernel/signal.c |   3 +
 tools/testing/selftests/rseq/param_test.c |  24 +
 tools/testing/selftests/rseq/rseq-mips.h  | 725 ++
 tools/testing/selftests/rseq/rseq.h   |   2 +
 11 files changed, 776 insertions(+), 6 deletions(-)
 create mode 100644 tools/testing/selftests/rseq/rseq-mips.h

-- 
2.17.1

[PATCH 0/4] MIPS: Restartable sequences (rseq) support

2018-06-14 Thread Paul Burton

This series implements MIPS support for restartable sequences, hooks up
the rseq syscall & implements MIPS support in the rseq selftests.

Applies atop Linus' master as of 2837461dbe6f ("Merge tag 'scsi-fixes'
of git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi").

Thanks,
Paul

Paul Burton (4):
  MIPS: Add support for restartable sequences
  MIPS: Add syscall detection for restartable sequences
  MIPS: Wire up the restartable sequences (rseq) syscall
  rseq/selftests: Implement MIPS support

 arch/mips/Kconfig |   1 +
 arch/mips/include/uapi/asm/unistd.h   |  15 +-
 arch/mips/kernel/entry.S  |   8 +
 arch/mips/kernel/scall32-o32.S|   1 +
 arch/mips/kernel/scall64-64.S |   1 +
 arch/mips/kernel/scall64-n32.S|   1 +
 arch/mips/kernel/scall64-o32.S|   1 +
 arch/mips/kernel/signal.c |   3 +
 tools/testing/selftests/rseq/param_test.c |  24 +
 tools/testing/selftests/rseq/rseq-mips.h  | 725 ++
 tools/testing/selftests/rseq/rseq.h   |   2 +
 11 files changed, 776 insertions(+), 6 deletions(-)
 create mode 100644 tools/testing/selftests/rseq/rseq-mips.h

-- 
2.17.1

[PATCH 2/4] MIPS: Add syscall detection for restartable sequences

2018-06-14 Thread Paul Burton

Syscalls are not allowed inside restartable sequences, so add a call to
rseq_syscall() at the very beginning of the system call exit path when
CONFIG_DEBUG_RSEQ=y. This will help us to detect whether there is a
syscall issued erroneously inside a restartable sequence.

Signed-off-by: Paul Burton 
Cc: James Hogan 
Cc: Ralf Baechle 
Cc: Mathieu Desnoyers 
Cc: Peter Zijlstra 
Cc: Paul E. McKenney 
Cc: Boqun Feng 
Cc: linux-m...@linux-mips.org
Cc: linux-kernel@vger.kernel.org
---

 arch/mips/kernel/entry.S | 8 
 1 file changed, 8 insertions(+)

diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index 38a302919e6b..d7de8adcfcc8 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S
@@ -79,6 +79,10 @@ FEXPORT(ret_from_fork)
jal schedule_tail   # a0 = struct task_struct *prev
 
 FEXPORT(syscall_exit)
+#ifdef CONFIG_DEBUG_RSEQ
+   move    a0, sp
+   jal rseq_syscall
+#endif
local_irq_disable   # make sure need_resched and
# signals dont change between
# sampling and return
@@ -141,6 +145,10 @@ work_notifysig:# deal with 
pending signals and
j   resume_userspace_check
 
 FEXPORT(syscall_exit_partial)
+#ifdef CONFIG_DEBUG_RSEQ
+   move    a0, sp
+   jal rseq_syscall
+#endif
local_irq_disable   # make sure need_resched doesn't
# change between and return
LONG_L  a2, TI_FLAGS($28)   # current->work
-- 
2.17.1

Re: [PATCH] drivers/of: Add devm_of_iomap()

2018-06-14 Thread Benjamin Herrenschmidt

On Thu, 2018-06-14 at 10:27 +0200, Geert Uytterhoeven wrote:
> > --- a/include/linux/of_address.h
> > +++ b/include/linux/of_address.h
> > @@ -40,6 +40,11 @@ extern void __iomem *of_iomap(struct device_node 
> > *device, int index);
> >   void __iomem *of_io_request_and_map(struct device_node *device,
> >  int index, const char *name);
> > 
> > +/* Request and map, wrapper on devm_ioremap_resource */
> > +extern void __iomem *devm_of_iomap(struct device *dev,
> > +  struct device_node *node, int index,
> > +  resource_size_t *size);
> > +
> >   /* Extract an address from a device, returns the region size and
> >* the address space flags too. The PCI version uses a BAR number
> >* instead of an absolute index
> 
> Do you need a dummy for !CONFIG_OF_ADDRESS, to aid compile-testing?

I didn't think so, as of_address_to_resource() already has a dummy, so
it should build fine.

Cheers,
Ben.

Re: [PATCH] drivers/of: Add devm_of_iomap()

2018-06-14 Thread Benjamin Herrenschmidt

On Thu, 2018-06-14 at 10:27 +0200, Geert Uytterhoeven wrote:
> > --- a/include/linux/of_address.h
> > +++ b/include/linux/of_address.h
> > @@ -40,6 +40,11 @@ extern void __iomem *of_iomap(struct device_node 
> > *device, int index);
> >   void __iomem *of_io_request_and_map(struct device_node *device,
> >  int index, const char *name);
> > 
> > +/* Request and map, wrapper on devm_ioremap_resource */
> > +extern void __iomem *devm_of_iomap(struct device *dev,
> > +  struct device_node *node, int index,
> > +  resource_size_t *size);
> > +
> >   /* Extract an address from a device, returns the region size and
> >* the address space flags too. The PCI version uses a BAR number
> >* instead of an absolute index
> 
> Do you need a dummy for !CONFIG_OF_ADDRESS, to aid compile-testing?

I didn't think so, as of_address_to_resource() already has a dummy, so
it should build fine.

Cheers,
Ben.

Re: [PATCH 1/5] KVM: hyperv: define VP assist page helpers

2018-06-14 Thread Liran Alon



- vkuzn...@redhat.com wrote:

> From: Ladi Prosek 
> 
> The state related to the VP assist page is still managed by the LAPIC
> code in the pv_eoi field.
> 
> Signed-off-by: Ladi Prosek 
> Signed-off-by: Vitaly Kuznetsov 
> ---
>  arch/x86/kvm/hyperv.c | 23 +--
>  arch/x86/kvm/hyperv.h |  4 
>  arch/x86/kvm/lapic.c  |  4 ++--
>  arch/x86/kvm/lapic.h  |  2 +-
>  arch/x86/kvm/x86.c|  2 +-
>  5 files changed, 29 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index 14e0d0ae4e0a..fdf659ca6167 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -688,6 +688,24 @@ void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu)
>   stimer_cleanup(&hv_vcpu->stimer[i]);
>  }
>  
> +bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu)
> +{
> + if (!(vcpu->arch.hyperv.hv_vapic &
> HV_X64_MSR_VP_ASSIST_PAGE_ENABLE))
> + return false;
> + return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
> +}
> +EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled);
> +
> +bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu,
> + struct hv_vp_assist_page *assist_page)
> +{
> + if (!kvm_hv_assist_page_enabled(vcpu))
> + return false;
> + return !kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data,
> +   assist_page, sizeof(*assist_page));
> +}
> +EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page);
> +
>  static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer)
>  {
>   struct hv_message *msg = &stimer->msg;
> @@ -1048,7 +1066,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu,
> u32 msr, u64 data, bool host)
>  
>   if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
>   hv->hv_vapic = data;
> - if (kvm_lapic_enable_pv_eoi(vcpu, 0))
> + if (kvm_lapic_enable_pv_eoi(vcpu, 0, 0))
>   return 1;
>   break;
>   }
> @@ -1061,7 +1079,8 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu,
> u32 msr, u64 data, bool host)
>   hv->hv_vapic = data;
>   kvm_vcpu_mark_page_dirty(vcpu, gfn);
>   if (kvm_lapic_enable_pv_eoi(vcpu,
> - gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
> + gfn_to_gpa(gfn) | KVM_MSR_ENABLED,
> + sizeof(struct hv_vp_assist_page)))
>   return 1;
>   break;
>   }
> diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
> index 837465d69c6d..db825bb7efc7 100644
> --- a/arch/x86/kvm/hyperv.h
> +++ b/arch/x86/kvm/hyperv.h
> @@ -62,6 +62,10 @@ void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
>  void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu);
>  void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu);
>  
> +bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu);
> +bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu,
> + struct hv_vp_assist_page *assist_page);
> +
>  static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct
> kvm_vcpu *vcpu,
>   int timer_index)
>  {
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 776391cf69a5..b6d6a36f1a33 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -2540,7 +2540,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu,
> u32 reg, u64 *data)
>   return 0;
>  }
>  
> -int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
> +int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned
> long len)
>  {
>   u64 addr = data & ~KVM_MSR_ENABLED;
>   if (!IS_ALIGNED(addr, 4))
> @@ -2550,7 +2550,7 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu
> *vcpu, u64 data)
>   if (!pv_eoi_enabled(vcpu))
>   return 0;
>   return kvm_gfn_to_hva_cache_init(vcpu->kvm,
> &vcpu->arch.pv_eoi.data,
> -  addr, sizeof(u8));
> +  addr, len);
>  }
>  
>  void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index ed0ed39abd36..ff6ef9c3d760 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -120,7 +120,7 @@ static inline bool
> kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
>   return vcpu->arch.hyperv.hv_vapic &
> HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
>  }
>  
> -int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
> +int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned
> long len);
>  void kvm_lapic_init(void);
>  void kvm_lapic_exit(void);
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 06dd4cdb2ca8..a57766b940a5 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2442,7 +2442,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu,
> struct msr_data *msr_info)
>  
>

Re: [PATCH 1/5] KVM: hyperv: define VP assist page helpers

2018-06-14 Thread Liran Alon



- vkuzn...@redhat.com wrote:

> From: Ladi Prosek 
> 
> The state related to the VP assist page is still managed by the LAPIC
> code in the pv_eoi field.
> 
> Signed-off-by: Ladi Prosek 
> Signed-off-by: Vitaly Kuznetsov 
> ---
>  arch/x86/kvm/hyperv.c | 23 +--
>  arch/x86/kvm/hyperv.h |  4 
>  arch/x86/kvm/lapic.c  |  4 ++--
>  arch/x86/kvm/lapic.h  |  2 +-
>  arch/x86/kvm/x86.c|  2 +-
>  5 files changed, 29 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index 14e0d0ae4e0a..fdf659ca6167 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -688,6 +688,24 @@ void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu)
>   stimer_cleanup(&hv_vcpu->stimer[i]);
>  }
>  
> +bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu)
> +{
> + if (!(vcpu->arch.hyperv.hv_vapic &
> HV_X64_MSR_VP_ASSIST_PAGE_ENABLE))
> + return false;
> + return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
> +}
> +EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled);
> +
> +bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu,
> + struct hv_vp_assist_page *assist_page)
> +{
> + if (!kvm_hv_assist_page_enabled(vcpu))
> + return false;
> + return !kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data,
> +   assist_page, sizeof(*assist_page));
> +}
> +EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page);
> +
>  static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer)
>  {
>   struct hv_message *msg = &stimer->msg;
> @@ -1048,7 +1066,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu,
> u32 msr, u64 data, bool host)
>  
>   if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
>   hv->hv_vapic = data;
> - if (kvm_lapic_enable_pv_eoi(vcpu, 0))
> + if (kvm_lapic_enable_pv_eoi(vcpu, 0, 0))
>   return 1;
>   break;
>   }
> @@ -1061,7 +1079,8 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu,
> u32 msr, u64 data, bool host)
>   hv->hv_vapic = data;
>   kvm_vcpu_mark_page_dirty(vcpu, gfn);
>   if (kvm_lapic_enable_pv_eoi(vcpu,
> - gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
> + gfn_to_gpa(gfn) | KVM_MSR_ENABLED,
> + sizeof(struct hv_vp_assist_page)))
>   return 1;
>   break;
>   }
> diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
> index 837465d69c6d..db825bb7efc7 100644
> --- a/arch/x86/kvm/hyperv.h
> +++ b/arch/x86/kvm/hyperv.h
> @@ -62,6 +62,10 @@ void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
>  void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu);
>  void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu);
>  
> +bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu);
> +bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu,
> + struct hv_vp_assist_page *assist_page);
> +
>  static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct
> kvm_vcpu *vcpu,
>   int timer_index)
>  {
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 776391cf69a5..b6d6a36f1a33 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -2540,7 +2540,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu,
> u32 reg, u64 *data)
>   return 0;
>  }
>  
> -int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
> +int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned
> long len)
>  {
>   u64 addr = data & ~KVM_MSR_ENABLED;
>   if (!IS_ALIGNED(addr, 4))
> @@ -2550,7 +2550,7 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu
> *vcpu, u64 data)
>   if (!pv_eoi_enabled(vcpu))
>   return 0;
>   return kvm_gfn_to_hva_cache_init(vcpu->kvm,
> &vcpu->arch.pv_eoi.data,
> -  addr, sizeof(u8));
> +  addr, len);
>  }
>  
>  void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index ed0ed39abd36..ff6ef9c3d760 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -120,7 +120,7 @@ static inline bool
> kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
>   return vcpu->arch.hyperv.hv_vapic &
> HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
>  }
>  
> -int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
> +int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned
> long len);
>  void kvm_lapic_init(void);
>  void kvm_lapic_exit(void);
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 06dd4cdb2ca8..a57766b940a5 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -2442,7 +2442,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu,
> struct msr_data *msr_info)
>  
>

Re: [PATCH 5/5] KVM: nVMX: optimize prepare_vmcs02{,_full} for Enlightened VMCS case

2018-06-14 Thread Liran Alon



- vkuzn...@redhat.com wrote:

> When Enlightened VMCS is in use by L1 hypervisor we can avoid
> vmwriting
> VMCS fields which did not change.
> 
> Our first goal is to achieve minimal impact on traditional VMCS case
> so
> we're not wrapping each vmwrite() with an if-changed checker. We also
> can't
> utilize static keys as Enlightened VMCS usage is per-guest.
> 
> This patch implements the simplest solution: checking fields in
> groups.
> We skip single vmwrite() statements as doing the check will cost us
> something even in non-evmcs case and the win is tiny. Unfortunately,
> this
> makes prepare_vmcs02{,_full}() code Enlightened VMCS-dependent
> (and
> a bit ugly).
> 
> Signed-off-by: Vitaly Kuznetsov 
> ---
>  arch/x86/kvm/vmx.c | 143
> ++---
>  1 file changed, 82 insertions(+), 61 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 6802ba91468c..9a7d76c5c92b 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -11619,50 +11619,79 @@ static int nested_vmx_load_cr3(struct
> kvm_vcpu *vcpu, unsigned long cr3, bool ne
>   return 0;
>  }
>  
> +/*
> + * Check if L1 hypervisor changed the particular field in
> Enlightened
> + * VMCS and avoid redundant vmwrite if it didn't. Can only be used
> when
> + * the value we're about to write is unchanged vmcs12->field.
> + */
> +#define evmcs_needs_write(vmx, clean_field)
> ((vmx)->nested.dirty_vmcs12 ||\
> + !(vmx->nested.hv_evmcs->hv_clean_fields &\
> +   HV_VMX_ENLIGHTENED_CLEAN_FIELD_##clean_field))

Why declare this as a macro instead of a small static inline function?
Just to shorten the name of the clean-field constant?

> +
>  static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12
> *vmcs12)
>  {
>   struct vcpu_vmx *vmx = to_vmx(vcpu);
> + struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
> +
> + if (!hv_evmcs || evmcs_needs_write(vmx, GUEST_GRP2)) {
> + vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
> + vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
> + vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
> + vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
> + vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
> + vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
> + vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
> + vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
> + vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
> + vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
> + vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
> + vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
> + vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
> + vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
> + vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
> + vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
> + vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
> + vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
> + vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
> + vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
> + vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
> + vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
> + vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
> + vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
> + vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
> + vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
> + vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
> + vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
> + vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
> + vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
> + vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
> + }
> +
> + if (!hv_evmcs || evmcs_needs_write(vmx, GUEST_GRP1)) {
> + vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
> + vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
> + vmcs12->guest_pending_dbg_exceptions);
> + vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
> + vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
> +
> + if (vmx_mpx_supported())
> + vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
>  
> - vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
> - vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
> - vmcs_write16(GUEST_DS_SELECTOR,

Re: [PATCH 5/5] KVM: nVMX: optimize prepare_vmcs02{,_full} for Enlightened VMCS case

2018-06-14 Thread Liran Alon



- vkuzn...@redhat.com wrote:

> When Enlightened VMCS is in use by L1 hypervisor we can avoid
> vmwriting
> VMCS fields which did not change.
> 
> Our first goal is to achieve minimal impact on traditional VMCS case
> so
> we're not wrapping each vmwrite() with an if-changed checker. We also
> can't
> utilize static keys as Enlightened VMCS usage is per-guest.
> 
> This patch implements the simplest solution: checking fields in
> groups.
> We skip single vmwrite() statements as doing the check will cost us
> something even in non-evmcs case and the win is tiny. Unfortunately,
> this
> makes prepare_vmcs02{,_full}() code Enlightened VMCS-dependent
> (and
> a bit ugly).
> 
> Signed-off-by: Vitaly Kuznetsov 
> ---
>  arch/x86/kvm/vmx.c | 143
> ++---
>  1 file changed, 82 insertions(+), 61 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 6802ba91468c..9a7d76c5c92b 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -11619,50 +11619,79 @@ static int nested_vmx_load_cr3(struct
> kvm_vcpu *vcpu, unsigned long cr3, bool ne
>   return 0;
>  }
>  
> +/*
> + * Check if L1 hypervisor changed the particular field in
> Enlightened
> + * VMCS and avoid redundant vmwrite if it didn't. Can only be used
> when
> + * the value we're about to write is unchanged vmcs12->field.
> + */
> +#define evmcs_needs_write(vmx, clean_field)
> ((vmx)->nested.dirty_vmcs12 ||\
> + !(vmx->nested.hv_evmcs->hv_clean_fields &\
> +   HV_VMX_ENLIGHTENED_CLEAN_FIELD_##clean_field))

Why declare this as a macro instead of a small static inline function?
Just to shorten the name of the clean-field constant?

> +
>  static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12
> *vmcs12)
>  {
>   struct vcpu_vmx *vmx = to_vmx(vcpu);
> + struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
> +
> + if (!hv_evmcs || evmcs_needs_write(vmx, GUEST_GRP2)) {
> + vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
> + vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
> + vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
> + vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
> + vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
> + vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
> + vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
> + vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
> + vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
> + vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
> + vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
> + vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
> + vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
> + vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
> + vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
> + vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
> + vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
> + vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
> + vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
> + vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
> + vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
> + vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
> + vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
> + vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
> + vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
> + vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
> + vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
> + vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
> + vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
> + vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
> + vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
> + }
> +
> + if (!hv_evmcs || evmcs_needs_write(vmx, GUEST_GRP1)) {
> + vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
> + vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
> + vmcs12->guest_pending_dbg_exceptions);
> + vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
> + vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
> +
> + if (vmx_mpx_supported())
> + vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
>  
> - vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
> - vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
> - vmcs_write16(GUEST_DS_SELECTOR,

Re: [PATCH 3/5] KVM: nVMX: add enlightened VMCS state

2018-06-14 Thread Liran Alon



- vkuzn...@redhat.com wrote:

> Adds hv_evmcs pointer and implement copy_enlightened_to_vmcs12() and
> copy_vmcs12_to_enlightened().
> 
> prepare_vmcs02()/prepare_vmcs02_full() separation is not valid for
> Enlightened VMCS, do full sync for now.
> 
> Suggested-by: Ladi Prosek 
> Signed-off-by: Vitaly Kuznetsov 
> ---
>  arch/x86/kvm/vmx.c | 431
> +++--
>  1 file changed, 417 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 51749207cef1..e7fa9f9c6e36 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -640,10 +640,10 @@ struct nested_vmx {
>*/
>   struct vmcs12 *cached_vmcs12;
>   /*
> -  * Indicates if the shadow vmcs must be updated with the
> -  * data hold by vmcs12
> +  * Indicates if the shadow vmcs or enlightened vmcs must be updated
> +  * with the data held by struct vmcs12.
>*/
> - bool sync_shadow_vmcs;
> + bool need_vmcs12_sync;
>   bool dirty_vmcs12;
>  
>   bool change_vmcs01_virtual_apic_mode;
> @@ -689,6 +689,8 @@ struct nested_vmx {
>   /* in guest mode on SMM entry? */
>   bool guest_mode;
>   } smm;
> +
> + struct hv_enlightened_vmcs *hv_evmcs;
>  };
>  
>  #define POSTED_INTR_ON  0
> @@ -8010,7 +8012,7 @@ static inline void nested_release_vmcs12(struct
> vcpu_vmx *vmx)
>   /* copy to memory all shadowed fields in case
>  they were modified */
>   copy_shadow_to_vmcs12(vmx);
> - vmx->nested.sync_shadow_vmcs = false;
> + vmx->nested.need_vmcs12_sync = false;
>   vmx_disable_shadow_vmcs(vmx);
>   }
>   vmx->nested.posted_intr_nv = -1;
> @@ -8187,6 +8189,393 @@ static inline int vmcs12_write_any(struct
> kvm_vcpu *vcpu,
>  
>  }
>  
> +static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, bool
> full)
> +{
> + struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
> + struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
> +
> + /* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */
> + vmcs12->tpr_threshold = evmcs->tpr_threshold;
> + vmcs12->guest_rip = evmcs->guest_rip;
> +
> + if (unlikely(full || !(evmcs->hv_clean_fields &
> +HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC))) {
> + vmcs12->guest_rsp = evmcs->guest_rsp;
> + vmcs12->guest_rflags = evmcs->guest_rflags;
> + vmcs12->guest_interruptibility_info =
> + evmcs->guest_interruptibility_info;
> + }
> +
> + if (unlikely(full || !(evmcs->hv_clean_fields &
> +HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
> + vmcs12->cpu_based_vm_exec_control =
> + evmcs->cpu_based_vm_exec_control;
> + }
> +
> + if (unlikely(full || !(evmcs->hv_clean_fields &
> +HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
> + vmcs12->exception_bitmap = evmcs->exception_bitmap;
> + }
> +
> + if (unlikely(full || !(evmcs->hv_clean_fields &
> +HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY))) {
> + vmcs12->vm_entry_controls = evmcs->vm_entry_controls;
> + }
> +
> + if (unlikely(full || !(evmcs->hv_clean_fields &
> +HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT))) {
> + vmcs12->vm_entry_intr_info_field =
> + evmcs->vm_entry_intr_info_field;
> + vmcs12->vm_entry_exception_error_code =
> + evmcs->vm_entry_exception_error_code;
> + vmcs12->vm_entry_instruction_len =
> + evmcs->vm_entry_instruction_len;
> + }
> +
> + if (unlikely(full || !(evmcs->hv_clean_fields &
> +   HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) {
> + vmcs12->host_ia32_pat = evmcs->host_ia32_pat;
> + vmcs12->host_ia32_efer = evmcs->host_ia32_efer;
> + vmcs12->host_cr0 = evmcs->host_cr0;
> + vmcs12->host_cr3 = evmcs->host_cr3;
> + vmcs12->host_cr4 = evmcs->host_cr4;
> + vmcs12->host_ia32_sysenter_esp = evmcs->host_ia32_sysenter_esp;
> + vmcs12->host_ia32_sysenter_eip = evmcs->host_ia32_sysenter_eip;
> + vmcs12->host_rip = evmcs->host_rip;
> + vmcs12->host_ia32_sysenter_cs = evmcs->host_ia32_sysenter_cs;
> + vmcs12->host_es_selector = evmcs->host_es_selector;
> + vmcs12->host_cs_selector = evmcs->host_cs_selector;
> + vmcs12->host_ss_selector = evmcs->host_ss_selector;
> + vmcs12->host_ds_selector = evmcs->host_ds_selector;
> + vmcs12->host_fs_selector = evmcs->host_fs_selector;
> + vmcs12->host_gs_selector = evmcs->host_gs_selector;
> + vmcs12->host_tr_selector = evmcs->host_tr_selector;
> + }
> +
> + if

Re: [PATCH 3/5] KVM: nVMX: add enlightened VMCS state

2018-06-14 Thread Liran Alon



- vkuzn...@redhat.com wrote:

> Adds hv_evmcs pointer and implement copy_enlightened_to_vmcs12() and
> copy_vmcs12_to_enlightened().
> 
> prepare_vmcs02()/prepare_vmcs02_full() separation is not valid for
> Enlightened VMCS, do full sync for now.
> 
> Suggested-by: Ladi Prosek 
> Signed-off-by: Vitaly Kuznetsov 
> ---
>  arch/x86/kvm/vmx.c | 431
> +++--
>  1 file changed, 417 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 51749207cef1..e7fa9f9c6e36 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -640,10 +640,10 @@ struct nested_vmx {
>*/
>   struct vmcs12 *cached_vmcs12;
>   /*
> -  * Indicates if the shadow vmcs must be updated with the
> -  * data hold by vmcs12
> +  * Indicates if the shadow vmcs or enlightened vmcs must be updated
> +  * with the data held by struct vmcs12.
>*/
> - bool sync_shadow_vmcs;
> + bool need_vmcs12_sync;
>   bool dirty_vmcs12;
>  
>   bool change_vmcs01_virtual_apic_mode;
> @@ -689,6 +689,8 @@ struct nested_vmx {
>   /* in guest mode on SMM entry? */
>   bool guest_mode;
>   } smm;
> +
> + struct hv_enlightened_vmcs *hv_evmcs;
>  };
>  
>  #define POSTED_INTR_ON  0
> @@ -8010,7 +8012,7 @@ static inline void nested_release_vmcs12(struct
> vcpu_vmx *vmx)
>   /* copy to memory all shadowed fields in case
>  they were modified */
>   copy_shadow_to_vmcs12(vmx);
> - vmx->nested.sync_shadow_vmcs = false;
> + vmx->nested.need_vmcs12_sync = false;
>   vmx_disable_shadow_vmcs(vmx);
>   }
>   vmx->nested.posted_intr_nv = -1;
> @@ -8187,6 +8189,393 @@ static inline int vmcs12_write_any(struct
> kvm_vcpu *vcpu,
>  
>  }
>  
> +static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, bool
> full)
> +{
> + struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
> + struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
> +
> + /* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */
> + vmcs12->tpr_threshold = evmcs->tpr_threshold;
> + vmcs12->guest_rip = evmcs->guest_rip;
> +
> + if (unlikely(full || !(evmcs->hv_clean_fields &
> +HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC))) {
> + vmcs12->guest_rsp = evmcs->guest_rsp;
> + vmcs12->guest_rflags = evmcs->guest_rflags;
> + vmcs12->guest_interruptibility_info =
> + evmcs->guest_interruptibility_info;
> + }
> +
> + if (unlikely(full || !(evmcs->hv_clean_fields &
> +HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
> + vmcs12->cpu_based_vm_exec_control =
> + evmcs->cpu_based_vm_exec_control;
> + }
> +
> + if (unlikely(full || !(evmcs->hv_clean_fields &
> +HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC))) {
> + vmcs12->exception_bitmap = evmcs->exception_bitmap;
> + }
> +
> + if (unlikely(full || !(evmcs->hv_clean_fields &
> +HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY))) {
> + vmcs12->vm_entry_controls = evmcs->vm_entry_controls;
> + }
> +
> + if (unlikely(full || !(evmcs->hv_clean_fields &
> +HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT))) {
> + vmcs12->vm_entry_intr_info_field =
> + evmcs->vm_entry_intr_info_field;
> + vmcs12->vm_entry_exception_error_code =
> + evmcs->vm_entry_exception_error_code;
> + vmcs12->vm_entry_instruction_len =
> + evmcs->vm_entry_instruction_len;
> + }
> +
> + if (unlikely(full || !(evmcs->hv_clean_fields &
> +   HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1))) {
> + vmcs12->host_ia32_pat = evmcs->host_ia32_pat;
> + vmcs12->host_ia32_efer = evmcs->host_ia32_efer;
> + vmcs12->host_cr0 = evmcs->host_cr0;
> + vmcs12->host_cr3 = evmcs->host_cr3;
> + vmcs12->host_cr4 = evmcs->host_cr4;
> + vmcs12->host_ia32_sysenter_esp = evmcs->host_ia32_sysenter_esp;
> + vmcs12->host_ia32_sysenter_eip = evmcs->host_ia32_sysenter_eip;
> + vmcs12->host_rip = evmcs->host_rip;
> + vmcs12->host_ia32_sysenter_cs = evmcs->host_ia32_sysenter_cs;
> + vmcs12->host_es_selector = evmcs->host_es_selector;
> + vmcs12->host_cs_selector = evmcs->host_cs_selector;
> + vmcs12->host_ss_selector = evmcs->host_ss_selector;
> + vmcs12->host_ds_selector = evmcs->host_ds_selector;
> + vmcs12->host_fs_selector = evmcs->host_fs_selector;
> + vmcs12->host_gs_selector = evmcs->host_gs_selector;
> + vmcs12->host_tr_selector = evmcs->host_tr_selector;
> + }
> +
> + if

[PATCH] selftests: zram: return Kselftest Skip code for skipped tests

2018-06-14 Thread Shuah Khan (Samsung OSG)

When zram test is skipped because of unmet dependencies and/or
unsupported configuration, it exits with error which is treated as
a fail by the Kselftest framework. This leads to false negative result
even when the test could not be run.

Change it to return kselftest skip code when a test gets skipped to
clearly report that the test could not be run.

Kselftest framework SKIP code is 4 and the framework prints appropriate
messages to indicate that the test is skipped.

Signed-off-by: Shuah Khan (Samsung OSG) 
---
 tools/testing/selftests/zram/zram.sh | 5 ++++-
 tools/testing/selftests/zram/zram_lib.sh | 5 ++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/zram/zram.sh 
b/tools/testing/selftests/zram/zram.sh
index 754de7da426a..232e958ec454 100755
--- a/tools/testing/selftests/zram/zram.sh
+++ b/tools/testing/selftests/zram/zram.sh
@@ -2,6 +2,9 @@
 # SPDX-License-Identifier: GPL-2.0
 TCID="zram.sh"
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 . ./zram_lib.sh
 
 run_zram () {
@@ -24,5 +27,5 @@ elif [ -b /dev/zram0 ]; then
 else
echo "$TCID : No zram.ko module or /dev/zram0 device file not found"
echo "$TCID : CONFIG_ZRAM is not set"
-   exit 1
+   exit $ksft_skip
 fi
diff --git a/tools/testing/selftests/zram/zram_lib.sh 
b/tools/testing/selftests/zram/zram_lib.sh
index f6a9c73e7a44..9e73a4fb9b0a 100755
--- a/tools/testing/selftests/zram/zram_lib.sh
+++ b/tools/testing/selftests/zram/zram_lib.sh
@@ -18,6 +18,9 @@ MODULE=0
 dev_makeswap=-1
 dev_mounted=-1
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 trap INT
 
 check_prereqs()
@@ -27,7 +30,7 @@ check_prereqs()
 
if [ $uid -ne 0 ]; then
echo $msg must be run as root >&2
-   exit 0
+   exit $ksft_skip
fi
 }
 
-- 
2.17.0

[PATCH] selftests: zram: return Kselftest Skip code for skipped tests

2018-06-14 Thread Shuah Khan (Samsung OSG)

When zram test is skipped because of unmet dependencies and/or
unsupported configuration, it exits with error which is treated as
a fail by the Kselftest framework. This leads to false negative result
even when the test could not be run.

Change it to return kselftest skip code when a test gets skipped to
clearly report that the test could not be run.

Kselftest framework SKIP code is 4 and the framework prints appropriate
messages to indicate that the test is skipped.

Signed-off-by: Shuah Khan (Samsung OSG) 
---
 tools/testing/selftests/zram/zram.sh | 5 ++++-
 tools/testing/selftests/zram/zram_lib.sh | 5 ++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/zram/zram.sh 
b/tools/testing/selftests/zram/zram.sh
index 754de7da426a..232e958ec454 100755
--- a/tools/testing/selftests/zram/zram.sh
+++ b/tools/testing/selftests/zram/zram.sh
@@ -2,6 +2,9 @@
 # SPDX-License-Identifier: GPL-2.0
 TCID="zram.sh"
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 . ./zram_lib.sh
 
 run_zram () {
@@ -24,5 +27,5 @@ elif [ -b /dev/zram0 ]; then
 else
echo "$TCID : No zram.ko module or /dev/zram0 device file not found"
echo "$TCID : CONFIG_ZRAM is not set"
-   exit 1
+   exit $ksft_skip
 fi
diff --git a/tools/testing/selftests/zram/zram_lib.sh 
b/tools/testing/selftests/zram/zram_lib.sh
index f6a9c73e7a44..9e73a4fb9b0a 100755
--- a/tools/testing/selftests/zram/zram_lib.sh
+++ b/tools/testing/selftests/zram/zram_lib.sh
@@ -18,6 +18,9 @@ MODULE=0
 dev_makeswap=-1
 dev_mounted=-1
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 trap INT
 
 check_prereqs()
@@ -27,7 +30,7 @@ check_prereqs()
 
if [ $uid -ne 0 ]; then
echo $msg must be run as root >&2
-   exit 0
+   exit $ksft_skip
fi
 }
 
-- 
2.17.0

Re: [PATCH 4/5] KVM: nVMX: implement enlightened VMPTRLD and VMCLEAR

2018-06-14 Thread Liran Alon



- vkuzn...@redhat.com wrote:

> Per Hyper-V TLFS 5.0b:
> 
> "The L1 hypervisor may choose to use enlightened VMCSs by writing 1
> to
> the corresponding field in the VP assist page (see section 7.8.7).
> Another field in the VP assist page controls the currently active
> enlightened VMCS. Each enlightened VMCS is exactly one page (4 KB) in
> size and must be initially zeroed. No VMPTRLD instruction must be
> executed to make an enlightened VMCS active or current.
> 
> After the L1 hypervisor performs a VM entry with an enlightened VMCS,
> the VMCS is considered active on the processor. An enlightened VMCS
> can only be active on a single processor at the same time. The L1
> hypervisor can execute a VMCLEAR instruction to transition an
> enlightened VMCS from the active to the non-active state. Any VMREAD
> or VMWRITE instructions while an enlightened VMCS is active is
> unsupported and can result in unexpected behavior."
> 
> Keep Enlightened VMCS structure for the current L2 guest permanently
> mapped
> from struct nested_vmx instead of mapping it every time.
> 
> Suggested-by: Ladi Prosek 
> Signed-off-by: Vitaly Kuznetsov 
> ---
>  arch/x86/kvm/vmx.c | 98
> ++
>  1 file changed, 91 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index e7fa9f9c6e36..6802ba91468c 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -20,6 +20,7 @@
>  #include "mmu.h"
>  #include "cpuid.h"
>  #include "lapic.h"
> +#include "hyperv.h"
>  
>  #include 
>  #include 
> @@ -690,6 +691,8 @@ struct nested_vmx {
>   bool guest_mode;
>   } smm;
>  
> + gpa_t hv_evmcs_vmptr;
> + struct page *hv_evmcs_page;
>   struct hv_enlightened_vmcs *hv_evmcs;
>  };
>  
> @@ -7695,7 +7698,9 @@ static void nested_vmx_failInvalid(struct
> kvm_vcpu *vcpu)
>  static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
>   u32 vm_instruction_error)
>  {
> - if (to_vmx(vcpu)->nested.current_vmptr == -1ull) {
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> + if (vmx->nested.current_vmptr == -1ull && !vmx->nested.hv_evmcs) {
>   /*
>* failValid writes the error number to the current VMCS, which
>* can't be done there isn't a current VMCS.
> @@ -8003,6 +8008,18 @@ static void vmx_disable_shadow_vmcs(struct
> vcpu_vmx *vmx)
>   vmcs_write64(VMCS_LINK_POINTER, -1ull);
>  }
>  
> +static inline void nested_release_evmcs(struct vcpu_vmx *vmx)
> +{
> + if (!vmx->nested.hv_evmcs)
> + return;
> +
> + kunmap(vmx->nested.hv_evmcs_page);
> + kvm_release_page_dirty(vmx->nested.hv_evmcs_page);
> + vmx->nested.hv_evmcs_vmptr = -1ull;
> + vmx->nested.hv_evmcs_page = NULL;
> + vmx->nested.hv_evmcs = NULL;
> +}
> +
>  static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
>  {
>   if (vmx->nested.current_vmptr == -1ull)
> @@ -8062,6 +8079,8 @@ static void free_nested(struct vcpu_vmx *vmx)
>   vmx->nested.pi_desc = NULL;
>   }
>  
> + nested_release_evmcs(vmx);
> +
>   free_loaded_vmcs(&vmx->nested.vmcs02);
>  }
>  
> @@ -8098,12 +8117,18 @@ static int handle_vmclear(struct kvm_vcpu
> *vcpu)
>   return kvm_skip_emulated_instruction(vcpu);
>   }
>  
> - if (vmptr == vmx->nested.current_vmptr)
> - nested_release_vmcs12(vmx);
> + if (vmx->nested.hv_evmcs_page) {
> + if (vmptr == vmx->nested.hv_evmcs_vmptr)
> + nested_release_evmcs(vmx);
> + } else {
> + if (vmptr == vmx->nested.current_vmptr)
> + nested_release_vmcs12(vmx);
>  
> - kvm_vcpu_write_guest(vcpu,
> - vmptr + offsetof(struct vmcs12, launch_state),
> - &zero, sizeof(zero));
> + kvm_vcpu_write_guest(vcpu,
> +  vmptr + offsetof(struct vmcs12,
> +   launch_state),
> +  &zero, sizeof(zero));
> + }
>  
>   nested_vmx_succeed(vcpu);
>   return kvm_skip_emulated_instruction(vcpu);
> @@ -8814,6 +8839,10 @@ static int handle_vmptrld(struct kvm_vcpu
> *vcpu)
>   return kvm_skip_emulated_instruction(vcpu);
>   }
>  
> + /* Forbid normal VMPTRLD if Enlightened version was used */
> + if (vmx->nested.hv_evmcs)
> + return 1;
> +
>   if (vmx->nested.current_vmptr != vmptr) {
>   struct vmcs12 *new_vmcs12;
>   struct page *page;
> @@ -8847,6 +8876,55 @@ static int handle_vmptrld(struct kvm_vcpu
> *vcpu)
>   return kvm_skip_emulated_instruction(vcpu);
>  }
>  
> +/*
> + * This is an equivalent of the nested hypervisor executing the
> vmptrld
> + * instruction.
> + */
> +static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu
> *vcpu)
> +{
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> +

Re: [PATCH 4/5] KVM: nVMX: implement enlightened VMPTRLD and VMCLEAR

2018-06-14 Thread Liran Alon



- vkuzn...@redhat.com wrote:

> Per Hyper-V TLFS 5.0b:
> 
> "The L1 hypervisor may choose to use enlightened VMCSs by writing 1
> to
> the corresponding field in the VP assist page (see section 7.8.7).
> Another field in the VP assist page controls the currently active
> enlightened VMCS. Each enlightened VMCS is exactly one page (4 KB) in
> size and must be initially zeroed. No VMPTRLD instruction must be
> executed to make an enlightened VMCS active or current.
> 
> After the L1 hypervisor performs a VM entry with an enlightened VMCS,
> the VMCS is considered active on the processor. An enlightened VMCS
> can only be active on a single processor at the same time. The L1
> hypervisor can execute a VMCLEAR instruction to transition an
> enlightened VMCS from the active to the non-active state. Any VMREAD
> or VMWRITE instructions while an enlightened VMCS is active is
> unsupported and can result in unexpected behavior."
> 
> Keep Enlightened VMCS structure for the current L2 guest permanently
> mapped
> from struct nested_vmx instead of mapping it every time.
> 
> Suggested-by: Ladi Prosek 
> Signed-off-by: Vitaly Kuznetsov 
> ---
>  arch/x86/kvm/vmx.c | 98
> ++
>  1 file changed, 91 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index e7fa9f9c6e36..6802ba91468c 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -20,6 +20,7 @@
>  #include "mmu.h"
>  #include "cpuid.h"
>  #include "lapic.h"
> +#include "hyperv.h"
>  
>  #include 
>  #include 
> @@ -690,6 +691,8 @@ struct nested_vmx {
>   bool guest_mode;
>   } smm;
>  
> + gpa_t hv_evmcs_vmptr;
> + struct page *hv_evmcs_page;
>   struct hv_enlightened_vmcs *hv_evmcs;
>  };
>  
> @@ -7695,7 +7698,9 @@ static void nested_vmx_failInvalid(struct
> kvm_vcpu *vcpu)
>  static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
>   u32 vm_instruction_error)
>  {
> - if (to_vmx(vcpu)->nested.current_vmptr == -1ull) {
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> + if (vmx->nested.current_vmptr == -1ull && !vmx->nested.hv_evmcs) {
>   /*
>* failValid writes the error number to the current VMCS, which
>* can't be done there isn't a current VMCS.
> @@ -8003,6 +8008,18 @@ static void vmx_disable_shadow_vmcs(struct
> vcpu_vmx *vmx)
>   vmcs_write64(VMCS_LINK_POINTER, -1ull);
>  }
>  
> +static inline void nested_release_evmcs(struct vcpu_vmx *vmx)
> +{
> + if (!vmx->nested.hv_evmcs)
> + return;
> +
> + kunmap(vmx->nested.hv_evmcs_page);
> + kvm_release_page_dirty(vmx->nested.hv_evmcs_page);
> + vmx->nested.hv_evmcs_vmptr = -1ull;
> + vmx->nested.hv_evmcs_page = NULL;
> + vmx->nested.hv_evmcs = NULL;
> +}
> +
>  static inline void nested_release_vmcs12(struct vcpu_vmx *vmx)
>  {
>   if (vmx->nested.current_vmptr == -1ull)
> @@ -8062,6 +8079,8 @@ static void free_nested(struct vcpu_vmx *vmx)
>   vmx->nested.pi_desc = NULL;
>   }
>  
> + nested_release_evmcs(vmx);
> +
>   free_loaded_vmcs(&vmx->nested.vmcs02);
>  }
>  
> @@ -8098,12 +8117,18 @@ static int handle_vmclear(struct kvm_vcpu
> *vcpu)
>   return kvm_skip_emulated_instruction(vcpu);
>   }
>  
> - if (vmptr == vmx->nested.current_vmptr)
> - nested_release_vmcs12(vmx);
> + if (vmx->nested.hv_evmcs_page) {
> + if (vmptr == vmx->nested.hv_evmcs_vmptr)
> + nested_release_evmcs(vmx);
> + } else {
> + if (vmptr == vmx->nested.current_vmptr)
> + nested_release_vmcs12(vmx);
>  
> - kvm_vcpu_write_guest(vcpu,
> - vmptr + offsetof(struct vmcs12, launch_state),
> - &zero, sizeof(zero));
> + kvm_vcpu_write_guest(vcpu,
> +  vmptr + offsetof(struct vmcs12,
> +   launch_state),
> +  &zero, sizeof(zero));
> + }
>  
>   nested_vmx_succeed(vcpu);
>   return kvm_skip_emulated_instruction(vcpu);
> @@ -8814,6 +8839,10 @@ static int handle_vmptrld(struct kvm_vcpu
> *vcpu)
>   return kvm_skip_emulated_instruction(vcpu);
>   }
>  
> + /* Forbid normal VMPTRLD if Enlightened version was used */
> + if (vmx->nested.hv_evmcs)
> + return 1;
> +
>   if (vmx->nested.current_vmptr != vmptr) {
>   struct vmcs12 *new_vmcs12;
>   struct page *page;
> @@ -8847,6 +8876,55 @@ static int handle_vmptrld(struct kvm_vcpu
> *vcpu)
>   return kvm_skip_emulated_instruction(vcpu);
>  }
>  
> +/*
> + * This is an equivalent of the nested hypervisor executing the
> vmptrld
> + * instruction.
> + */
> +static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu
> *vcpu)
> +{
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> +

mmotm 2018-06-14-16-20 uploaded

2018-06-14 Thread akpm

The mm-of-the-moment snapshot 2018-06-14-16-20 has been uploaded to

   http://www.ozlabs.org/~akpm/mmotm/

mmotm-readme.txt says

README for mm-of-the-moment:

http://www.ozlabs.org/~akpm/mmotm/

This is a snapshot of my -mm patch queue.  Uploaded at random hopefully
more than once a week.

You will need quilt to apply these patches to the latest Linus release (4.x
or 4.x-rcY).  The series file is in broken-out.tar.gz and is duplicated in
http://ozlabs.org/~akpm/mmotm/series

The file broken-out.tar.gz contains two datestamp files: .DATE and
.DATE-yyyy-mm-dd-hh-mm-ss.  Both contain the string yyyy-mm-dd-hh-mm-ss,
followed by the base kernel version against which this patch series is to
be applied.

This tree is partially included in linux-next.  To see which patches are
included in linux-next, consult the `series' file.  Only the patches
within the #NEXT_PATCHES_START/#NEXT_PATCHES_END markers are included in
linux-next.

A git tree which contains the memory management portion of this tree is
maintained at git://git.kernel.org/pub/scm/linux/kernel/git/mhocko/mm.git
by Michal Hocko.  It contains the patches which are between the
"#NEXT_PATCHES_START mm" and "#NEXT_PATCHES_END" markers, from the series
file, http://www.ozlabs.org/~akpm/mmotm/series.


A full copy of the full kernel tree with the linux-next and mmotm patches
already applied is available through git within an hour of the mmotm
release.  Individual mmotm releases are tagged.  The master branch always
points to the latest release, so it's constantly rebasing.

http://git.cmpxchg.org/cgit.cgi/linux-mmotm.git/

To develop on top of mmotm git:

  $ git remote add mmotm 
git://git.kernel.org/pub/scm/linux/kernel/git/mhocko/mm.git
  $ git remote update mmotm
  $ git checkout -b topic mmotm/master
  <make changes, commit>
  $ git send-email mmotm/master.. [...]

To rebase a branch with older patches to a new mmotm release:

  $ git remote update mmotm
  $ git rebase --onto mmotm/master <topic base> topic




The directory http://www.ozlabs.org/~akpm/mmots/ (mm-of-the-second)
contains daily snapshots of the -mm tree.  It is updated more frequently
than mmotm, and is untested.

A git copy of this tree is available at

http://git.cmpxchg.org/cgit.cgi/linux-mmots.git/

and use of this tree is similar to
http://git.cmpxchg.org/cgit.cgi/linux-mmotm.git/, described above.


This mmotm tree contains the following patches against 4.17:
(patches marked "*" will be included in linux-next)

  origin.patch
  i-need-old-gcc.patch
* mm-ksm-ignore-stable_flag-of-rmap_item-address-in-rmap_walk_ksm.patch
* mm-fix-null-pointer-dereference-in-mem_cgroup_protected.patch
* mm-swap-fix-swap_count-comment-about-nonexistent-swap_has_cont.patch
* mm-fix-devmem_is_allowed-for-sub-page-system-ram-intersections.patch
* mm-fix-race-between-kmem_cache-destroy-create-and-deactivate.patch
* kexec-yield-to-scheduler-when-loading-kimage-segments.patch
* mm-check-for-sigkill-inside-dup_mmap-loop.patch
* mm-memblock-add-missing-include-linux-bootmemh.patch
* mremap-remove-latency_limit-from-mremap-to-reduce-the-number-of-tlb-shootdowns.patch
* proc-skip-branch-in-proc-lookup.patch
* fat-use-fat_fs_error-instead-of-bug_on-in-__fat_get_block.patch
* coredump-fix-spam-with-zero-vma-process.patch
* exofs-avoid-vla-in-structures.patch
* kernel-relay-change-return-type-to-vm_fault_t.patch
* kcov-ensure-irq-code-sees-a-valid-area.patch
* kcov-prefault-the-kcov_area.patch
* sched-core-kcov-avoid-kcov_area-during-task-switch.patch
* arm-port-kcov-to-arm.patch
* fault-injection-reorder-config-entries.patch
* ipc-sem-mitigate-semnum-index-against-spectre-v1.patch
* ipc-adding-new-return-type-vm_fault_t.patch
* mm-use-octal-not-symbolic-permissions.patch
* treewide-use-phys_addr_max-to-avoid-type-casting-ullong_max.patch
* mm-fix-oom_kill-event-handling.patch
* hexagon-fix-printk-format-warning-in-setupc.patch
* hexagon-drop-the-unused-variable-zero_page_mask.patch
* lib-test_printfc-call-wait_for_random_bytes-before-plain-%p-tests.patch
* memcg-remove-memcg_cgroup-id-from-idr-on-mem_cgroup_css_alloc-failure.patch
* lib-percpu_idac-dont-do-alloc-from-per-cpu-list-if-there-is-none.patch
* mm-zero-remaining-unavailable-struct-pages.patch
* arm-arch-arm-include-asm-pageh-needs-personalityh.patch
* prctl-add-pr_et_pdeathsig_proc.patch
* ocfs2-get-rid-of-ocfs2_is_o2cb_active-function.patch
* ocfs2-without-quota-support-try-to-avoid-calling-quota-recovery.patch
* ocfs2-dont-use-iocb-when-eiocbqueued-returns.patch
* ocfs2-fix-a-misuse-a-of-brelse-after-failing-ocfs2_check_dir_entry.patch
* ocfs2-dont-put-and-assigning-null-to-bh-allocated-outside.patch
* block-restore-proc-partitions-to-not-display-non-partitionable-removable-devices.patch
* dentry-fix-kmemcheck-splat-at-take_dentry_name_snapshot.patch
* namei-allow-restricted-o_creat-of-fifos-and-regular-files.patch
* namei-allow-restricted-o_creat-of-fifos-and-regular-files-fix.patch
  mm.patch
* mm-devm_memremap_pages-mark-devm_memremap_pages-export_symbol_gpl.patch
*

mmotm 2018-06-14-16-20 uploaded

2018-06-14 Thread akpm

The mm-of-the-moment snapshot 2018-06-14-16-20 has been uploaded to

   http://www.ozlabs.org/~akpm/mmotm/

mmotm-readme.txt says

README for mm-of-the-moment:

http://www.ozlabs.org/~akpm/mmotm/

This is a snapshot of my -mm patch queue.  Uploaded at random hopefully
more than once a week.

You will need quilt to apply these patches to the latest Linus release (4.x
or 4.x-rcY).  The series file is in broken-out.tar.gz and is duplicated in
http://ozlabs.org/~akpm/mmotm/series

The file broken-out.tar.gz contains two datestamp files: .DATE and
.DATE-yyyy-mm-dd-hh-mm-ss.  Both contain the string yyyy-mm-dd-hh-mm-ss,
followed by the base kernel version against which this patch series is to
be applied.

This tree is partially included in linux-next.  To see which patches are
included in linux-next, consult the `series' file.  Only the patches
within the #NEXT_PATCHES_START/#NEXT_PATCHES_END markers are included in
linux-next.

A git tree which contains the memory management portion of this tree is
maintained at git://git.kernel.org/pub/scm/linux/kernel/git/mhocko/mm.git
by Michal Hocko.  It contains the patches which are between the
"#NEXT_PATCHES_START mm" and "#NEXT_PATCHES_END" markers, from the series
file, http://www.ozlabs.org/~akpm/mmotm/series.


A full copy of the full kernel tree with the linux-next and mmotm patches
already applied is available through git within an hour of the mmotm
release.  Individual mmotm releases are tagged.  The master branch always
points to the latest release, so it's constantly rebasing.

http://git.cmpxchg.org/cgit.cgi/linux-mmotm.git/

To develop on top of mmotm git:

  $ git remote add mmotm git://git.kernel.org/pub/scm/linux/kernel/git/mhocko/mm.git
  $ git remote update mmotm
  $ git checkout -b topic mmotm/master
  <make changes, commit>
  $ git send-email mmotm/master.. [...]

To rebase a branch with older patches to a new mmotm release:

  $ git remote update mmotm
  $ git rebase --onto mmotm/master <topic base> topic




The directory http://www.ozlabs.org/~akpm/mmots/ (mm-of-the-second)
contains daily snapshots of the -mm tree.  It is updated more frequently
than mmotm, and is untested.

A git copy of this tree is available at

http://git.cmpxchg.org/cgit.cgi/linux-mmots.git/

and use of this tree is similar to
http://git.cmpxchg.org/cgit.cgi/linux-mmotm.git/, described above.


This mmotm tree contains the following patches against 4.17:
(patches marked "*" will be included in linux-next)

  origin.patch
  i-need-old-gcc.patch
* mm-ksm-ignore-stable_flag-of-rmap_item-address-in-rmap_walk_ksm.patch
* mm-fix-null-pointer-dereference-in-mem_cgroup_protected.patch
* mm-swap-fix-swap_count-comment-about-nonexistent-swap_has_cont.patch
* mm-fix-devmem_is_allowed-for-sub-page-system-ram-intersections.patch
* mm-fix-race-between-kmem_cache-destroy-create-and-deactivate.patch
* kexec-yield-to-scheduler-when-loading-kimage-segments.patch
* mm-check-for-sigkill-inside-dup_mmap-loop.patch
* mm-memblock-add-missing-include-linux-bootmemh.patch
* mremap-remove-latency_limit-from-mremap-to-reduce-the-number-of-tlb-shootdowns.patch
* proc-skip-branch-in-proc-lookup.patch
* fat-use-fat_fs_error-instead-of-bug_on-in-__fat_get_block.patch
* coredump-fix-spam-with-zero-vma-process.patch
* exofs-avoid-vla-in-structures.patch
* kernel-relay-change-return-type-to-vm_fault_t.patch
* kcov-ensure-irq-code-sees-a-valid-area.patch
* kcov-prefault-the-kcov_area.patch
* sched-core-kcov-avoid-kcov_area-during-task-switch.patch
* arm-port-kcov-to-arm.patch
* fault-injection-reorder-config-entries.patch
* ipc-sem-mitigate-semnum-index-against-spectre-v1.patch
* ipc-adding-new-return-type-vm_fault_t.patch
* mm-use-octal-not-symbolic-permissions.patch
* treewide-use-phys_addr_max-to-avoid-type-casting-ullong_max.patch
* mm-fix-oom_kill-event-handling.patch
* hexagon-fix-printk-format-warning-in-setupc.patch
* hexagon-drop-the-unused-variable-zero_page_mask.patch
* lib-test_printfc-call-wait_for_random_bytes-before-plain-%p-tests.patch
* memcg-remove-memcg_cgroup-id-from-idr-on-mem_cgroup_css_alloc-failure.patch
* lib-percpu_idac-dont-do-alloc-from-per-cpu-list-if-there-is-none.patch
* mm-zero-remaining-unavailable-struct-pages.patch
* arm-arch-arm-include-asm-pageh-needs-personalityh.patch
* prctl-add-pr_et_pdeathsig_proc.patch
* ocfs2-get-rid-of-ocfs2_is_o2cb_active-function.patch
* ocfs2-without-quota-support-try-to-avoid-calling-quota-recovery.patch
* ocfs2-dont-use-iocb-when-eiocbqueued-returns.patch
* ocfs2-fix-a-misuse-a-of-brelse-after-failing-ocfs2_check_dir_entry.patch
* ocfs2-dont-put-and-assigning-null-to-bh-allocated-outside.patch
* block-restore-proc-partitions-to-not-display-non-partitionable-removable-devices.patch
* dentry-fix-kmemcheck-splat-at-take_dentry_name_snapshot.patch
* namei-allow-restricted-o_creat-of-fifos-and-regular-files.patch
* namei-allow-restricted-o_creat-of-fifos-and-regular-files-fix.patch
  mm.patch
* mm-devm_memremap_pages-mark-devm_memremap_pages-export_symbol_gpl.patch
*

Re: [PATCH] infiniband: fix a subtle race condition

2018-06-14 Thread Cong Wang

On Thu, Jun 14, 2018 at 10:24 AM, Jason Gunthorpe  wrote:
> On Thu, Jun 14, 2018 at 10:03:09AM -0700, Cong Wang wrote:
>> On Thu, Jun 14, 2018 at 7:24 AM, Jason Gunthorpe  wrote:
>> >
>> > This was my brief reaction too, this code path almost certainly has a
>> > use-after-free, and we should fix the concurrency between the two
>> > places in some correct way..
>>
>> First of all, why use-after-free could trigger an imbalance unlock?
>> IOW, why do we have to solve use-after-free to fix this imbalance
>> unlock?
>
> The issue syzkaller hit is that accessing ctx->file does not seem
> locked in any way and can race with other manipulations of ctx->file.
>
> So.. for this patch to be correct we need to understand how this
> statement:
>
>f = ctx->file
>
> Avoids f becoming a dangling pointer - and without locking, my

It doesn't, because this is not the point, this is not the cause
of the unlock imbalance either. syzbot didn't report use-after-free
or a kernel segfault here.

> suspicion is that it doesn't - because missing locking around
> ctx->file is probably the actual bug syzkaller found.

Does my patch make it lockless or dangling? Apparently no.

Before my patch:

mutex_lock(&ctx->file->mut);

After my patch:

cur_file = ctx->file;
mutex_lock(&cur_file->mut);

The dereference is the same as before: it was lockless and it is lockless
after my patch.

Look at the assembly code *without* my patch:

819354f0:   49 8b 7c 24 78  mov0x78(%r12),%rdi
819354f5:   48 89 c3mov%rax,%rbx
819354f8:   31 f6   xor%esi,%esi
819354fa:   e8 d8 dd 40 00  callq
81d432d7 

Apparently the pointer is dereferenced before lock.

What difference does my patch make?

819354f2:   4d 8b 74 24 78  mov0x78(%r12),%r14
819354f7:   48 89 c3mov%rax,%rbx
819354fa:   31 f6   xor%esi,%esi
819354fc:   4c 89 f7mov%r14,%rdi
819354ff:   e8 9b df 40 00  callq
81d4349f 
...
8193567d:   4c 89 f7mov%r14,%rdi
81935680:   e8 98 dd 40 00  callq
81d4341d 

The %r14 here is the whole point of my patch.

>
> If this is not the case, then add a comment explaining how f's
> lifetime is OK.
>
> Otherwise, we need some kind of locking and guessing we need to hold a
> kref for f?

I agree with you, but again, this is not necessary for unlock
imbalance.

>
>> Third of all, the use-after-free I can see (race with ->close) exists
>> before my patch, this patch doesn't make it better or worse, nor
>> I have any intend to fix it.
>
> I'm not sure that race exists, there should be something that flushes
> the WQ on the path to close... (though I have another email that
> perhaps that is broken, sigh)
>

This is not related to my patch, but to convince you, let me explain:

struct ucma_file is not refcnt'ed, I know you cancel the work in
rdma_destroy_id(), but after ucma_migrate_id() the ctx has already
been moved to the new file, for the old file, it won't cancel the
ctx flying with workqueue. So, I think the following use-after-free
could happen:

ucma_event_handler():
cur_file = ctx->file; // old file

ucma_migrate_id():
lock();
list_move_tail(&ctx->list, &new_file->ctx_list);
ctx->file = new_file;
unlock();

ucma_close():
// retrieve old file via filp->private_data
// the loop won't cover the ctx moved to the new_file
kfree(file);

ucma_event_handler():
// continued from above
lock(&cur_file->mux); // already freed!

This is _not_ the cause of the unlock imbalance, and is _not_ expected
to be solved by this patch either.

Re: [PATCH] infiniband: fix a subtle race condition

2018-06-14 Thread Cong Wang

On Thu, Jun 14, 2018 at 10:24 AM, Jason Gunthorpe  wrote:
> On Thu, Jun 14, 2018 at 10:03:09AM -0700, Cong Wang wrote:
>> On Thu, Jun 14, 2018 at 7:24 AM, Jason Gunthorpe  wrote:
>> >
>> > This was my brief reaction too, this code path almost certainly has a
>> > use-after-free, and we should fix the concurrency between the two
>> > places in some correct way..
>>
>> First of all, why use-after-free could trigger an imbalance unlock?
>> IOW, why do we have to solve use-after-free to fix this imbalance
>> unlock?
>
> The issue syzkaller hit is that accessing ctx->file does not seem
> locked in any way and can race with other manipulations of ctx->file.
>
> So.. for this patch to be correct we need to understand how this
> statement:
>
>f = ctx->file
>
> Avoids f becoming a dangling pointer - and without locking, my

It doesn't, because this is not the point, this is not the cause
of the unlock imbalance either. syzbot didn't report use-after-free
or a kernel segfault here.

> suspicion is that it doesn't - because missing locking around
> ctx->file is probably the actual bug syzkaller found.

Does my patch make it lockless or dangling? Apparently no.

Before my patch:

mutex_lock(&ctx->file->mut);

After my patch:

cur_file = ctx->file;
mutex_lock(&cur_file->mut);

The dereference is the same as before: it was lockless and it is lockless
after my patch.

Look at the assembly code *without* my patch:

819354f0:   49 8b 7c 24 78  mov0x78(%r12),%rdi
819354f5:   48 89 c3mov%rax,%rbx
819354f8:   31 f6   xor%esi,%esi
819354fa:   e8 d8 dd 40 00  callq
81d432d7 

Apparently the pointer is dereferenced before lock.

What difference does my patch make?

819354f2:   4d 8b 74 24 78  mov0x78(%r12),%r14
819354f7:   48 89 c3mov%rax,%rbx
819354fa:   31 f6   xor%esi,%esi
819354fc:   4c 89 f7mov%r14,%rdi
819354ff:   e8 9b df 40 00  callq
81d4349f 
...
8193567d:   4c 89 f7mov%r14,%rdi
81935680:   e8 98 dd 40 00  callq
81d4341d 

The %r14 here is the whole point of my patch.

>
> If this is not the case, then add a comment explaining how f's
> lifetime is OK.
>
> Otherwise, we need some kind of locking and guessing we need to hold a
> kref for f?

I agree with you, but again, this is not necessary for unlock
imbalance.

>
>> Third of all, the use-after-free I can see (race with ->close) exists
>> before my patch, this patch doesn't make it better or worse, nor
>> I have any intend to fix it.
>
> I'm not sure that race exists, there should be something that flushes
> the WQ on the path to close... (though I have another email that
> perhaps that is broken, sigh)
>

This is not related to my patch, but to convince you, let me explain:

struct ucma_file is not refcnt'ed, I know you cancel the work in
rdma_destroy_id(), but after ucma_migrate_id() the ctx has already
been moved to the new file, for the old file, it won't cancel the
ctx flying with workqueue. So, I think the following use-after-free
could happen:

ucma_event_handler():
cur_file = ctx->file; // old file

ucma_migrate_id():
lock();
list_move_tail(&ctx->list, &new_file->ctx_list);
ctx->file = new_file;
unlock();

ucma_close():
// retrieve old file via filp->private_data
// the loop won't cover the ctx moved to the new_file
kfree(file);

ucma_event_handler():
// continued from above
lock(&cur_file->mux); // already freed!

This is _not_ the cause of the unlock imbalance, and is _not_ expected
to be solved by this patch either.

Re: [PATCH 4.4 038/268] Btrfs: fix scrub to repair raid6 corruption

2018-06-14 Thread Sasha Levin

On Thu, Jun 14, 2018 at 10:18:58AM +0200, Greg Kroah-Hartman wrote:
>On Fri, Jun 08, 2018 at 07:42:47PM +0100, Ben Hutchings wrote:
>> On Mon, 2018-05-28 at 12:00 +0200, Greg Kroah-Hartman wrote:
>> > 4.4-stable review patch.  If anyone has any objections, please let me know.
>> >
>> > --
>> >
>> > From: Liu Bo 
>> >
>> > [ Upstream commit 762221f095e3932669093466aaf4b85ed9ad2ac1 ]
>>
>> The diff here is actually from commit 8810f7517a3b ("Btrfs: make raid6
>> rebuild retry more", mentioned in this commit message).  (Sasha, please
>> try to work out why commit messages and descriptions are getting mixed
>> up in your auto-selections.)
>>
>> Maybe stable branches should get the real commit 762221f095e3 as well?
>
>Ugh, not good.  Sasha, can you fix this up and send me patches for it,
>separate from your pull requests?

Crap, I see what's wrong. I'll send fixes later tonight.

Re: [PATCH 4.4 038/268] Btrfs: fix scrub to repair raid6 corruption

2018-06-14 Thread Sasha Levin

On Thu, Jun 14, 2018 at 10:18:58AM +0200, Greg Kroah-Hartman wrote:
>On Fri, Jun 08, 2018 at 07:42:47PM +0100, Ben Hutchings wrote:
>> On Mon, 2018-05-28 at 12:00 +0200, Greg Kroah-Hartman wrote:
>> > 4.4-stable review patch.  If anyone has any objections, please let me know.
>> >
>> > --
>> >
>> > From: Liu Bo 
>> >
>> > [ Upstream commit 762221f095e3932669093466aaf4b85ed9ad2ac1 ]
>>
>> The diff here is actually from commit 8810f7517a3b ("Btrfs: make raid6
>> rebuild retry more", mentioned in this commit message).  (Sasha, please
>> try to work out why commit messages and descriptions are getting mixed
>> up in your auto-selections.)
>>
>> Maybe stable branches should get the real commit 762221f095e3 as well?
>
>Ugh, not good.  Sasha, can you fix this up and send me patches for it,
>separate from your pull requests?

Crap, I see what's wrong. I'll send fixes later tonight.

Re: [PATCH 2/5] KVM: nVMX: add KVM_CAP_HYPERV_ENLIGHTENED_VMCS capability

2018-06-14 Thread Liran Alon



- vkuzn...@redhat.com wrote:

> Enlightened VMCS is opt-in. The current version does not contain all
> fields supported by nested VMX so we must not advertise the
> corresponding VMX features if enlightened VMCS is enabled.
> 
> Userspace is given the enlightened VMCS version supported by KVM as
> part of enabling KVM_CAP_HYPERV_ENLIGHTENED_VMCS. The version is to
> be advertised to the nested hypervisor, currently done via a cpuid
> leaf for Hyper-V.
> 
> Suggested-by: Ladi Prosek 
> Signed-off-by: Vitaly Kuznetsov 
> ---
>  arch/x86/include/asm/kvm_host.h |   3 +
>  arch/x86/kvm/svm.c  |   9 +++
>  arch/x86/kvm/vmx.c  | 138
> ++--
>  arch/x86/kvm/x86.c  |  15 +
>  include/uapi/linux/kvm.h|   1 +
>  5 files changed, 105 insertions(+), 61 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h
> b/arch/x86/include/asm/kvm_host.h
> index 0ebe659f2802..d7e8f7155d79 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1095,6 +1095,9 @@ struct kvm_x86_ops {
>   int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region
> *argp);
>  
>   int (*get_msr_feature)(struct kvm_msr_entry *entry);
> +
> + int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu,
> +uint16_t *vmcs_version);
>  };
>  
>  struct kvm_arch_async_pf {
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index d9305f1723f5..6dc42c870565 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -7009,6 +7009,13 @@ static int svm_unregister_enc_region(struct kvm
> *kvm,
>   return ret;
>  }
>  
> +static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
> +uint16_t *vmcs_version)
> +{
> + /* Intel-only feature */
> + return -ENODEV;
> +}
> +
>  static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
>   .cpu_has_kvm_support = has_svm,
>   .disabled_by_bios = is_disabled,
> @@ -7135,6 +7142,8 @@ static struct kvm_x86_ops svm_x86_ops
> __ro_after_init = {
>   .mem_enc_op = svm_mem_enc_op,
>   .mem_enc_reg_region = svm_register_enc_region,
>   .mem_enc_unreg_region = svm_unregister_enc_region,
> +
> + .nested_enable_evmcs = nested_enable_evmcs,
>  };
>  
>  static int __init svm_init(void)
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 48989f78be60..51749207cef1 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -648,6 +648,13 @@ struct nested_vmx {
>  
>   bool change_vmcs01_virtual_apic_mode;
>  
> + /*
> +  * Enlightened VMCS has been enabled. It does not mean that L1 has
> to
> +  * use it. However, VMX features available to L1 will be limited
> based
> +  * on what the enlightened VMCS supports.
> +  */
> + bool enlightened_vmcs_enabled;
> +
>   /* L2 must run next, and mustn't decide to exit to L1. */
>   bool nested_run_pending;
>  
> @@ -1186,6 +1193,49 @@ DEFINE_STATIC_KEY_FALSE(enable_evmcs);
>  
>  #define KVM_EVMCS_VERSION 1
>  
> +/*
> + * Enlightened VMCSv1 doesn't support these:
> + *
> + *   POSTED_INTR_NV  = 0x0002,
> + *   GUEST_INTR_STATUS   = 0x0810,
> + *   APIC_ACCESS_ADDR= 0x2014,
> + *   POSTED_INTR_DESC_ADDR   = 0x2016,
> + *   EOI_EXIT_BITMAP0= 0x201c,
> + *   EOI_EXIT_BITMAP1= 0x201e,
> + *   EOI_EXIT_BITMAP2= 0x2020,
> + *   EOI_EXIT_BITMAP3= 0x2022,
> + *   GUEST_PML_INDEX = 0x0812,
> + *   PML_ADDRESS = 0x200e,
> + *   VM_FUNCTION_CONTROL = 0x2018,
> + *   EPTP_LIST_ADDRESS   = 0x2024,
> + *   VMREAD_BITMAP   = 0x2026,
> + *   VMWRITE_BITMAP  = 0x2028,
> + *
> + *   TSC_MULTIPLIER  = 0x2032,
> + *   PLE_GAP = 0x4020,
> + *   PLE_WINDOW  = 0x4022,
> + *   VMX_PREEMPTION_TIMER_VALUE  = 0x482E,
> + *  GUEST_IA32_PERF_GLOBAL_CTRL = 0x2808,
> + *  HOST_IA32_PERF_GLOBAL_CTRL  = 0x2c04,
> + *
> + * Currently unsupported in KVM:
> + *   GUEST_IA32_RTIT_CTL = 0x2814,
> + */
> +#define EVMCS1_UNSUPPORTED_PINCTRL (PIN_BASED_POSTED_INTR | \
> + PIN_BASED_VMX_PREEMPTION_TIMER)
> +#define EVMCS1_UNSUPPORTED_2NDEXEC   \
> + (SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | \
> +  SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |  \
> +  SECONDARY_EXEC_APIC_REGISTER_VIRT |\
> +  SECONDARY_EXEC_ENABLE_PML |\
> +  SECONDARY_EXEC_ENABLE_VMFUNC | \
> +  SECONDARY_EXEC_SHADOW_VMCS |   \
> +

Re: [PATCH 2/5] KVM: nVMX: add KVM_CAP_HYPERV_ENLIGHTENED_VMCS capability

2018-06-14 Thread Liran Alon



- vkuzn...@redhat.com wrote:

> Enlightened VMCS is opt-in. The current version does not contain all
> fields supported by nested VMX so we must not advertise the
> corresponding VMX features if enlightened VMCS is enabled.
> 
> Userspace is given the enlightened VMCS version supported by KVM as
> part of enabling KVM_CAP_HYPERV_ENLIGHTENED_VMCS. The version is to
> be advertised to the nested hypervisor, currently done via a cpuid
> leaf for Hyper-V.
> 
> Suggested-by: Ladi Prosek 
> Signed-off-by: Vitaly Kuznetsov 
> ---
>  arch/x86/include/asm/kvm_host.h |   3 +
>  arch/x86/kvm/svm.c  |   9 +++
>  arch/x86/kvm/vmx.c  | 138
> ++--
>  arch/x86/kvm/x86.c  |  15 +
>  include/uapi/linux/kvm.h|   1 +
>  5 files changed, 105 insertions(+), 61 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h
> b/arch/x86/include/asm/kvm_host.h
> index 0ebe659f2802..d7e8f7155d79 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1095,6 +1095,9 @@ struct kvm_x86_ops {
>   int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region
> *argp);
>  
>   int (*get_msr_feature)(struct kvm_msr_entry *entry);
> +
> + int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu,
> +uint16_t *vmcs_version);
>  };
>  
>  struct kvm_arch_async_pf {
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index d9305f1723f5..6dc42c870565 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -7009,6 +7009,13 @@ static int svm_unregister_enc_region(struct kvm
> *kvm,
>   return ret;
>  }
>  
> +static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
> +uint16_t *vmcs_version)
> +{
> + /* Intel-only feature */
> + return -ENODEV;
> +}
> +
>  static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
>   .cpu_has_kvm_support = has_svm,
>   .disabled_by_bios = is_disabled,
> @@ -7135,6 +7142,8 @@ static struct kvm_x86_ops svm_x86_ops
> __ro_after_init = {
>   .mem_enc_op = svm_mem_enc_op,
>   .mem_enc_reg_region = svm_register_enc_region,
>   .mem_enc_unreg_region = svm_unregister_enc_region,
> +
> + .nested_enable_evmcs = nested_enable_evmcs,
>  };
>  
>  static int __init svm_init(void)
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 48989f78be60..51749207cef1 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -648,6 +648,13 @@ struct nested_vmx {
>  
>   bool change_vmcs01_virtual_apic_mode;
>  
> + /*
> +  * Enlightened VMCS has been enabled. It does not mean that L1 has
> to
> +  * use it. However, VMX features available to L1 will be limited
> based
> +  * on what the enlightened VMCS supports.
> +  */
> + bool enlightened_vmcs_enabled;
> +
>   /* L2 must run next, and mustn't decide to exit to L1. */
>   bool nested_run_pending;
>  
> @@ -1186,6 +1193,49 @@ DEFINE_STATIC_KEY_FALSE(enable_evmcs);
>  
>  #define KVM_EVMCS_VERSION 1
>  
> +/*
> + * Enlightened VMCSv1 doesn't support these:
> + *
> + *   POSTED_INTR_NV  = 0x0002,
> + *   GUEST_INTR_STATUS   = 0x0810,
> + *   APIC_ACCESS_ADDR= 0x2014,
> + *   POSTED_INTR_DESC_ADDR   = 0x2016,
> + *   EOI_EXIT_BITMAP0= 0x201c,
> + *   EOI_EXIT_BITMAP1= 0x201e,
> + *   EOI_EXIT_BITMAP2= 0x2020,
> + *   EOI_EXIT_BITMAP3= 0x2022,
> + *   GUEST_PML_INDEX = 0x0812,
> + *   PML_ADDRESS = 0x200e,
> + *   VM_FUNCTION_CONTROL = 0x2018,
> + *   EPTP_LIST_ADDRESS   = 0x2024,
> + *   VMREAD_BITMAP   = 0x2026,
> + *   VMWRITE_BITMAP  = 0x2028,
> + *
> + *   TSC_MULTIPLIER  = 0x2032,
> + *   PLE_GAP = 0x4020,
> + *   PLE_WINDOW  = 0x4022,
> + *   VMX_PREEMPTION_TIMER_VALUE  = 0x482E,
> + *  GUEST_IA32_PERF_GLOBAL_CTRL = 0x2808,
> + *  HOST_IA32_PERF_GLOBAL_CTRL  = 0x2c04,
> + *
> + * Currently unsupported in KVM:
> + *   GUEST_IA32_RTIT_CTL = 0x2814,
> + */
> +#define EVMCS1_UNSUPPORTED_PINCTRL (PIN_BASED_POSTED_INTR | \
> + PIN_BASED_VMX_PREEMPTION_TIMER)
> +#define EVMCS1_UNSUPPORTED_2NDEXEC   \
> + (SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | \
> +  SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |  \
> +  SECONDARY_EXEC_APIC_REGISTER_VIRT |\
> +  SECONDARY_EXEC_ENABLE_PML |\
> +  SECONDARY_EXEC_ENABLE_VMFUNC | \
> +  SECONDARY_EXEC_SHADOW_VMCS |   \
> +

[PATCH 2/2] arm: multi_v7_defconfig: Enable KSM.

2018-06-14 Thread Daniel Díaz

As per the documentation, Kernel Samepage Merging (available
since 2.6.32) is a memory-saving de-duplication feature,
enabled by CONFIG_KSM=y and activated via sysfs. More
information can be found here:
  https://www.kernel.org/doc/Documentation/vm/ksm.txt

When enabled in the kernel, the default is to not do anything
at all, until it is activated at run-time with:
  echo 1 > /sys/kernel/mm/ksm/run

Signed-off-by: Daniel Díaz 
---
 arch/arm/configs/multi_v7_defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/configs/multi_v7_defconfig 
b/arch/arm/configs/multi_v7_defconfig
index 7e1c543..79eedf4d 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -133,6 +133,7 @@ CONFIG_SMP=y
 CONFIG_NR_CPUS=16
 CONFIG_HIGHPTE=y
 CONFIG_CMA=y
+CONFIG_KSM=y
 CONFIG_SECCOMP=y
 CONFIG_ARM_APPENDED_DTB=y
 CONFIG_ARM_ATAG_DTB_COMPAT=y
-- 
2.7.4

[PATCH 2/2] arm: multi_v7_defconfig: Enable KSM.

2018-06-14 Thread Daniel Díaz

As per the documentation, Kernel Samepage Merging (available
since 2.6.32) is a memory-saving de-duplication feature,
enabled by CONFIG_KSM=y and activated via sysfs. More
information can be found here:
  https://www.kernel.org/doc/Documentation/vm/ksm.txt

When enabled in the kernel, the default is to not do anything
at all, until it is activated at run-time with:
  echo 1 > /sys/kernel/mm/ksm/run

Signed-off-by: Daniel Díaz 
---
 arch/arm/configs/multi_v7_defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/configs/multi_v7_defconfig 
b/arch/arm/configs/multi_v7_defconfig
index 7e1c543..79eedf4d 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -133,6 +133,7 @@ CONFIG_SMP=y
 CONFIG_NR_CPUS=16
 CONFIG_HIGHPTE=y
 CONFIG_CMA=y
+CONFIG_KSM=y
 CONFIG_SECCOMP=y
 CONFIG_ARM_APPENDED_DTB=y
 CONFIG_ARM_ATAG_DTB_COMPAT=y
-- 
2.7.4

[PATCH 1/2] x86: x86_64_defconfig: Enable KSM.

2018-06-14 Thread Daniel Díaz

As per the documentation, Kernel Samepage Merging (available
since 2.6.32) is a memory-saving de-duplication feature,
enabled by CONFIG_KSM=y and activated via sysfs. More
information can be found here:
  https://www.kernel.org/doc/Documentation/vm/ksm.txt

When enabled in the kernel, the default is to not do anything
at all, until it is activated at run-time with:
  echo 1 > /sys/kernel/mm/ksm/run

Signed-off-by: Daniel Díaz 
---
 arch/x86/configs/x86_64_defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/configs/x86_64_defconfig 
b/arch/x86/configs/x86_64_defconfig
index e32fc1f..8fd7396 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -48,6 +48,7 @@ CONFIG_MICROCODE_AMD=y
 CONFIG_X86_MSR=y
 CONFIG_X86_CPUID=y
 CONFIG_NUMA=y
+CONFIG_KSM=y
 CONFIG_X86_CHECK_BIOS_CORRUPTION=y
 # CONFIG_MTRR_SANITIZER is not set
 CONFIG_EFI=y
-- 
2.7.4

[PATCH 1/2] x86: x86_64_defconfig: Enable KSM.

2018-06-14 Thread Daniel Díaz

As per the documentation, Kernel Samepage Merging (available
since 2.6.32) is a memory-saving de-duplication feature,
enabled by CONFIG_KSM=y and activated via sysfs. More
information can be found here:
  https://www.kernel.org/doc/Documentation/vm/ksm.txt

When enabled in the kernel, the default is to not do anything
at all, until it is activated at run-time with:
  echo 1 > /sys/kernel/mm/ksm/run

Signed-off-by: Daniel Díaz 
---
 arch/x86/configs/x86_64_defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/configs/x86_64_defconfig 
b/arch/x86/configs/x86_64_defconfig
index e32fc1f..8fd7396 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -48,6 +48,7 @@ CONFIG_MICROCODE_AMD=y
 CONFIG_X86_MSR=y
 CONFIG_X86_CPUID=y
 CONFIG_NUMA=y
+CONFIG_KSM=y
 CONFIG_X86_CHECK_BIOS_CORRUPTION=y
 # CONFIG_MTRR_SANITIZER is not set
 CONFIG_EFI=y
-- 
2.7.4

Re: [PATCH 4.4 00/24] 4.4.138-stable review

2018-06-14 Thread Shuah Khan

On 06/14/2018 08:04 AM, Greg Kroah-Hartman wrote:
> This is the start of the stable review cycle for the 4.4.138 release.
> There are 24 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
> 
> Responses should be made by Sat Jun 16 13:27:15 UTC 2018.
> Anything received after that time might be too late.
> 
> The whole patch series can be found in one patch at:
>   
> https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.4.138-rc1.gz
> or in the git tree and branch at:
>   
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.4.y
> and the diffstat can be found below.
> 
> thanks,
> 
> greg k-h
> 

Compiled and booted on my test system. No dmesg regressions.

thanks,
-- Shuah

Re: [PATCH 4.4 00/24] 4.4.138-stable review

2018-06-14 Thread Shuah Khan

On 06/14/2018 08:04 AM, Greg Kroah-Hartman wrote:
> This is the start of the stable review cycle for the 4.4.138 release.
> There are 24 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
> 
> Responses should be made by Sat Jun 16 13:27:15 UTC 2018.
> Anything received after that time might be too late.
> 
> The whole patch series can be found in one patch at:
>   
> https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.4.138-rc1.gz
> or in the git tree and branch at:
>   
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.4.y
> and the diffstat can be found below.
> 
> thanks,
> 
> greg k-h
> 

Compiled and booted on my test system. No dmesg regressions.

thanks,
-- Shuah

Re: [PATCH 4.9 00/30] 4.9.109-stable review

2018-06-14 Thread Shuah Khan

On 06/14/2018 08:04 AM, Greg Kroah-Hartman wrote:
> This is the start of the stable review cycle for the 4.9.109 release.
> There are 30 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
> 
> Responses should be made by Sat Jun 16 13:25:48 UTC 2018.
> Anything received after that time might be too late.
> 
> The whole patch series can be found in one patch at:
>   
> https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.9.109-rc1.gz
> or in the git tree and branch at:
>   
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.9.y
> and the diffstat can be found below.
> 
> thanks,
> 
> greg k-h
> 

Compiled and booted on my test system. No dmesg regressions.

thanks,
-- Shuah

Re: [PATCH 4.9 00/30] 4.9.109-stable review

2018-06-14 Thread Shuah Khan

On 06/14/2018 08:04 AM, Greg Kroah-Hartman wrote:
> This is the start of the stable review cycle for the 4.9.109 release.
> There are 30 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
> 
> Responses should be made by Sat Jun 16 13:25:48 UTC 2018.
> Anything received after that time might be too late.
> 
> The whole patch series can be found in one patch at:
>   
> https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.9.109-rc1.gz
> or in the git tree and branch at:
>   
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.9.y
> and the diffstat can be found below.
> 
> thanks,
> 
> greg k-h
> 

Compiled and booted on my test system. No dmesg regressions.

thanks,
-- Shuah

Re: [PATCH 4.14 00/36] 4.14.50-stable review

2018-06-14 Thread Shuah Khan

On 06/14/2018 08:04 AM, Greg Kroah-Hartman wrote:
> This is the start of the stable review cycle for the 4.14.50 release.
> There are 36 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
> 
> Responses should be made by Sat Jun 16 13:21:44 UTC 2018.
> Anything received after that time might be too late.
> 
> The whole patch series can be found in one patch at:
>   
> https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.14.50-rc1.gz
> or in the git tree and branch at:
>   
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.14.y
> and the diffstat can be found below.
> 
> thanks,
> 
> greg k-h
> 

Compiled and booted on my test system. No dmesg regressions.

thanks,
-- Shuah

Re: [PATCH 4.14 00/36] 4.14.50-stable review

2018-06-14 Thread Shuah Khan

On 06/14/2018 08:04 AM, Greg Kroah-Hartman wrote:
> This is the start of the stable review cycle for the 4.14.50 release.
> There are 36 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
> 
> Responses should be made by Sat Jun 16 13:21:44 UTC 2018.
> Anything received after that time might be too late.
> 
> The whole patch series can be found in one patch at:
>   
> https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.14.50-rc1.gz
> or in the git tree and branch at:
>   
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.14.y
> and the diffstat can be found below.
> 
> thanks,
> 
> greg k-h
> 

Compiled and booted on my test system. No dmesg regressions.

thanks,
-- Shuah

Re: [PATCH 4.16 00/43] 4.16.16-stable review

2018-06-14 Thread Shuah Khan

On 06/14/2018 08:04 AM, Greg Kroah-Hartman wrote:
> This is the start of the stable review cycle for the 4.16.16 release.
> There are 43 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
> 
> Responses should be made by Sat Jun 16 13:21:17 UTC 2018.
> Anything received after that time might be too late.
> 
> The whole patch series can be found in one patch at:
>   
> https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.16.16-rc1.gz
> or in the git tree and branch at:
>   
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.16.y
> and the diffstat can be found below.
> 
> thanks,
> 
> greg k-h
> 

Compiled and booted on my test system. No dmesg regressions.

thanks,
-- Shuah

Re: [PATCH 4.16 00/43] 4.16.16-stable review

2018-06-14 Thread Shuah Khan

On 06/14/2018 08:04 AM, Greg Kroah-Hartman wrote:
> This is the start of the stable review cycle for the 4.16.16 release.
> There are 43 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
> 
> Responses should be made by Sat Jun 16 13:21:17 UTC 2018.
> Anything received after that time might be too late.
> 
> The whole patch series can be found in one patch at:
>   
> https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.16.16-rc1.gz
> or in the git tree and branch at:
>   
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.16.y
> and the diffstat can be found below.
> 
> thanks,
> 
> greg k-h
> 

Compiled and booted on my test system. No dmesg regressions.

thanks,
-- Shuah

[PATCH] x86/pti: don't report XenPV as vulnerable

2018-06-14 Thread Jiri Kosina

From: Jiri Kosina 

Xen PV domain is not by design affected by meltdown as it's enforcing 
split CR3 itself. Let's not report such systems as "Vulnerable" in sysfs 
(we're also already forcing PTI to off in X86_HYPER_XEN_PV cases)

Reported-and-tested-by: Mike Latimer 
Signed-off-by: Jiri Kosina 
---

I originally wanted to just not set X86_BUG_CPU_MELTDOWN in 
cpu_set_bug_bits() in the first place, but that has two issues:

- cpu_set_bug_bits() gets invoked from early_identify_cpu() before 
  init_hypervisor_platform() had a chance to run, and therefore the
  hypervisor type check doesn't work there

- it'd actually be inaccurate; the CPU *does* have the bug at the end
  of the day (so it's properly kept being reported in cpuinfo), it's
  "just a setup matter" that we don't need any additional mitigation to
  be applied by the kernel

So let's not overcomplicate it.

 arch/x86/kernel/cpu/bugs.c |4 
 1 file changed, 4 insertions(+)

--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -26,6 +26,7 @@
 #include 
 #include 
 #include 
+#include <asm/hypervisor.h>
 
 static void __init spectre_v2_select_mitigation(void);
 static void __init ssb_select_mitigation(void);
@@ -685,6 +686,9 @@ static ssize_t cpu_show_common(struct de
if (boot_cpu_has(X86_FEATURE_PTI))
return sprintf(buf, "Mitigation: PTI\n");
 
+   if (hypervisor_is_type(X86_HYPER_XEN_PV))
+   return sprintf(buf, "Not affected\n");
+
break;
 
case X86_BUG_SPECTRE_V1:

-- 
Jiri Kosina
SUSE Labs

[PATCH] x86/pti: don't report XenPV as vulnerable

2018-06-14 Thread Jiri Kosina

From: Jiri Kosina 

Xen PV domain is not by design affected by meltdown as it's enforcing 
split CR3 itself. Let's not report such systems as "Vulnerable" in sysfs 
(we're also already forcing PTI to off in X86_HYPER_XEN_PV cases)

Reported-and-tested-by: Mike Latimer 
Signed-off-by: Jiri Kosina 
---

I originally wanted to just not set X86_BUG_CPU_MELTDOWN in 
cpu_set_bug_bits() in the first place, but that has two issues:

- cpu_set_bug_bits() gets invoked from early_identify_cpu() before 
  init_hypervisor_platform() had a chance to run, and therefore the
  hypervisor type check doesn't work there

- it'd actually be inaccurate; the CPU *does* have the bug at the end
  of the day (so it's properly kept being reported in cpuinfo), it's
  "just a setup matter" that we don't need any additional mitigation to
  be applied by the kernel

So let's not overcomplicate it.

 arch/x86/kernel/cpu/bugs.c |4 
 1 file changed, 4 insertions(+)

--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -26,6 +26,7 @@
 #include 
 #include 
 #include 
+#include <asm/hypervisor.h>
 
 static void __init spectre_v2_select_mitigation(void);
 static void __init ssb_select_mitigation(void);
@@ -685,6 +686,9 @@ static ssize_t cpu_show_common(struct de
if (boot_cpu_has(X86_FEATURE_PTI))
return sprintf(buf, "Mitigation: PTI\n");
 
+   if (hypervisor_is_type(X86_HYPER_XEN_PV))
+   return sprintf(buf, "Not affected\n");
+
break;
 
case X86_BUG_SPECTRE_V1:

-- 
Jiri Kosina
SUSE Labs

Re: [PATCH 4.4 173/268] sched/rt: Fix rq->clock_update_flags < RQCF_ACT_SKIP warning

2018-06-14 Thread Steven Rostedt

On Thu, 14 Jun 2018 22:55:56 +0100
Ben Hutchings  wrote:

> On Mon, 2018-05-28 at 12:02 +0200, Greg Kroah-Hartman wrote:
> > 4.4-stable review patch.  If anyone has any objections, please let me know.
> > 
> > --
> > 
> > From: Davidlohr Bueso 
> > 
> > [ Upstream commit d29a20645d5e929aa7e8616f28e5d8e1c49263ec ]
> > 
> > While running rt-tests' pi_stress program I got the following splat:
> > 
> >   rq->clock_update_flags < RQCF_ACT_SKIP
> >   WARNING: CPU: 27 PID: 0 at kernel/sched/sched.h:960 
> > assert_clock_updated.isra.38.part.39+0x13/0x20
> > 
> >   [...]
> > 
> >   
> >   enqueue_top_rt_rq+0xf4/0x150
> >   ? cpufreq_dbs_governor_start+0x170/0x170
> >   sched_rt_rq_enqueue+0x65/0x80
> >   sched_rt_period_timer+0x156/0x360
> >   ? sched_rt_rq_enqueue+0x80/0x80
> >   __hrtimer_run_queues+0xfa/0x260
> >   hrtimer_interrupt+0xcb/0x220
> >   smp_apic_timer_interrupt+0x62/0x120
> >   apic_timer_interrupt+0xf/0x20
> >   
> > 
> >   [...]
> > 
> >   do_idle+0x183/0x1e0
> >   cpu_startup_entry+0x5f/0x70
> >   start_secondary+0x192/0x1d0
> >   secondary_startup_64+0xa5/0xb0
> > 
> > We can get rid of it by the "traditional" means of adding an
> > update_rq_clock() call after acquiring the rq->lock in
> > do_sched_rt_period_timer().
> > 
> > The case for the RT task throttling (which this workload also hits)
> > can be ignored in that the skip_update call is actually bogus and
> > quite the contrary (the request bits are removed/reverted).
> > 
> > By setting RQCF_UPDATED we really don't care if the skip is happening
> > or not and will therefore make the assert_clock_updated() check happy.  
> 
> There is no such flag or assertion in 4.4 or 4.9, so does this change
> still make sense there?

I believe the assert was added to catch bugs like this.

Although the change log is a bit ambiguous in if it is fixing an actual
miss update, or if it is just quieting a false positive.

Davidlohr?

-- Steve


> 
> Ben.
> 
> > Signed-off-by: Davidlohr Bueso 
> > Reviewed-by: Matt Fleming 
> > Acked-by: Peter Zijlstra (Intel) 
> > Cc: Linus Torvalds 
> > Cc: Mike Galbraith 
> > Cc: Thomas Gleixner 
> > Cc: d...@stgolabs.net
> > Cc: linux-kernel@vger.kernel.org
> > Cc: rost...@goodmis.org
> > Link: http://lkml.kernel.org/r/20180402164954.16255-1-d...@stgolabs.net
> > Signed-off-by: Ingo Molnar 
> > Signed-off-by: Sasha Levin 
> > Signed-off-by: Greg Kroah-Hartman 
> > ---
> >  kernel/sched/rt.c |2 ++
> >  1 file changed, 2 insertions(+)
> > 
> > --- a/kernel/sched/rt.c
> > +++ b/kernel/sched/rt.c
> > @@ -822,6 +822,8 @@ static int do_sched_rt_period_timer(stru
> >     struct rq *rq = rq_of_rt_rq(rt_rq);
> >  
> >     raw_spin_lock(&rq->lock);
> > +   update_rq_clock(rq);
> > +
> >     if (rt_rq->rt_time) {
> >     u64 runtime;
> >  
> > 
> > 
> >

Re: [PATCH 4.4 173/268] sched/rt: Fix rq->clock_update_flags < RQCF_ACT_SKIP warning

2018-06-14 Thread Steven Rostedt

On Thu, 14 Jun 2018 22:55:56 +0100
Ben Hutchings  wrote:

> On Mon, 2018-05-28 at 12:02 +0200, Greg Kroah-Hartman wrote:
> > 4.4-stable review patch.  If anyone has any objections, please let me know.
> > 
> > --
> > 
> > From: Davidlohr Bueso 
> > 
> > [ Upstream commit d29a20645d5e929aa7e8616f28e5d8e1c49263ec ]
> > 
> > While running rt-tests' pi_stress program I got the following splat:
> > 
> >   rq->clock_update_flags < RQCF_ACT_SKIP
> >   WARNING: CPU: 27 PID: 0 at kernel/sched/sched.h:960 
> > assert_clock_updated.isra.38.part.39+0x13/0x20
> > 
> >   [...]
> > 
> >   
> >   enqueue_top_rt_rq+0xf4/0x150
> >   ? cpufreq_dbs_governor_start+0x170/0x170
> >   sched_rt_rq_enqueue+0x65/0x80
> >   sched_rt_period_timer+0x156/0x360
> >   ? sched_rt_rq_enqueue+0x80/0x80
> >   __hrtimer_run_queues+0xfa/0x260
> >   hrtimer_interrupt+0xcb/0x220
> >   smp_apic_timer_interrupt+0x62/0x120
> >   apic_timer_interrupt+0xf/0x20
> >   
> > 
> >   [...]
> > 
> >   do_idle+0x183/0x1e0
> >   cpu_startup_entry+0x5f/0x70
> >   start_secondary+0x192/0x1d0
> >   secondary_startup_64+0xa5/0xb0
> > 
> > We can get rid of it by the "traditional" means of adding an
> > update_rq_clock() call after acquiring the rq->lock in
> > do_sched_rt_period_timer().
> > 
> > The case for the RT task throttling (which this workload also hits)
> > can be ignored in that the skip_update call is actually bogus and
> > quite the contrary (the request bits are removed/reverted).
> > 
> > By setting RQCF_UPDATED we really don't care if the skip is happening
> > or not and will therefore make the assert_clock_updated() check happy.  
> 
> There is no such flag or assertion in 4.4 or 4.9, so does this change
> still make sense there?

I believe the assert was added to catch bugs like this.

Although the change log is a bit ambiguous in if it is fixing an actual
miss update, or if it is just quieting a false positive.

Davidlohr?

-- Steve


> 
> Ben.
> 
> > Signed-off-by: Davidlohr Bueso 
> > Reviewed-by: Matt Fleming 
> > Acked-by: Peter Zijlstra (Intel) 
> > Cc: Linus Torvalds 
> > Cc: Mike Galbraith 
> > Cc: Thomas Gleixner 
> > Cc: d...@stgolabs.net
> > Cc: linux-kernel@vger.kernel.org
> > Cc: rost...@goodmis.org
> > Link: http://lkml.kernel.org/r/20180402164954.16255-1-d...@stgolabs.net
> > Signed-off-by: Ingo Molnar 
> > Signed-off-by: Sasha Levin 
> > Signed-off-by: Greg Kroah-Hartman 
> > ---
> >  kernel/sched/rt.c |2 ++
> >  1 file changed, 2 insertions(+)
> > 
> > --- a/kernel/sched/rt.c
> > +++ b/kernel/sched/rt.c
> > @@ -822,6 +822,8 @@ static int do_sched_rt_period_timer(stru
> >     struct rq *rq = rq_of_rt_rq(rt_rq);
> >  
> >     raw_spin_lock(&rq->lock);
> > +   update_rq_clock(rq);
> > +
> >     if (rt_rq->rt_time) {
> >     u64 runtime;
> >  
> > 
> > 
> >

Re: [PATCH v3] dcdbas: Add support for WSMT ACPI table

2018-06-14 Thread Stuart Hayes

On 6/14/2018 12:25 PM, Andy Shevchenko wrote:
> On Thu, Jun 14, 2018 at 5:22 PM, Stuart Hayes  
> wrote:
>> On 6/13/2018 3:54 AM, Andy Shevchenko wrote:
> 
 +* Provide physical address of command buffer field within
 +* the struct smi_cmd... can't use virt_to_phys on smi_cmd
 +* because address may be from memremap.
>>>
>>> Wait, memremap() might return a virtual address. How we be sure that
>>> we got still physical address here?
> 
>> Before this patch, the address in smi_cmd always came from an alloc, so
>> virt_to_phys() was used to get the physical address here.  With WSMT, we
>> could be using a BIOS-provided buffer for SMI, in which case the address in
>> smi_cmd will come from memremap(), so we can't use virt_to_phys() on it.
>> So instead I changed this to use the physical address of smi_data_buf that
>> is stored in smi_data_buf_phys_addr, which will be valid regardless of how
>> the address of smi_data_buf was generated.
> 
> Yes, but what does guarantee that memremap() will return you still
> physical address?
> 

Sorry, I'm not sure I understand the question.

Up to now, this driver always just allocated a buffer from main memory that
it used to send/receive information from BIOS when it generated a SMI.  That's
what smi_cmd points to where this comment is.  And it was safe to use
virt_to_phys() on this address.

With this patch, though, the driver may now be using a buffer that isn't part
of main memory--it could now be using a buffer that BIOS provided the physical
address for, and this would not be part of main memory.  So smi_cmd may contain
a virtual address that memremap() provided.  And because memremap() is just
like ioremap(), the driver can no longer use virt_to_phys(smi_cmd) to get the
physical address of the buffer.

My comment is just pointing that out... I was trying to say, "the code can't
use virt_to_phys(smi_cmd) to get the physical address here".

memremap() should always return a virtual address that points to the physical
address we send it (unless it fails of course).

 +   return 0;
 +
 +   /* Scan for EPS (entry point structure) */
 +   for (addr = (u8 *)__va(0xf0000);
 +addr < (u8 *)__va(0x100000 - sizeof(struct smm_eps_table));
>>>
 +addr += 1) {
>>>
>>> This wasn't commented IIRC and changed. So, why?
> 
>> I changed this in response to your earlier comment (7 june)... you had 
>> pointed
>> out that it would be better if I put an "if (eps) break;" inside the for loop
>> instead of having "&& !eps" in the condition of the for loop.  I put the note
>> "Changed loop searching 0xf0000 to be more readable" in the list of changes 
>> for
>> patch version v3 to cover this change.
> 
> Thanks, but here I meant += 1 vs += 16 step.
> 

Sorry, I thought I had answered this earlier.  The spec does not say that the 
EPS
table will be on a 16-byte boundary.  And I just added a printk in this driver 
to
see where it is on the system I had at hand, and it isn't on a 16-byte boundary:

[ 4680.192542] dcdbas - EPS table at 5761efb7
[ 4680.194012] dcdbas dcdbas: WSMT found, using firmware-provided SMI buffer.
[ 4680.195327] dcdbas dcdbas: Dell Systems Management Base Driver (version 
5.6.0-3.3)

Re: [PATCH 4.17 00/45] 4.17.2-stable review

2018-06-14 Thread Shuah Khan

On 06/14/2018 08:03 AM, Greg Kroah-Hartman wrote:
> This is the start of the stable review cycle for the 4.17.2 release.
> There are 45 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
> 
> Responses should be made by Sat Jun 16 13:21:05 UTC 2018.
> Anything received after that time might be too late.
> 
> The whole patch series can be found in one patch at:
>   
> https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.17.2-rc1.gz
> or in the git tree and branch at:
>   
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.17.y
> and the diffstat can be found below.
> 
> thanks,
> 
> greg k-h
> 

Compiled and booted on my test system. No dmesg regressions.

thanks,
-- Shuah

Re: [PATCH 4.17 00/45] 4.17.2-stable review

2018-06-14 Thread Shuah Khan

On 06/14/2018 08:03 AM, Greg Kroah-Hartman wrote:
> This is the start of the stable review cycle for the 4.17.2 release.
> There are 45 patches in this series, all will be posted as a response
> to this one.  If anyone has any issues with these being applied, please
> let me know.
> 
> Responses should be made by Sat Jun 16 13:21:05 UTC 2018.
> Anything received after that time might be too late.
> 
> The whole patch series can be found in one patch at:
>   
> https://www.kernel.org/pub/linux/kernel/v4.x/stable-review/patch-4.17.2-rc1.gz
> or in the git tree and branch at:
>   
> git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable-rc.git 
> linux-4.17.y
> and the diffstat can be found below.
> 
> thanks,
> 
> greg k-h
> 

Compiled and booted on my test system. No dmesg regressions.

thanks,
-- Shuah

Re: [PATCH v3] dcdbas: Add support for WSMT ACPI table

2018-06-14 Thread Stuart Hayes

On 6/14/2018 12:25 PM, Andy Shevchenko wrote:
> On Thu, Jun 14, 2018 at 5:22 PM, Stuart Hayes  
> wrote:
>> On 6/13/2018 3:54 AM, Andy Shevchenko wrote:
> 
 +* Provide physical address of command buffer field within
 +* the struct smi_cmd... can't use virt_to_phys on smi_cmd
 +* because address may be from memremap.
>>>
>>> Wait, memremap() might return a virtual address. How we be sure that
>>> we got still physical address here?
> 
>> Before this patch, the address in smi_cmd always came from an alloc, so
>> virt_to_phys() was used to get the physical address here.  With WSMT, we
>> could be using a BIOS-provided buffer for SMI, in which case the address in
>> smi_cmd will come from memremap(), so we can't use virt_to_phys() on it.
>> So instead I changed this to use the physical address of smi_data_buf that
>> is stored in smi_data_buf_phys_addr, which will be valid regardless of how
>> the address of smi_data_buf was generated.
> 
> Yes, but what does guarantee that memremap() will return you still
> physical address?
> 

Sorry, I'm not sure I understand the question.

Up to now, this driver always just allocated a buffer from main memory that
it used to send/receive information from BIOS when it generated a SMI.  That's
what smi_cmd points to where this comment is.  And it was safe to use
virt_to_phys() on this address.

With this patch, though, the driver may now be using a buffer that isn't part
of main memory--it could now be using a buffer that BIOS provided the physical
address for, and this would not be part of main memory.  So smi_cmd may contain
a virtual address that memremap() provided.  And because memremap() is just
like ioremap(), the driver can no longer use virt_to_phys(smi_cmd) to get the
physical address of the buffer.

My comment is just pointing that out... I was trying to say, "the code can't
use virt_to_phys(smi_cmd) to get the physical address here".

memremap() should always return a virtual address that points to the physical
address we send it (unless it fails of course).

 +   return 0;
 +
 +   /* Scan for EPS (entry point structure) */
 +   for (addr = (u8 *)__va(0xf0000);
 +addr < (u8 *)__va(0x100000 - sizeof(struct smm_eps_table));
>>>
 +addr += 1) {
>>>
>>> This wasn't commented IIRC and changed. So, why?
> 
>> I changed this in response to your earlier comment (7 june)... you had 
>> pointed
>> out that it would be better if I put an "if (eps) break;" inside the for loop
>> instead of having "&& !eps" in the condition of the for loop.  I put the note
>> "Changed loop searching 0xf0000 to be more readable" in the list of changes 
>> for
>> patch version v3 to cover this change.
> 
> Thanks, but here I meant += 1 vs += 16 step.
> 

Sorry, I thought I had answered this earlier.  The spec does not say that the 
EPS
table will be on a 16-byte boundary.  And I just added a printk in this driver 
to
see where it is on the system I had at hand, and it isn't on a 16-byte boundary:

[ 4680.192542] dcdbas - EPS table at 5761efb7
[ 4680.194012] dcdbas dcdbas: WSMT found, using firmware-provided SMI buffer.
[ 4680.195327] dcdbas dcdbas: Dell Systems Management Base Driver (version 
5.6.0-3.3)

[PATCH 4/10 v2] Input: ams_delta_serio: Replace power GPIO with regulator

2018-06-14 Thread Janusz Krzysztofik

Modify the driver so it no longer requests and manipulates the
"keybrd_pwr" GPIO pin but a "vcc" regulator supply instead.

For this to work with Amstrad Delta, define a regulator over the
"keybrd_pwr" GPIO pin with the "vcc" supply for ams-delta-serio device
and register it from the board file.  Both assign an absolute GPIO
number to the soon deprecated .gpio member of the regulator config
structure, and also build and register a GPIO lookup table so it is
ready for use by the regulator driver as soon as its upcoming update
is applied.

Signed-off-by: Janusz Krzysztofik 
---
Changelog:
v2: Extend the comment above error code conversion, thanks Dmitry for 
requesting that.

If you prefer me to resubmit the whole series as v2, please let me know.
In that case, I'll also add the patch which moves substitution of IRQ
handler out of the driver and submit complete v2 as soon as I get access
to the device (next weekend) and can test that extra change, otherwise
I'll submit it as a separate patch.

Thanks,
Janusz


 arch/arm/mach-omap1/board-ams-delta.c | 63 +--
 drivers/input/serio/ams_delta_serio.c | 37 +++-
 2 files changed, 89 insertions(+), 11 deletions(-)

diff --git a/arch/arm/mach-omap1/board-ams-delta.c 
b/arch/arm/mach-omap1/board-ams-delta.c
index 2119d2d3ba84..706eb2f9301d 100644
--- a/arch/arm/mach-omap1/board-ams-delta.c
+++ b/arch/arm/mach-omap1/board-ams-delta.c
@@ -509,6 +509,46 @@ static struct platform_device ams_delta_serio_device = {
.id = PLATFORM_DEVID_NONE,
 };
 
+static struct regulator_consumer_supply keybrd_pwr_consumers[] = {
+   /*
+* Initialize supply .dev_name with NULL.  It will be replaced
+* with serio dev_name() as soon as the serio device is registered.
+*/
+   REGULATOR_SUPPLY("vcc", NULL),
+};
+
+static struct regulator_init_data keybrd_pwr_initdata = {
+   .constraints= {
+   .valid_ops_mask = REGULATOR_CHANGE_STATUS,
+   },
+   .num_consumer_supplies  = ARRAY_SIZE(keybrd_pwr_consumers),
+   .consumer_supplies  = keybrd_pwr_consumers,
+};
+
+static struct fixed_voltage_config keybrd_pwr_config = {
+   .supply_name= "keybrd_pwr",
+   .microvolts = 5000000,
+   .gpio   = AMS_DELTA_GPIO_PIN_KEYBRD_PWR,
+   .enable_high= 1,
+   .init_data  = &keybrd_pwr_initdata,
+};
+
+static struct platform_device keybrd_pwr_device = {
+   .name   = "reg-fixed-voltage",
+   .id = PLATFORM_DEVID_AUTO,
+   .dev= {
+   .platform_data  = &keybrd_pwr_config,
+   },
+};
+
+static struct gpiod_lookup_table keybrd_pwr_gpio_table = {
+   .table = {
+   GPIO_LOOKUP(LATCH2_LABEL, LATCH2_PIN_KEYBRD_PWR, NULL,
+   GPIO_ACTIVE_HIGH),
+   { },
+   },
+};
+
 static struct platform_device *ams_delta_devices[] __initdata = {
&latch1_gpio_device,
&latch2_gpio_device,
@@ -526,6 +566,7 @@ static struct platform_device *late_devices[] __initdata = {
 
 static struct gpiod_lookup_table *ams_delta_gpio_tables[] __initdata = {
&ams_delta_audio_gpio_table,
+   &keybrd_pwr_gpio_table,
 };
 
 static struct gpiod_lookup_table *late_gpio_tables[] __initdata = {
@@ -566,12 +607,30 @@ static void __init ams_delta_init(void)
platform_add_devices(ams_delta_devices, ARRAY_SIZE(ams_delta_devices));
 
/*
-* As soon as devices have been registered, assign their dev_names
-* to respective GPIO lookup tables before they are added.
+* As soon as regulator consumers have been registered, assign their
+* dev_names to consumer supply entries of respective regulators.
+*/
+   keybrd_pwr_consumers[0].dev_name =
+   dev_name(&ams_delta_serio_device.dev);
+
+   /*
+* Once consumer supply entries are populated with dev_names,
+* register regulator devices.  At this stage only the keyboard
+* power regulator has its consumer supply table fully populated.
+*/
+   platform_device_register(&keybrd_pwr_device);
+
+   /*
+* As soon as GPIO consumers have been registered, assign
+* their dev_names to respective GPIO lookup tables.
 */
ams_delta_audio_gpio_table.dev_id =
dev_name(&ams_delta_audio_device.dev);
+   keybrd_pwr_gpio_table.dev_id = dev_name(&keybrd_pwr_device.dev);
 
+   /*
+* Once GPIO lookup tables are populated with dev_names, register them.
+*/
gpiod_add_lookup_tables(ams_delta_gpio_tables,
ARRAY_SIZE(ams_delta_gpio_tables));
 
diff --git a/drivers/input/serio/ams_delta_serio.c 
b/drivers/input/serio/ams_delta_serio.c
index 551a4fa73fe4..854d0d3ada52 100644
--- a/drivers/input/serio/ams_delta_serio.c
+++ b/drivers/input/serio/ams_delta_serio.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include

[PATCH 4/10 v2] Input: ams_delta_serio: Replace power GPIO with regulator

2018-06-14 Thread Janusz Krzysztofik

Modify the driver so it no longer requests and manipulates the
"keybrd_pwr" GPIO pin but a "vcc" regulator supply instead.

For this to work with Amstrad Delta, define a regulator over the
"keybrd_pwr" GPIO pin with the "vcc" supply for ams-delta-serio device
and register it from the board file.  Both assign an absolute GPIO
number to the soon deprecated .gpio member of the regulator config
structure, and also build and register a GPIO lookup table so it is
ready for use by the regulator driver as soon as its upcoming update
is applied.

Signed-off-by: Janusz Krzysztofik 
---
Changelog:
v2: Extend the comment above error code conversion, thanks Dmitry for 
requesting that.

If you prefer me to resubmit the whole series as v2, please let me know.
In that case, I'll also add the patch which moves substitution of IRQ
handler out of the driver and submit complete v2 as soon as I get access
to the device (next weekend) and can test that extra change, otherwise
I'll submit it as a separate patch.

Thanks,
Janusz


 arch/arm/mach-omap1/board-ams-delta.c | 63 +--
 drivers/input/serio/ams_delta_serio.c | 37 +++-
 2 files changed, 89 insertions(+), 11 deletions(-)

diff --git a/arch/arm/mach-omap1/board-ams-delta.c 
b/arch/arm/mach-omap1/board-ams-delta.c
index 2119d2d3ba84..706eb2f9301d 100644
--- a/arch/arm/mach-omap1/board-ams-delta.c
+++ b/arch/arm/mach-omap1/board-ams-delta.c
@@ -509,6 +509,46 @@ static struct platform_device ams_delta_serio_device = {
.id = PLATFORM_DEVID_NONE,
 };
 
+static struct regulator_consumer_supply keybrd_pwr_consumers[] = {
+   /*
+* Initialize supply .dev_name with NULL.  It will be replaced
+* with serio dev_name() as soon as the serio device is registered.
+*/
+   REGULATOR_SUPPLY("vcc", NULL),
+};
+
+static struct regulator_init_data keybrd_pwr_initdata = {
+   .constraints= {
+   .valid_ops_mask = REGULATOR_CHANGE_STATUS,
+   },
+   .num_consumer_supplies  = ARRAY_SIZE(keybrd_pwr_consumers),
+   .consumer_supplies  = keybrd_pwr_consumers,
+};
+
+static struct fixed_voltage_config keybrd_pwr_config = {
+   .supply_name= "keybrd_pwr",
+   .microvolts = 5000000,
+   .gpio   = AMS_DELTA_GPIO_PIN_KEYBRD_PWR,
+   .enable_high= 1,
+   .init_data  = &keybrd_pwr_initdata,
+};
+
+static struct platform_device keybrd_pwr_device = {
+   .name   = "reg-fixed-voltage",
+   .id = PLATFORM_DEVID_AUTO,
+   .dev= {
+   .platform_data  = &keybrd_pwr_config,
+   },
+};
+
+static struct gpiod_lookup_table keybrd_pwr_gpio_table = {
+   .table = {
+   GPIO_LOOKUP(LATCH2_LABEL, LATCH2_PIN_KEYBRD_PWR, NULL,
+   GPIO_ACTIVE_HIGH),
+   { },
+   },
+};
+
 static struct platform_device *ams_delta_devices[] __initdata = {
&latch1_gpio_device,
&latch2_gpio_device,
@@ -526,6 +566,7 @@ static struct platform_device *late_devices[] __initdata = {
 
 static struct gpiod_lookup_table *ams_delta_gpio_tables[] __initdata = {
_delta_audio_gpio_table,
+   _pwr_gpio_table,
 };
 
 static struct gpiod_lookup_table *late_gpio_tables[] __initdata = {
@@ -566,12 +607,30 @@ static void __init ams_delta_init(void)
platform_add_devices(ams_delta_devices, ARRAY_SIZE(ams_delta_devices));
 
/*
-* As soon as devices have been registered, assign their dev_names
-* to respective GPIO lookup tables before they are added.
+* As soon as regulator consumers have been registered, assign their
+* dev_names to consumer supply entries of respective regulators.
+*/
+   keybrd_pwr_consumers[0].dev_name =
+   dev_name(_delta_serio_device.dev);
+
+   /*
+* Once consumer supply entries are populated with dev_names,
+* register regulator devices.  At this stage only the keyboard
+* power regulator has its consumer supply table fully populated.
+*/
+   platform_device_register(_pwr_device);
+
+   /*
+* As soon as GPIO consumers have been registered, assign
+* their dev_names to respective GPIO lookup tables.
 */
ams_delta_audio_gpio_table.dev_id =
dev_name(_delta_audio_device.dev);
+   keybrd_pwr_gpio_table.dev_id = dev_name(_pwr_device.dev);
 
+   /*
+* Once GPIO lookup tables are populated with dev_names, register them.
+*/
gpiod_add_lookup_tables(ams_delta_gpio_tables,
ARRAY_SIZE(ams_delta_gpio_tables));
 
diff --git a/drivers/input/serio/ams_delta_serio.c 
b/drivers/input/serio/ams_delta_serio.c
index 551a4fa73fe4..854d0d3ada52 100644
--- a/drivers/input/serio/ams_delta_serio.c
+++ b/drivers/input/serio/ams_delta_serio.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include

[PATCH V4] platform/x86: intel_pmc_core: Add CNP SLPS0 debug registers

2018-06-14 Thread David E. Box

From: 

Adds debugfs access to registers in the Cannonlake PCH PMC that are
useful for debugging #SLP_S0 signal assertion and other low power
related activities. Device pm states are latched in these registers
whenever the package enters C10 and can be read from slp_s0_debug_status.
The pm states may also be latched by writing 1 to slp_s0_debug_latch
which will immediately capture the current state on the next read of
slp_s0_debug_status.

Signed-off-by: Box, David E 
---
V4:
- rename slp_s0_dbg string to slp_s0_debug for consistency
- ADD ISCLK prefix to MAIN_PLL and OC_PLL
V3:
- use null terminator in bit_map array
- replaced ternary operator with if/else
- Removed space fixes on old code
V2:
- Clear latch bit after use
- Pass pmc_dev as parameter
- Use DEFINE_SHOW_ATTRIBUTE macro
 drivers/platform/x86/intel_pmc_core.c | 120 ++
 drivers/platform/x86/intel_pmc_core.h |   6 ++
 2 files changed, 126 insertions(+)

diff --git a/drivers/platform/x86/intel_pmc_core.c 
b/drivers/platform/x86/intel_pmc_core.c
index 43bbe74..d00fee2 100644
--- a/drivers/platform/x86/intel_pmc_core.c
+++ b/drivers/platform/x86/intel_pmc_core.c
@@ -196,9 +196,67 @@ static const struct pmc_bit_map cnp_pfear_map[] = {
{}
 };
 
+static const struct pmc_bit_map cnp_slps0_dbg0_map[] = {
+   {"AUDIO_D3",BIT(0)},
+   {"OTG_D3",  BIT(1)},
+   {"XHCI_D3", BIT(2)},
+   {"LPIO_D3", BIT(3)},
+   {"SDX_D3",  BIT(4)},
+   {"SATA_D3", BIT(5)},
+   {"UFS0_D3", BIT(6)},
+   {"UFS1_D3", BIT(7)},
+   {"EMMC_D3", BIT(8)},
+   {}
+};
+
+static const struct pmc_bit_map cnp_slps0_dbg1_map[] = {
+   {"SDIO_PLL_OFF",BIT(0)},
+   {"USB2_PLL_OFF",BIT(1)},
+   {"AUDIO_PLL_OFF",   BIT(2)},
+   {"ISCLK_OC_PLL_OFF",BIT(3)},
+   {"ISCLK_MAIN_PLL_OFF",  BIT(4)},
+   {"XOSC_OFF",BIT(5)},
+   {"LPC_CLKS_GATED",  BIT(6)},
+   {"PCIE_CLKREQS_IDLE",   BIT(7)},
+   {"AUDIO_ROSC_OFF",  BIT(8)},
+   {"HPET_XOSC_CLK_REQ",   BIT(9)},
+   {"PMC_ROSC_SLOW_CLK",   BIT(10)},
+   {"AON2_ROSC_GATED", BIT(11)},
+   {"CLKACKS_DEASSERTED",  BIT(12)},
+   {}
+};
+
+static const struct pmc_bit_map cnp_slps0_dbg2_map[] = {
+   {"MPHY_CORE_GATED", BIT(0)},
+   {"CSME_GATED",  BIT(1)},
+   {"USB2_SUS_GATED",  BIT(2)},
+   {"DYN_FLEX_IO_IDLE",BIT(3)},
+   {"GBE_NO_LINK", BIT(4)},
+   {"THERM_SEN_DISABLED",  BIT(5)},
+   {"PCIE_LOW_POWER",  BIT(6)},
+   {"ISH_VNNAON_REQ_ACT",  BIT(7)},
+   {"ISH_VNN_REQ_ACT", BIT(8)},
+   {"CNV_VNNAON_REQ_ACT",  BIT(9)},
+   {"CNV_VNN_REQ_ACT", BIT(10)},
+   {"NPK_VNNON_REQ_ACT",   BIT(11)},
+   {"PMSYNC_STATE_IDLE",   BIT(12)},
+   {"ALST_GT_THRES",   BIT(13)},
+   {"PMC_ARC_PG_READY",BIT(14)},
+   {}
+};
+
+static const struct pmc_bit_map *cnp_slps0_dbg_maps[] = {
+   cnp_slps0_dbg0_map,
+   cnp_slps0_dbg1_map,
+   cnp_slps0_dbg2_map,
+   NULL,
+};
+
 static const struct pmc_reg_map cnp_reg_map = {
.pfear_sts = cnp_pfear_map,
.slp_s0_offset = CNP_PMC_SLP_S0_RES_COUNTER_OFFSET,
+   .slps0_dbg_maps = cnp_slps0_dbg_maps,
+   .slps0_dbg_offset = CNP_PMC_SLPS0_DBG_OFFSET,
.ltr_ignore_offset = CNP_PMC_LTR_IGNORE_OFFSET,
.regmap_length = CNP_PMC_MMIO_REG_LEN,
.ppfear0_offset = CNP_PMC_HOST_PPFEAR0A,
@@ -252,6 +310,8 @@ static int pmc_core_check_read_lock_bit(void)
 }
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
+static bool slps0_dbg_latch;
+
 static void pmc_core_display_map(struct seq_file *s, int index,
 u8 pf_reg, const struct pmc_bit_map *pf_map)
 {
@@ -481,6 +541,57 @@ static const struct file_operations 
pmc_core_ltr_ignore_ops = {
.release= single_release,
 };
 
+static void pmc_core_slps0_dbg_latch(struct pmc_dev *pmcdev, bool reset)
+{
+   const struct pmc_reg_map *map = pmcdev->map;
+   u32 fd;
+
+   mutex_lock(>lock);
+
+   if (!reset && !slps0_dbg_latch)
+   goto out_unlock;
+
+   fd = pmc_core_reg_read(pmcdev, map->slps0_dbg_offset);
+   if (reset)
+   fd &= ~CNP_PMC_LATCH_SLPS0_EVENTS;
+   else
+   fd |= CNP_PMC_LATCH_SLPS0_EVENTS;
+   pmc_core_reg_write(pmcdev, map->slps0_dbg_offset, fd);
+
+   slps0_dbg_latch = 0;
+
+out_unlock:
+   mutex_unlock(>lock);
+}
+
+static int pmc_core_slps0_dbg_show(struct seq_file *s, void *unused)
+{
+   struct pmc_dev *pmcdev = s->private;
+   const struct pmc_bit_map **maps = pmcdev->map->slps0_dbg_maps;
+   const struct pmc_bit_map *map;
+   int offset;
+   u32 data;
+
+   pmc_core_slps0_dbg_latch(pmcdev, false);
+   offset =

[PATCH V4] platform/x86: intel_pmc_core: Add CNP SLPS0 debug registers

2018-06-14 Thread David E. Box

From: 

Adds debugfs access to registers in the Cannonlake PCH PMC that are
useful for debugging #SLP_S0 signal assertion and other low power
related activities. Device pm states are latched in these registers
whenever the package enters C10 and can be read from slp_s0_debug_status.
The pm states may also be latched by writing 1 to slp_s0_debug_latch
which will immediately capture the current state on the next read of
slp_s0_debug_status.

Signed-off-by: Box, David E 
---
V4:
- rename slp_s0_dbg string to slp_s0_debug for consistency
- ADD ISCLK prefix to MAIN_PLL and OC_PLL
V3:
- use null terminator in bit_map array
- replaced ternary operator with if/else
- Removed space fixes on old code
V2:
- Clear latch bit after use
- Pass pmc_dev as parameter
- Use DEFINE_SHOW_ATTRIBUTE macro
 drivers/platform/x86/intel_pmc_core.c | 120 ++
 drivers/platform/x86/intel_pmc_core.h |   6 ++
 2 files changed, 126 insertions(+)

diff --git a/drivers/platform/x86/intel_pmc_core.c 
b/drivers/platform/x86/intel_pmc_core.c
index 43bbe74..d00fee2 100644
--- a/drivers/platform/x86/intel_pmc_core.c
+++ b/drivers/platform/x86/intel_pmc_core.c
@@ -196,9 +196,67 @@ static const struct pmc_bit_map cnp_pfear_map[] = {
{}
 };
 
+static const struct pmc_bit_map cnp_slps0_dbg0_map[] = {
+   {"AUDIO_D3",BIT(0)},
+   {"OTG_D3",  BIT(1)},
+   {"XHCI_D3", BIT(2)},
+   {"LPIO_D3", BIT(3)},
+   {"SDX_D3",  BIT(4)},
+   {"SATA_D3", BIT(5)},
+   {"UFS0_D3", BIT(6)},
+   {"UFS1_D3", BIT(7)},
+   {"EMMC_D3", BIT(8)},
+   {}
+};
+
+static const struct pmc_bit_map cnp_slps0_dbg1_map[] = {
+   {"SDIO_PLL_OFF",BIT(0)},
+   {"USB2_PLL_OFF",BIT(1)},
+   {"AUDIO_PLL_OFF",   BIT(2)},
+   {"ISCLK_OC_PLL_OFF",BIT(3)},
+   {"ISCLK_MAIN_PLL_OFF",  BIT(4)},
+   {"XOSC_OFF",BIT(5)},
+   {"LPC_CLKS_GATED",  BIT(6)},
+   {"PCIE_CLKREQS_IDLE",   BIT(7)},
+   {"AUDIO_ROSC_OFF",  BIT(8)},
+   {"HPET_XOSC_CLK_REQ",   BIT(9)},
+   {"PMC_ROSC_SLOW_CLK",   BIT(10)},
+   {"AON2_ROSC_GATED", BIT(11)},
+   {"CLKACKS_DEASSERTED",  BIT(12)},
+   {}
+};
+
+static const struct pmc_bit_map cnp_slps0_dbg2_map[] = {
+   {"MPHY_CORE_GATED", BIT(0)},
+   {"CSME_GATED",  BIT(1)},
+   {"USB2_SUS_GATED",  BIT(2)},
+   {"DYN_FLEX_IO_IDLE",BIT(3)},
+   {"GBE_NO_LINK", BIT(4)},
+   {"THERM_SEN_DISABLED",  BIT(5)},
+   {"PCIE_LOW_POWER",  BIT(6)},
+   {"ISH_VNNAON_REQ_ACT",  BIT(7)},
+   {"ISH_VNN_REQ_ACT", BIT(8)},
+   {"CNV_VNNAON_REQ_ACT",  BIT(9)},
+   {"CNV_VNN_REQ_ACT", BIT(10)},
+   {"NPK_VNNON_REQ_ACT",   BIT(11)},
+   {"PMSYNC_STATE_IDLE",   BIT(12)},
+   {"ALST_GT_THRES",   BIT(13)},
+   {"PMC_ARC_PG_READY",BIT(14)},
+   {}
+};
+
+static const struct pmc_bit_map *cnp_slps0_dbg_maps[] = {
+   cnp_slps0_dbg0_map,
+   cnp_slps0_dbg1_map,
+   cnp_slps0_dbg2_map,
+   NULL,
+};
+
 static const struct pmc_reg_map cnp_reg_map = {
.pfear_sts = cnp_pfear_map,
.slp_s0_offset = CNP_PMC_SLP_S0_RES_COUNTER_OFFSET,
+   .slps0_dbg_maps = cnp_slps0_dbg_maps,
+   .slps0_dbg_offset = CNP_PMC_SLPS0_DBG_OFFSET,
.ltr_ignore_offset = CNP_PMC_LTR_IGNORE_OFFSET,
.regmap_length = CNP_PMC_MMIO_REG_LEN,
.ppfear0_offset = CNP_PMC_HOST_PPFEAR0A,
@@ -252,6 +310,8 @@ static int pmc_core_check_read_lock_bit(void)
 }
 
 #if IS_ENABLED(CONFIG_DEBUG_FS)
+static bool slps0_dbg_latch;
+
 static void pmc_core_display_map(struct seq_file *s, int index,
 u8 pf_reg, const struct pmc_bit_map *pf_map)
 {
@@ -481,6 +541,57 @@ static const struct file_operations 
pmc_core_ltr_ignore_ops = {
.release= single_release,
 };
 
+static void pmc_core_slps0_dbg_latch(struct pmc_dev *pmcdev, bool reset)
+{
+   const struct pmc_reg_map *map = pmcdev->map;
+   u32 fd;
+
+   mutex_lock(>lock);
+
+   if (!reset && !slps0_dbg_latch)
+   goto out_unlock;
+
+   fd = pmc_core_reg_read(pmcdev, map->slps0_dbg_offset);
+   if (reset)
+   fd &= ~CNP_PMC_LATCH_SLPS0_EVENTS;
+   else
+   fd |= CNP_PMC_LATCH_SLPS0_EVENTS;
+   pmc_core_reg_write(pmcdev, map->slps0_dbg_offset, fd);
+
+   slps0_dbg_latch = 0;
+
+out_unlock:
+   mutex_unlock(>lock);
+}
+
+static int pmc_core_slps0_dbg_show(struct seq_file *s, void *unused)
+{
+   struct pmc_dev *pmcdev = s->private;
+   const struct pmc_bit_map **maps = pmcdev->map->slps0_dbg_maps;
+   const struct pmc_bit_map *map;
+   int offset;
+   u32 data;
+
+   pmc_core_slps0_dbg_latch(pmcdev, false);
+   offset =

Re: [PATCH 3/3] x86/mce: Check for alternate indication of machine check recovery on Skylake

2018-06-14 Thread Luck, Tony

On Thu, Jun 07, 2018 at 10:24:46PM +0200, Borislav Petkov wrote:
> tglx just took 1 and 3, 2/3 had a minor issue but the merge window
> happened so I'll send it later. It is nice to have anyway.

Did you fix up part 2/3?  I see 1 & 3 were staged by Thomas in
TIP ras/urgent and ras-urgent-for-linus but haven't gone into
Linus' tree yet.

-Tony

Re: [PATCH 3/3] x86/mce: Check for alternate indication of machine check recovery on Skylake

2018-06-14 Thread Luck, Tony

On Thu, Jun 07, 2018 at 10:24:46PM +0200, Borislav Petkov wrote:
> tglx just took 1 and 3, 2/3 had a minor issue but the merge window
> happened so I'll send it later. It is nice to have anyway.

Did you fix up part 2/3?  I see 1 & 3 were staged by Thomas in
TIP ras/urgent and ras-urgent-for-linus but haven't gone into
Linus' tree yet.

-Tony

[PATCH v13 1/8] soc: qcom: Separate kryo l2 accessors from PMU driver

2018-06-14 Thread ilia . lin

From: Ilia Lin 

This driver provides a kernel-level API for other drivers
to access the MSM8996 L2 cache registers.
Separate the L2 access code from the PMU driver and
make it public so that other drivers can use it.
The accesses must be serialized with a single spinlock,
which is maintained in this driver.

Signed-off-by: Ilia Lin 
Reviewed-by: Amit Kucheria 
Tested-by: Amit Kucheria 
---
 drivers/perf/Kconfig |  1 +
 drivers/perf/qcom_l2_pmu.c   | 90 ++--
 drivers/soc/qcom/Kconfig |  3 ++
 drivers/soc/qcom/Makefile|  1 +
 drivers/soc/qcom/kryo-l2-accessors.c | 56 ++
 include/soc/qcom/kryo-l2-accessors.h | 12 +
 6 files changed, 97 insertions(+), 66 deletions(-)
 create mode 100644 drivers/soc/qcom/kryo-l2-accessors.c
 create mode 100644 include/soc/qcom/kryo-l2-accessors.h

diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 08ebaf7cca8b..5c3f07cd79f4 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -71,6 +71,7 @@ config HISI_PMU
 config QCOM_L2_PMU
bool "Qualcomm Technologies L2-cache PMU"
depends on ARCH_QCOM && ARM64 && ACPI
+   select QCOM_KRYO_L2_ACCESSORS
  help
  Provides support for the L2 cache performance monitor unit (PMU)
  in Qualcomm Technologies processors.
diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
index 842135cf35a3..cc31f5162942 100644
--- a/drivers/perf/qcom_l2_pmu.c
+++ b/drivers/perf/qcom_l2_pmu.c
@@ -31,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define MAX_L2_CTRS 9
 
@@ -87,8 +88,6 @@
 #define L2_COUNTER_RELOAD   BIT_ULL(31)
 #define L2_CYCLE_COUNTER_RELOAD BIT_ULL(63)
 
-#define L2CPUSRSELR_EL1 sys_reg(3, 3, 15, 0, 6)
-#define L2CPUSRDR_EL1   sys_reg(3, 3, 15, 0, 7)
 
 #define reg_idx(reg, i) (((i) * IA_L2_REG_OFFSET) + reg##_BASE)
 
@@ -107,48 +106,7 @@
 #define L2_EVENT_STREX 0x421
 #define L2_EVENT_CLREX 0x422
 
-static DEFINE_RAW_SPINLOCK(l2_access_lock);
 
-/**
- * set_l2_indirect_reg: write value to an L2 register
- * @reg: Address of L2 register.
- * @value: Value to be written to register.
- *
- * Use architecturally required barriers for ordering between system register
- * accesses
- */
-static void set_l2_indirect_reg(u64 reg, u64 val)
-{
-   unsigned long flags;
-
-   raw_spin_lock_irqsave(_access_lock, flags);
-   write_sysreg_s(reg, L2CPUSRSELR_EL1);
-   isb();
-   write_sysreg_s(val, L2CPUSRDR_EL1);
-   isb();
-   raw_spin_unlock_irqrestore(_access_lock, flags);
-}
-
-/**
- * get_l2_indirect_reg: read an L2 register value
- * @reg: Address of L2 register.
- *
- * Use architecturally required barriers for ordering between system register
- * accesses
- */
-static u64 get_l2_indirect_reg(u64 reg)
-{
-   u64 val;
-   unsigned long flags;
-
-   raw_spin_lock_irqsave(_access_lock, flags);
-   write_sysreg_s(reg, L2CPUSRSELR_EL1);
-   isb();
-   val = read_sysreg_s(L2CPUSRDR_EL1);
-   raw_spin_unlock_irqrestore(_access_lock, flags);
-
-   return val;
-}
 
 struct cluster_pmu;
 
@@ -219,28 +177,28 @@ static inline struct cluster_pmu *get_cluster_pmu(
 static void cluster_pmu_reset(void)
 {
/* Reset all counters */
-   set_l2_indirect_reg(L2PMCR, L2PMCR_RESET_ALL);
-   set_l2_indirect_reg(L2PMCNTENCLR, l2_counter_present_mask);
-   set_l2_indirect_reg(L2PMINTENCLR, l2_counter_present_mask);
-   set_l2_indirect_reg(L2PMOVSCLR, l2_counter_present_mask);
+   kryo_l2_set_indirect_reg(L2PMCR, L2PMCR_RESET_ALL);
+   kryo_l2_set_indirect_reg(L2PMCNTENCLR, l2_counter_present_mask);
+   kryo_l2_set_indirect_reg(L2PMINTENCLR, l2_counter_present_mask);
+   kryo_l2_set_indirect_reg(L2PMOVSCLR, l2_counter_present_mask);
 }
 
 static inline void cluster_pmu_enable(void)
 {
-   set_l2_indirect_reg(L2PMCR, L2PMCR_COUNTERS_ENABLE);
+   kryo_l2_set_indirect_reg(L2PMCR, L2PMCR_COUNTERS_ENABLE);
 }
 
 static inline void cluster_pmu_disable(void)
 {
-   set_l2_indirect_reg(L2PMCR, L2PMCR_COUNTERS_DISABLE);
+   kryo_l2_set_indirect_reg(L2PMCR, L2PMCR_COUNTERS_DISABLE);
 }
 
 static inline void cluster_pmu_counter_set_value(u32 idx, u64 value)
 {
if (idx == l2_cycle_ctr_idx)
-   set_l2_indirect_reg(L2PMCCNTR, value);
+   kryo_l2_set_indirect_reg(L2PMCCNTR, value);
else
-   set_l2_indirect_reg(reg_idx(IA_L2PMXEVCNTR, idx), value);
+   kryo_l2_set_indirect_reg(reg_idx(IA_L2PMXEVCNTR, idx), value);
 }
 
 static inline u64 cluster_pmu_counter_get_value(u32 idx)
@@ -248,46 +206,46 @@ static inline u64 cluster_pmu_counter_get_value(u32 idx)
u64 value;
 
if (idx == l2_cycle_ctr_idx)
-   value = get_l2_indirect_reg(L2PMCCNTR);
+   value = kryo_l2_get_indirect_reg(L2PMCCNTR);
else
-

[PATCH v13 1/8] soc: qcom: Separate kryo l2 accessors from PMU driver

2018-06-14 Thread ilia . lin

From: Ilia Lin 

This driver provides a kernel-level API for other drivers
to access the MSM8996 L2 cache registers.
Separate the L2 access code from the PMU driver and
make it public so that other drivers can use it.
The accesses must be serialized with a single spinlock,
which is maintained in this driver.

Signed-off-by: Ilia Lin 
Reviewed-by: Amit Kucheria 
Tested-by: Amit Kucheria 
---
 drivers/perf/Kconfig |  1 +
 drivers/perf/qcom_l2_pmu.c   | 90 ++--
 drivers/soc/qcom/Kconfig |  3 ++
 drivers/soc/qcom/Makefile|  1 +
 drivers/soc/qcom/kryo-l2-accessors.c | 56 ++
 include/soc/qcom/kryo-l2-accessors.h | 12 +
 6 files changed, 97 insertions(+), 66 deletions(-)
 create mode 100644 drivers/soc/qcom/kryo-l2-accessors.c
 create mode 100644 include/soc/qcom/kryo-l2-accessors.h

diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 08ebaf7cca8b..5c3f07cd79f4 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -71,6 +71,7 @@ config HISI_PMU
 config QCOM_L2_PMU
bool "Qualcomm Technologies L2-cache PMU"
depends on ARCH_QCOM && ARM64 && ACPI
+   select QCOM_KRYO_L2_ACCESSORS
  help
  Provides support for the L2 cache performance monitor unit (PMU)
  in Qualcomm Technologies processors.
diff --git a/drivers/perf/qcom_l2_pmu.c b/drivers/perf/qcom_l2_pmu.c
index 842135cf35a3..cc31f5162942 100644
--- a/drivers/perf/qcom_l2_pmu.c
+++ b/drivers/perf/qcom_l2_pmu.c
@@ -31,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define MAX_L2_CTRS 9
 
@@ -87,8 +88,6 @@
 #define L2_COUNTER_RELOAD   BIT_ULL(31)
 #define L2_CYCLE_COUNTER_RELOAD BIT_ULL(63)
 
-#define L2CPUSRSELR_EL1 sys_reg(3, 3, 15, 0, 6)
-#define L2CPUSRDR_EL1   sys_reg(3, 3, 15, 0, 7)
 
 #define reg_idx(reg, i) (((i) * IA_L2_REG_OFFSET) + reg##_BASE)
 
@@ -107,48 +106,7 @@
 #define L2_EVENT_STREX 0x421
 #define L2_EVENT_CLREX 0x422
 
-static DEFINE_RAW_SPINLOCK(l2_access_lock);
 
-/**
- * set_l2_indirect_reg: write value to an L2 register
- * @reg: Address of L2 register.
- * @value: Value to be written to register.
- *
- * Use architecturally required barriers for ordering between system register
- * accesses
- */
-static void set_l2_indirect_reg(u64 reg, u64 val)
-{
-   unsigned long flags;
-
-   raw_spin_lock_irqsave(_access_lock, flags);
-   write_sysreg_s(reg, L2CPUSRSELR_EL1);
-   isb();
-   write_sysreg_s(val, L2CPUSRDR_EL1);
-   isb();
-   raw_spin_unlock_irqrestore(_access_lock, flags);
-}
-
-/**
- * get_l2_indirect_reg: read an L2 register value
- * @reg: Address of L2 register.
- *
- * Use architecturally required barriers for ordering between system register
- * accesses
- */
-static u64 get_l2_indirect_reg(u64 reg)
-{
-   u64 val;
-   unsigned long flags;
-
-   raw_spin_lock_irqsave(_access_lock, flags);
-   write_sysreg_s(reg, L2CPUSRSELR_EL1);
-   isb();
-   val = read_sysreg_s(L2CPUSRDR_EL1);
-   raw_spin_unlock_irqrestore(_access_lock, flags);
-
-   return val;
-}
 
 struct cluster_pmu;
 
@@ -219,28 +177,28 @@ static inline struct cluster_pmu *get_cluster_pmu(
 static void cluster_pmu_reset(void)
 {
/* Reset all counters */
-   set_l2_indirect_reg(L2PMCR, L2PMCR_RESET_ALL);
-   set_l2_indirect_reg(L2PMCNTENCLR, l2_counter_present_mask);
-   set_l2_indirect_reg(L2PMINTENCLR, l2_counter_present_mask);
-   set_l2_indirect_reg(L2PMOVSCLR, l2_counter_present_mask);
+   kryo_l2_set_indirect_reg(L2PMCR, L2PMCR_RESET_ALL);
+   kryo_l2_set_indirect_reg(L2PMCNTENCLR, l2_counter_present_mask);
+   kryo_l2_set_indirect_reg(L2PMINTENCLR, l2_counter_present_mask);
+   kryo_l2_set_indirect_reg(L2PMOVSCLR, l2_counter_present_mask);
 }
 
 static inline void cluster_pmu_enable(void)
 {
-   set_l2_indirect_reg(L2PMCR, L2PMCR_COUNTERS_ENABLE);
+   kryo_l2_set_indirect_reg(L2PMCR, L2PMCR_COUNTERS_ENABLE);
 }
 
 static inline void cluster_pmu_disable(void)
 {
-   set_l2_indirect_reg(L2PMCR, L2PMCR_COUNTERS_DISABLE);
+   kryo_l2_set_indirect_reg(L2PMCR, L2PMCR_COUNTERS_DISABLE);
 }
 
 static inline void cluster_pmu_counter_set_value(u32 idx, u64 value)
 {
if (idx == l2_cycle_ctr_idx)
-   set_l2_indirect_reg(L2PMCCNTR, value);
+   kryo_l2_set_indirect_reg(L2PMCCNTR, value);
else
-   set_l2_indirect_reg(reg_idx(IA_L2PMXEVCNTR, idx), value);
+   kryo_l2_set_indirect_reg(reg_idx(IA_L2PMXEVCNTR, idx), value);
 }
 
 static inline u64 cluster_pmu_counter_get_value(u32 idx)
@@ -248,46 +206,46 @@ static inline u64 cluster_pmu_counter_get_value(u32 idx)
u64 value;
 
if (idx == l2_cycle_ctr_idx)
-   value = get_l2_indirect_reg(L2PMCCNTR);
+   value = kryo_l2_get_indirect_reg(L2PMCCNTR);
else
-

[PATCH v13 4/8] clk: qcom: Add CPU clock driver for msm8996

2018-06-14 Thread ilia . lin

From: Ilia Lin 

Each of the CPU clusters (Power and Perf) on msm8996 are
clocked via 2 PLLs, a primary and alternate. There are also
2 Mux'es, a primary and secondary all connected together
as shown below

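                             +-------+
              XO             |       |
          +------------------>0      |
                             |       |
                   PLL/2     | SMUX  +----+
                     +------->1      |    |
                     |       |       |    |
                     |       +-------+    |    +-------+
                     |                    +---->0      |
                     |                         |       |
+---------------+    |             +----------->1      | CPU clk
|Primary PLL    +----+ PLL_EARLY   |           |       +------>
|               +------+-----------+    +------>2 PMUX |
+---------------+      |                |      |       |
                       |   +------+     |   +-->3      |
                       +--^+  ACD +-----+   |  +-------+
+---------------+          +------+         |
|Alt PLL        |                           |
|               +---------------------------+
+---------------+         PLL_EARLY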

The primary PLL is what drives the CPU clk, except for times
when we are reprogramming the PLL itself (for rate changes) when
we temporarily switch to an alternate PLL. A subsequent patch adds
support to switch between primary and alternate PLL during rate
changes.

The primary PLL operates on a single VCO range, between 600MHz
and 3GHz. However the CPUs do support OPPs with frequencies
between 300MHz and 600MHz. In order to support running the CPUs
at those frequencies we end up having to lock the PLL at twice
the rate and drive the CPU clk via the PLL/2 output and SMUX.

So for frequencies above 600MHz we follow the following path
 Primary PLL --> PLL_EARLY --> PMUX(1) --> CPU clk
and for frequencies between 300MHz and 600MHz we follow
 Primary PLL --> PLL/2 --> SMUX(1) --> PMUX(0) --> CPU clk
Support for this is added in a subsequent patch as well.

ACD stands for Adaptive Clock Distribution and is used to
detect voltage droops.

Signed-off-by: Rajendra Nayak 
Signed-off-by: Ilia Lin 
Tested-by: Amit Kucheria 
---
 drivers/clk/qcom/Kconfig |  10 +
 drivers/clk/qcom/Makefile|   1 +
 drivers/clk/qcom/clk-alpha-pll.h |   6 +
 drivers/clk/qcom/clk-cpu-8996.c  | 403 +++
 4 files changed, 420 insertions(+)
 create mode 100644 drivers/clk/qcom/clk-cpu-8996.c

diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig
index 9c3480dcc38a..fe01df59f923 100644
--- a/drivers/clk/qcom/Kconfig
+++ b/drivers/clk/qcom/Kconfig
@@ -33,6 +33,16 @@ config QCOM_CLK_APCS_MSM8916
  Say Y if you want to support CPU frequency scaling on devices
  such as msm8916.
 
+config QCOM_CLK_APCC_MSM8996
+   tristate "MSM8996 CPU Clock Controller"
+   depends on ARM64
+   depends on COMMON_CLK_QCOM
+   select QCOM_KRYO_L2_ACCESSORS
+   help
+ Support for the CPU clock controller on msm8996 devices.
+ Say Y if you want to support CPU clock scaling using CPUfreq
+ drivers for dynamic power management.
+
 config QCOM_CLK_RPM
tristate "RPM based Clock Controller"
depends on COMMON_CLK_QCOM && MFD_QCOM_RPM
diff --git a/drivers/clk/qcom/Makefile b/drivers/clk/qcom/Makefile
index 762c01137c2f..d142778f6e92 100644
--- a/drivers/clk/qcom/Makefile
+++ b/drivers/clk/qcom/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_MSM_MMCC_8974) += mmcc-msm8974.o
 obj-$(CONFIG_MSM_MMCC_8996) += mmcc-msm8996.o
 obj-$(CONFIG_QCOM_A53PLL) += a53-pll.o
 obj-$(CONFIG_QCOM_CLK_APCS_MSM8916) += apcs-msm8916.o
+obj-$(CONFIG_QCOM_CLK_APCC_MSM8996) += clk-cpu-8996.o
 obj-$(CONFIG_QCOM_CLK_RPM) += clk-rpm.o
 obj-$(CONFIG_QCOM_CLK_SMD_RPM) += clk-smd-rpm.o
 obj-$(CONFIG_SDM_GCC_845) += gcc-sdm845.o
diff --git a/drivers/clk/qcom/clk-alpha-pll.h b/drivers/clk/qcom/clk-alpha-pll.h
index f981b486c468..9ce2a32f30ab 100644
--- a/drivers/clk/qcom/clk-alpha-pll.h
+++ b/drivers/clk/qcom/clk-alpha-pll.h
@@ -50,6 +50,12 @@ struct pll_vco {
u32 val;
 };
 
+#define VCO(a, b, c) { \
+   .val = a,\
+   .min_freq = b,\
+   .max_freq = c,\
+}
+
 /**
  * struct clk_alpha_pll - phase locked loop (PLL)
  * @offset: base address of registers
diff --git a/drivers/clk/qcom/clk-cpu-8996.c b/drivers/clk/qcom/clk-cpu-8996.c
new file mode 100644
index ..d92cad93af20
--- /dev/null
+++ b/drivers/clk/qcom/clk-cpu-8996.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ */
+
+/*
+ * Each of the CPU clusters (Power and Perf) on msm8996 are
+ * clocked via 2 PLLs, a primary and alternate. There are also
+ * 2 Mux'es, a primary and secondary all connected together
+ * as shown below
+ *
+ *  +---+
+ *   XO |   |
+ *

[PATCH v13 5/8] dt-bindings: clk: qcom: Add bindings for CPU clock for msm8996

2018-06-14 Thread ilia . lin

From: Ilia Lin 

Each of the CPU clusters (Power and Perf) on msm8996 are
clocked via 2 PLLs, a primary and alternate. There are also
2 Mux'es, a primary and secondary all connected together
as shown below

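                             +-------+
              XO             |       |
          +------------------>0      |
                             |       |
                   PLL/2     | SMUX  +----+
                     +------->1      |    |
                     |       |       |    |
                     |       +-------+    |    +-------+
                     |                    +---->0      |
                     |                         |       |
+---------------+    |             +----------->1      | CPU clk
|Primary PLL    +----+ PLL_EARLY   |           |       +------>
|               +------+-----------+    +------>2 PMUX |
+---------------+      |                |      |       |
                       |   +------+     |   +-->3      |
                       +--^+  ACD +-----+   |  +-------+
+---------------+          +------+         |
|Alt PLL        |                           |
|               +---------------------------+
+---------------+         PLL_EARLY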

The primary PLL is what drives the CPU clk, except for times
when we are reprogramming the PLL itself (for rate changes) when
we temporarily switch to an alternate PLL. A subsequent patch adds
support to switch between primary and alternate PLL during rate
changes.

The primary PLL operates on a single VCO range, between 600MHz
and 3GHz. However the CPUs do support OPPs with frequencies
between 300MHz and 600MHz. In order to support running the CPUs
at those frequencies we end up having to lock the PLL at twice
the rate and drive the CPU clk via the PLL/2 output and SMUX.

Signed-off-by: Ilia Lin 
Reviewed-by: Rob Herring 
Tested-by: Amit Kucheria 
---
 Documentation/devicetree/bindings/clock/qcom,kryocc.txt | 17 +
 1 file changed, 17 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/clock/qcom,kryocc.txt

diff --git a/Documentation/devicetree/bindings/clock/qcom,kryocc.txt 
b/Documentation/devicetree/bindings/clock/qcom,kryocc.txt
new file mode 100644
index ..8458783c5a1a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,kryocc.txt
@@ -0,0 +1,17 @@
+Qualcomm CPUSS clock controller for Kryo CPUs
+
+
+Required properties :
+- compatible : shall contain only one of the following:
+
+   "qcom,msm8996-apcc"
+
+- reg : shall contain base register location and length
+- #clock-cells : shall contain 1
+
+Example:
+   kryocc: clock-controller@640 {
+   compatible = "qcom,msm8996-apcc";
+   reg = <0x640 0x9>;
+   #clock-cells = <1>;
+   };
-- 
2.11.0

[PATCH v13 4/8] clk: qcom: Add CPU clock driver for msm8996

2018-06-14 Thread ilia . lin

From: Ilia Lin 

Each of the CPU clusters (Power and Perf) on msm8996 are
clocked via 2 PLLs, a primary and alternate. There are also
2 Mux'es, a primary and secondary all connected together
as shown below

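                             +-------+
              XO             |       |
          +------------------>0      |
                             |       |
                   PLL/2     | SMUX  +----+
                     +------->1      |    |
                     |       |       |    |
                     |       +-------+    |    +-------+
                     |                    +---->0      |
                     |                         |       |
+---------------+    |             +----------->1      | CPU clk
|Primary PLL    +----+ PLL_EARLY   |           |       +------>
|               +------+-----------+    +------>2 PMUX |
+---------------+      |                |      |       |
                       |   +------+     |   +-->3      |
                       +--^+  ACD +-----+   |  +-------+
+---------------+          +------+         |
|Alt PLL        |                           |
|               +---------------------------+
+---------------+         PLL_EARLY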

The primary PLL is what drives the CPU clk, except for times
when we are reprogramming the PLL itself (for rate changes) when
we temporarily switch to an alternate PLL. A subsequent patch adds
support to switch between primary and alternate PLL during rate
changes.

The primary PLL operates on a single VCO range, between 600MHz
and 3GHz. However the CPUs do support OPPs with frequencies
between 300MHz and 600MHz. In order to support running the CPUs
at those frequencies we end up having to lock the PLL at twice
the rate and drive the CPU clk via the PLL/2 output and SMUX.

So for frequencies above 600MHz we follow the following path
 Primary PLL --> PLL_EARLY --> PMUX(1) --> CPU clk
and for frequencies between 300MHz and 600MHz we follow
 Primary PLL --> PLL/2 --> SMUX(1) --> PMUX(0) --> CPU clk
Support for this is added in a subsequent patch as well.

ACD stands for Adaptive Clock Distribution and is used to
detect voltage droops.

Signed-off-by: Rajendra Nayak 
Signed-off-by: Ilia Lin 
Tested-by: Amit Kucheria 
---
 drivers/clk/qcom/Kconfig |  10 +
 drivers/clk/qcom/Makefile|   1 +
 drivers/clk/qcom/clk-alpha-pll.h |   6 +
 drivers/clk/qcom/clk-cpu-8996.c  | 403 +++
 4 files changed, 420 insertions(+)
 create mode 100644 drivers/clk/qcom/clk-cpu-8996.c

diff --git a/drivers/clk/qcom/Kconfig b/drivers/clk/qcom/Kconfig
index 9c3480dcc38a..fe01df59f923 100644
--- a/drivers/clk/qcom/Kconfig
+++ b/drivers/clk/qcom/Kconfig
@@ -33,6 +33,16 @@ config QCOM_CLK_APCS_MSM8916
  Say Y if you want to support CPU frequency scaling on devices
  such as msm8916.
 
+config QCOM_CLK_APCC_MSM8996
+   tristate "MSM8996 CPU Clock Controller"
+   depends on ARM64
+   depends on COMMON_CLK_QCOM
+   select QCOM_KRYO_L2_ACCESSORS
+   help
+ Support for the CPU clock controller on msm8996 devices.
+ Say Y if you want to support CPU clock scaling using CPUfreq
+ drivers for dynamic power management.
+
 config QCOM_CLK_RPM
tristate "RPM based Clock Controller"
depends on COMMON_CLK_QCOM && MFD_QCOM_RPM
diff --git a/drivers/clk/qcom/Makefile b/drivers/clk/qcom/Makefile
index 762c01137c2f..d142778f6e92 100644
--- a/drivers/clk/qcom/Makefile
+++ b/drivers/clk/qcom/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_MSM_MMCC_8974) += mmcc-msm8974.o
 obj-$(CONFIG_MSM_MMCC_8996) += mmcc-msm8996.o
 obj-$(CONFIG_QCOM_A53PLL) += a53-pll.o
 obj-$(CONFIG_QCOM_CLK_APCS_MSM8916) += apcs-msm8916.o
+obj-$(CONFIG_QCOM_CLK_APCC_MSM8996) += clk-cpu-8996.o
 obj-$(CONFIG_QCOM_CLK_RPM) += clk-rpm.o
 obj-$(CONFIG_QCOM_CLK_SMD_RPM) += clk-smd-rpm.o
 obj-$(CONFIG_SDM_GCC_845) += gcc-sdm845.o
diff --git a/drivers/clk/qcom/clk-alpha-pll.h b/drivers/clk/qcom/clk-alpha-pll.h
index f981b486c468..9ce2a32f30ab 100644
--- a/drivers/clk/qcom/clk-alpha-pll.h
+++ b/drivers/clk/qcom/clk-alpha-pll.h
@@ -50,6 +50,12 @@ struct pll_vco {
u32 val;
 };
 
+#define VCO(a, b, c) { \
+   .val = a,\
+   .min_freq = b,\
+   .max_freq = c,\
+}
+
 /**
  * struct clk_alpha_pll - phase locked loop (PLL)
  * @offset: base address of registers
diff --git a/drivers/clk/qcom/clk-cpu-8996.c b/drivers/clk/qcom/clk-cpu-8996.c
new file mode 100644
index ..d92cad93af20
--- /dev/null
+++ b/drivers/clk/qcom/clk-cpu-8996.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ */
+
+/*
+ * Each of the CPU clusters (Power and Perf) on msm8996 are
+ * clocked via 2 PLLs, a primary and alternate. There are also
+ * 2 Mux'es, a primary and secondary all connected together
+ * as shown below
+ *
+ *  +---+
+ *   XO |   |
+ *

[PATCH v13 5/8] dt-bindings: clk: qcom: Add bindings for CPU clock for msm8996

2018-06-14 Thread ilia . lin

From: Ilia Lin 

Each of the CPU clusters (Power and Perf) on msm8996 are
clocked via 2 PLLs, a primary and alternate. There are also
2 Mux'es, a primary and secondary all connected together
as shown below

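                             +-------+
              XO             |       |
          +------------------>0      |
                             |       |
                   PLL/2     | SMUX  +----+
                     +------->1      |    |
                     |       |       |    |
                     |       +-------+    |    +-------+
                     |                    +---->0      |
                     |                         |       |
+---------------+    |             +----------->1      | CPU clk
|Primary PLL    +----+ PLL_EARLY   |           |       +------>
|               +------+-----------+    +------>2 PMUX |
+---------------+      |                |      |       |
                       |   +------+     |   +-->3      |
                       +--^+  ACD +-----+   |  +-------+
+---------------+          +------+         |
|Alt PLL        |                           |
|               +---------------------------+
+---------------+         PLL_EARLY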

The primary PLL is what drives the CPU clk, except for times
when we are reprogramming the PLL itself (for rate changes) when
we temporarily switch to an alternate PLL. A subsequent patch adds
support to switch between primary and alternate PLL during rate
changes.

The primary PLL operates on a single VCO range, between 600MHz
and 3GHz. However the CPUs do support OPPs with frequencies
between 300MHz and 600MHz. In order to support running the CPUs
at those frequencies we end up having to lock the PLL at twice
the rate and drive the CPU clk via the PLL/2 output and SMUX.

Signed-off-by: Ilia Lin 
Reviewed-by: Rob Herring 
Tested-by: Amit Kucheria 
---
 Documentation/devicetree/bindings/clock/qcom,kryocc.txt | 17 +
 1 file changed, 17 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/clock/qcom,kryocc.txt

diff --git a/Documentation/devicetree/bindings/clock/qcom,kryocc.txt 
b/Documentation/devicetree/bindings/clock/qcom,kryocc.txt
new file mode 100644
index ..8458783c5a1a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,kryocc.txt
@@ -0,0 +1,17 @@
+Qualcomm CPUSS clock controller for Kryo CPUs
+
+
+Required properties :
+- compatible : shall contain only one of the following:
+
+   "qcom,msm8996-apcc"
+
+- reg : shall contain base register location and length
+- #clock-cells : shall contain 1
+
+Example:
+	kryocc: clock-controller@6400000 {
+		compatible = "qcom,msm8996-apcc";
+		reg = <0x6400000 0x90000>;
+		#clock-cells = <1>;
+	};
-- 
2.11.0

[PATCH v13 7/8] clk: qcom: cpu-8996: Add support to switch below 600Mhz

2018-06-14 Thread ilia . lin

From: Rajendra Nayak 

The CPU clock controller's primary PLL operates on a single VCO range,
between 600MHz and 3GHz. However the CPUs do support OPPs with
frequencies between 300MHz and 600MHz. In order to support running the
CPUs at those frequencies we end up having to lock the PLL at twice the
rate and drive the CPU clk via the PLL/2 output and SMUX.

So for frequencies above 600MHz we follow the following path
 Primary PLL --> PLL_EARLY --> PMUX(1) --> CPU clk
and for frequencies between 300MHz and 600MHz we follow
 Primary PLL --> PLL/2 --> SMUX(1) --> PMUX(0) --> CPU clk

Signed-off-by: Rajendra Nayak 
Signed-off-by: Ilia Lin 
Tested-by: Amit Kucheria 
---
 drivers/clk/qcom/clk-cpu-8996.c | 25 ++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/drivers/clk/qcom/clk-cpu-8996.c b/drivers/clk/qcom/clk-cpu-8996.c
index 620fdc2266ba..ff5c0a5740d2 100644
--- a/drivers/clk/qcom/clk-cpu-8996.c
+++ b/drivers/clk/qcom/clk-cpu-8996.c
@@ -68,6 +68,8 @@ enum _pmux_input {
NUM_OF_PMUX_INPUTS
 };
 
+#define DIV_2_THRESHOLD6
+
 static const u8 prim_pll_regs[PLL_OFF_MAX_REGS] = {
[PLL_OFF_L_VAL] = 0x04,
[PLL_OFF_ALPHA_VAL] = 0x08,
@@ -95,10 +97,11 @@ static const u8 alt_pll_regs[PLL_OFF_MAX_REGS] = {
 
 static const struct alpha_pll_config hfpll_config = {
.l = 60,
-   .config_ctl_val = 0x200d4828,
+   .config_ctl_val = 0x200d4aa8,
.config_ctl_hi_val = 0x006,
.pre_div_mask = BIT(12),
.post_div_mask = 0x3 << 8,
+   .post_div_val = 0x1 << 8,
.main_output_mask = BIT(0),
.early_output_mask = BIT(3),
 };
@@ -140,7 +143,7 @@ static const struct alpha_pll_config altpll_config = {
.vco_mask = 0x3 << 20,
.config_ctl_val = 0x4001051b,
.post_div_mask = 0x3 << 8,
-   .post_div_val = 0x1,
+   .post_div_val = 0x1 << 8,
.main_output_mask = BIT(0),
.early_output_mask = BIT(3),
 };
@@ -181,6 +184,7 @@ struct clk_cpu_8996_mux {
u8  width;
struct notifier_block nb;
struct clk_hw   *pll;
+   struct clk_hw   *pll_div_2;
struct clk_regmap clkr;
 };
 
@@ -226,6 +230,13 @@ clk_cpu_8996_mux_determine_rate(struct clk_hw *hw, struct 
clk_rate_request *req)
struct clk_cpu_8996_mux *cpuclk = to_clk_cpu_8996_mux_hw(hw);
struct clk_hw *parent = cpuclk->pll;
 
+   if (cpuclk->pll_div_2 && req->rate < DIV_2_THRESHOLD) {
+   if (req->rate < (DIV_2_THRESHOLD / 2))
+   return -EINVAL;
+
+   parent = cpuclk->pll_div_2;
+   }
+
req->best_parent_rate = clk_hw_round_rate(parent, req->rate);
req->best_parent_hw = parent;
 
@@ -237,13 +248,19 @@ int cpu_clk_notifier_cb(struct notifier_block *nb, 
unsigned long event,
 {
int ret;
struct clk_cpu_8996_mux *cpuclk = to_clk_cpu_8996_mux_nb(nb);
+   struct clk_notifier_data *cnd = data;
 
switch (event) {
case PRE_RATE_CHANGE:
ret = clk_cpu_8996_mux_set_parent(>clkr.hw, ALT_INDEX);
break;
case POST_RATE_CHANGE:
-   ret = clk_cpu_8996_mux_set_parent(>clkr.hw, PLL_INDEX);
+   if (cnd->new_rate < DIV_2_THRESHOLD)
+   ret = clk_cpu_8996_mux_set_parent(>clkr.hw,
+ DIV_2_INDEX);
+   else
+   ret = clk_cpu_8996_mux_set_parent(>clkr.hw,
+ PLL_INDEX);
break;
default:
ret = 0;
@@ -295,6 +312,7 @@ static struct clk_cpu_8996_mux pwrcl_pmux = {
.shift = 0,
.width = 2,
.pll = _pll.clkr.hw,
+   .pll_div_2 = _smux.clkr.hw,
.nb.notifier_call = cpu_clk_notifier_cb,
.clkr.hw.init = &(struct clk_init_data) {
.name = "pwrcl_pmux",
@@ -315,6 +333,7 @@ static struct clk_cpu_8996_mux perfcl_pmux = {
.shift = 0,
.width = 2,
.pll = _pll.clkr.hw,
+   .pll_div_2 = _smux.clkr.hw,
.nb.notifier_call = cpu_clk_notifier_cb,
.clkr.hw.init = &(struct clk_init_data) {
.name = "perfcl_pmux",
-- 
2.11.0

[PATCH v13 7/8] clk: qcom: cpu-8996: Add support to switch below 600Mhz

2018-06-14 Thread ilia . lin

From: Rajendra Nayak 

The CPU clock controller's primary PLL operates on a single VCO range,
between 600MHz and 3GHz. However the CPUs do support OPPs with
frequencies between 300MHz and 600MHz. In order to support running the
CPUs at those frequencies we end up having to lock the PLL at twice the
rate and drive the CPU clk via the PLL/2 output and SMUX.

So for frequencies above 600MHz we follow the following path
 Primary PLL --> PLL_EARLY --> PMUX(1) --> CPU clk
and for frequencies between 300MHz and 600MHz we follow
 Primary PLL --> PLL/2 --> SMUX(1) --> PMUX(0) --> CPU clk

Signed-off-by: Rajendra Nayak 
Signed-off-by: Ilia Lin 
Tested-by: Amit Kucheria 
---
 drivers/clk/qcom/clk-cpu-8996.c | 25 ++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/drivers/clk/qcom/clk-cpu-8996.c b/drivers/clk/qcom/clk-cpu-8996.c
index 620fdc2266ba..ff5c0a5740d2 100644
--- a/drivers/clk/qcom/clk-cpu-8996.c
+++ b/drivers/clk/qcom/clk-cpu-8996.c
@@ -68,6 +68,8 @@ enum _pmux_input {
NUM_OF_PMUX_INPUTS
 };
 
+#define DIV_2_THRESHOLD6
+
 static const u8 prim_pll_regs[PLL_OFF_MAX_REGS] = {
[PLL_OFF_L_VAL] = 0x04,
[PLL_OFF_ALPHA_VAL] = 0x08,
@@ -95,10 +97,11 @@ static const u8 alt_pll_regs[PLL_OFF_MAX_REGS] = {
 
 static const struct alpha_pll_config hfpll_config = {
.l = 60,
-   .config_ctl_val = 0x200d4828,
+   .config_ctl_val = 0x200d4aa8,
.config_ctl_hi_val = 0x006,
.pre_div_mask = BIT(12),
.post_div_mask = 0x3 << 8,
+   .post_div_val = 0x1 << 8,
.main_output_mask = BIT(0),
.early_output_mask = BIT(3),
 };
@@ -140,7 +143,7 @@ static const struct alpha_pll_config altpll_config = {
.vco_mask = 0x3 << 20,
.config_ctl_val = 0x4001051b,
.post_div_mask = 0x3 << 8,
-   .post_div_val = 0x1,
+   .post_div_val = 0x1 << 8,
.main_output_mask = BIT(0),
.early_output_mask = BIT(3),
 };
@@ -181,6 +184,7 @@ struct clk_cpu_8996_mux {
u8  width;
struct notifier_block nb;
struct clk_hw   *pll;
+   struct clk_hw   *pll_div_2;
struct clk_regmap clkr;
 };
 
@@ -226,6 +230,13 @@ clk_cpu_8996_mux_determine_rate(struct clk_hw *hw, struct 
clk_rate_request *req)
struct clk_cpu_8996_mux *cpuclk = to_clk_cpu_8996_mux_hw(hw);
struct clk_hw *parent = cpuclk->pll;
 
+   if (cpuclk->pll_div_2 && req->rate < DIV_2_THRESHOLD) {
+   if (req->rate < (DIV_2_THRESHOLD / 2))
+   return -EINVAL;
+
+   parent = cpuclk->pll_div_2;
+   }
+
req->best_parent_rate = clk_hw_round_rate(parent, req->rate);
req->best_parent_hw = parent;
 
@@ -237,13 +248,19 @@ int cpu_clk_notifier_cb(struct notifier_block *nb, 
unsigned long event,
 {
int ret;
struct clk_cpu_8996_mux *cpuclk = to_clk_cpu_8996_mux_nb(nb);
+   struct clk_notifier_data *cnd = data;
 
switch (event) {
case PRE_RATE_CHANGE:
ret = clk_cpu_8996_mux_set_parent(>clkr.hw, ALT_INDEX);
break;
case POST_RATE_CHANGE:
-   ret = clk_cpu_8996_mux_set_parent(>clkr.hw, PLL_INDEX);
+   if (cnd->new_rate < DIV_2_THRESHOLD)
+   ret = clk_cpu_8996_mux_set_parent(>clkr.hw,
+ DIV_2_INDEX);
+   else
+   ret = clk_cpu_8996_mux_set_parent(>clkr.hw,
+ PLL_INDEX);
break;
default:
ret = 0;
@@ -295,6 +312,7 @@ static struct clk_cpu_8996_mux pwrcl_pmux = {
.shift = 0,
.width = 2,
.pll = _pll.clkr.hw,
+   .pll_div_2 = _smux.clkr.hw,
.nb.notifier_call = cpu_clk_notifier_cb,
.clkr.hw.init = &(struct clk_init_data) {
.name = "pwrcl_pmux",
@@ -315,6 +333,7 @@ static struct clk_cpu_8996_mux perfcl_pmux = {
.shift = 0,
.width = 2,
.pll = _pll.clkr.hw,
+   .pll_div_2 = _smux.clkr.hw,
.nb.notifier_call = cpu_clk_notifier_cb,
.clkr.hw.init = &(struct clk_init_data) {
.name = "perfcl_pmux",
-- 
2.11.0

Re: [PATCH 4.4 173/268] sched/rt: Fix rq->clock_update_flags < RQCF_ACT_SKIP warning

2018-06-14 Thread Ben Hutchings

On Mon, 2018-05-28 at 12:02 +0200, Greg Kroah-Hartman wrote:
> 4.4-stable review patch.  If anyone has any objections, please let me know.
> 
> --
> 
> From: Davidlohr Bueso 
> 
> [ Upstream commit d29a20645d5e929aa7e8616f28e5d8e1c49263ec ]
> 
> While running rt-tests' pi_stress program I got the following splat:
> 
>   rq->clock_update_flags < RQCF_ACT_SKIP
>   WARNING: CPU: 27 PID: 0 at kernel/sched/sched.h:960 
> assert_clock_updated.isra.38.part.39+0x13/0x20
> 
>   [...]
> 
>   
>   enqueue_top_rt_rq+0xf4/0x150
>   ? cpufreq_dbs_governor_start+0x170/0x170
>   sched_rt_rq_enqueue+0x65/0x80
>   sched_rt_period_timer+0x156/0x360
>   ? sched_rt_rq_enqueue+0x80/0x80
>   __hrtimer_run_queues+0xfa/0x260
>   hrtimer_interrupt+0xcb/0x220
>   smp_apic_timer_interrupt+0x62/0x120
>   apic_timer_interrupt+0xf/0x20
>   
> 
>   [...]
> 
>   do_idle+0x183/0x1e0
>   cpu_startup_entry+0x5f/0x70
>   start_secondary+0x192/0x1d0
>   secondary_startup_64+0xa5/0xb0
> 
> We can get rid of it by the "traditional" means of adding an
> update_rq_clock() call after acquiring the rq->lock in
> do_sched_rt_period_timer().
> 
> The case for the RT task throttling (which this workload also hits)
> can be ignored in that the skip_update call is actually bogus and
> quite the contrary (the request bits are removed/reverted).
> 
> By setting RQCF_UPDATED we really don't care if the skip is happening
> or not and will therefore make the assert_clock_updated() check happy.

There is no such flag or assertion in 4.4 or 4.9, so does this change
still make sense there?

Ben.

> Signed-off-by: Davidlohr Bueso 
> Reviewed-by: Matt Fleming 
> Acked-by: Peter Zijlstra (Intel) 
> Cc: Linus Torvalds 
> Cc: Mike Galbraith 
> Cc: Thomas Gleixner 
> Cc: d...@stgolabs.net
> Cc: linux-kernel@vger.kernel.org
> Cc: rost...@goodmis.org
> Link: http://lkml.kernel.org/r/20180402164954.16255-1-d...@stgolabs.net
> Signed-off-by: Ingo Molnar 
> Signed-off-by: Sasha Levin 
> Signed-off-by: Greg Kroah-Hartman 
> ---
>  kernel/sched/rt.c |2 ++
>  1 file changed, 2 insertions(+)
> 
> --- a/kernel/sched/rt.c
> +++ b/kernel/sched/rt.c
> @@ -822,6 +822,8 @@ static int do_sched_rt_period_timer(stru
>   struct rq *rq = rq_of_rt_rq(rt_rq);
>  
>   raw_spin_lock(>lock);
> + update_rq_clock(rq);
> +
>   if (rt_rq->rt_time) {
>   u64 runtime;
>  
> 
> 
> 
-- 
Ben Hutchings, Software Developer Codethink Ltd
https://www.codethink.co.uk/ Dale House, 35 Dale Street
 Manchester, M1 2HF, United Kingdom

[PATCH v13 2/8] clk: qcom: Make clk_alpha_pll_configure available to modules

2018-06-14 Thread ilia . lin

From: Rajendra Nayak 

Allow clk_alpha_pll_configure to be called from loadable
kernel modules.

Signed-off-by: Rajendra Nayak 
Signed-off-by: Ilia Lin 
Tested-by: Amit Kucheria 
---
 drivers/clk/qcom/clk-alpha-pll.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/clk/qcom/clk-alpha-pll.c b/drivers/clk/qcom/clk-alpha-pll.c
index 3c49a60072f1..a43f80ac94a4 100644
--- a/drivers/clk/qcom/clk-alpha-pll.c
+++ b/drivers/clk/qcom/clk-alpha-pll.c
@@ -228,6 +228,7 @@ void clk_alpha_pll_configure(struct clk_alpha_pll *pll, 
struct regmap *regmap,
if (pll->flags & SUPPORTS_FSM_MODE)
qcom_pll_set_fsm_mode(regmap, PLL_MODE(pll), 6, 0);
 }
+EXPORT_SYMBOL_GPL(clk_alpha_pll_configure);
 
 static int clk_alpha_pll_hwfsm_enable(struct clk_hw *hw)
 {
-- 
2.11.0

[PATCH v13 8/8] clk: qcom: Add ACD path to CPU clock driver for msm8996

2018-06-14 Thread ilia . lin

From: Ilia Lin 

The PMUX for each duplex allows for selection of ACD clock source.
The DVM (Dynamic Variation Monitor) will flag an error
when a voltage droop event is detected. This flagged error
enables ACD to provide a div-by-2 clock, sourced from the primary PLL.
The duplex will be provided the divided clock
until a pre-programmed delay has expired.

This change configures ACD during the probe and switches
the PMUXes to the ACD clock source.

Signed-off-by: Ilia Lin 
Tested-by: Amit Kucheria 
---
 drivers/clk/qcom/clk-cpu-8996.c | 75 +++--
 1 file changed, 65 insertions(+), 10 deletions(-)

diff --git a/drivers/clk/qcom/clk-cpu-8996.c b/drivers/clk/qcom/clk-cpu-8996.c
index ff5c0a5740d2..0a908d849cda 100644
--- a/drivers/clk/qcom/clk-cpu-8996.c
+++ b/drivers/clk/qcom/clk-cpu-8996.c
@@ -53,9 +53,11 @@
  */
 
 #include 
+#include 
 #include 
 #include 
 #include 
+#include 
 
 #include "clk-alpha-pll.h"
 #include "clk-regmap.h"
@@ -69,6 +71,11 @@ enum _pmux_input {
 };
 
 #define DIV_2_THRESHOLD6
+#define PWRCL_REG_OFFSET 0x0
+#define PERFCL_REG_OFFSET 0x8
+#define MUX_OFFSET 0x40
+#define ALT_PLL_OFFSET 0x100
+#define SSSCTL_OFFSET 0x160
 
 static const u8 prim_pll_regs[PLL_OFF_MAX_REGS] = {
[PLL_OFF_L_VAL] = 0x04,
@@ -107,7 +114,7 @@ static const struct alpha_pll_config hfpll_config = {
 };
 
 static struct clk_alpha_pll perfcl_pll = {
-   .offset = 0x8,
+   .offset = PERFCL_REG_OFFSET,
.regs = prim_pll_regs,
.flags = SUPPORTS_DYNAMIC_UPDATE | SUPPORTS_FSM_MODE,
.clkr.hw.init = &(struct clk_init_data){
@@ -119,7 +126,7 @@ static struct clk_alpha_pll perfcl_pll = {
 };
 
 static struct clk_alpha_pll pwrcl_pll = {
-   .offset = 0x0,
+   .offset = PWRCL_REG_OFFSET,
.regs = prim_pll_regs,
.flags = SUPPORTS_DYNAMIC_UPDATE | SUPPORTS_FSM_MODE,
.clkr.hw.init = &(struct clk_init_data){
@@ -149,7 +156,7 @@ static const struct alpha_pll_config altpll_config = {
 };
 
 static struct clk_alpha_pll perfcl_alt_pll = {
-   .offset = 0x80100,
+   .offset = PERFCL_REG_OFFSET + ALT_PLL_OFFSET,
.regs = alt_pll_regs,
.vco_table = alt_pll_vco_modes,
.num_vco = ARRAY_SIZE(alt_pll_vco_modes),
@@ -163,7 +170,7 @@ static struct clk_alpha_pll perfcl_alt_pll = {
 };
 
 static struct clk_alpha_pll pwrcl_alt_pll = {
-   .offset = 0x100,
+   .offset = PWRCL_REG_OFFSET + ALT_PLL_OFFSET,
.regs = alt_pll_regs,
.vco_table = alt_pll_vco_modes,
.num_vco = ARRAY_SIZE(alt_pll_vco_modes),
@@ -176,6 +183,9 @@ static struct clk_alpha_pll pwrcl_alt_pll = {
},
 };
 
+void __iomem *base;
+static void qcom_cpu_clk_msm8996_acd_init(void __iomem *base);
+
 /* Mux'es */
 
 struct clk_cpu_8996_mux {
@@ -253,6 +263,7 @@ int cpu_clk_notifier_cb(struct notifier_block *nb, unsigned 
long event,
switch (event) {
case PRE_RATE_CHANGE:
ret = clk_cpu_8996_mux_set_parent(>clkr.hw, ALT_INDEX);
+   qcom_cpu_clk_msm8996_acd_init(base);
break;
case POST_RATE_CHANGE:
if (cnd->new_rate < DIV_2_THRESHOLD)
@@ -260,7 +271,7 @@ int cpu_clk_notifier_cb(struct notifier_block *nb, unsigned 
long event,
  DIV_2_INDEX);
else
ret = clk_cpu_8996_mux_set_parent(>clkr.hw,
- PLL_INDEX);
+ ACD_INDEX);
break;
default:
ret = 0;
@@ -276,7 +287,7 @@ const struct clk_ops clk_cpu_8996_mux_ops = {
 };
 
 static struct clk_cpu_8996_mux pwrcl_smux = {
-   .reg = 0x40,
+   .reg = PWRCL_REG_OFFSET + MUX_OFFSET,
.shift = 2,
.width = 2,
.clkr.hw.init = &(struct clk_init_data) {
@@ -292,7 +303,7 @@ static struct clk_cpu_8996_mux pwrcl_smux = {
 };
 
 static struct clk_cpu_8996_mux perfcl_smux = {
-   .reg = 0x80040,
+   .reg = PERFCL_REG_OFFSET + MUX_OFFSET,
.shift = 2,
.width = 2,
.clkr.hw.init = &(struct clk_init_data) {
@@ -308,7 +319,7 @@ static struct clk_cpu_8996_mux perfcl_smux = {
 };
 
 static struct clk_cpu_8996_mux pwrcl_pmux = {
-   .reg = 0x40,
+   .reg = PWRCL_REG_OFFSET + MUX_OFFSET,
.shift = 0,
.width = 2,
.pll = _pll.clkr.hw,
@@ -329,7 +340,7 @@ static struct clk_cpu_8996_mux pwrcl_pmux = {
 };
 
 static struct clk_cpu_8996_mux perfcl_pmux = {
-   .reg = 0x80040,
+   .reg = PERFCL_REG_OFFSET + MUX_OFFSET,
.shift = 0,
.width = 2,
.pll = _pll.clkr.hw,
@@ -393,6 +404,10 @@ qcom_cpu_clk_msm8996_register_clks(struct device *dev, 
struct regmap *regmap)
clk_alpha_pll_configure(_alt_pll, regmap, _config);
clk_alpha_pll_configure(_alt_pll, regmap, _config);
 
+   /* Enable alt PLLs */
+

[PATCH v13 2/8] clk: qcom: Make clk_alpha_pll_configure available to modules

2018-06-14 Thread ilia . lin

From: Rajendra Nayak 

Allow clk_alpha_pll_configure to be called from loadable
kernel modules.

Signed-off-by: Rajendra Nayak 
Signed-off-by: Ilia Lin 
Tested-by: Amit Kucheria 
---
 drivers/clk/qcom/clk-alpha-pll.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/clk/qcom/clk-alpha-pll.c b/drivers/clk/qcom/clk-alpha-pll.c
index 3c49a60072f1..a43f80ac94a4 100644
--- a/drivers/clk/qcom/clk-alpha-pll.c
+++ b/drivers/clk/qcom/clk-alpha-pll.c
@@ -228,6 +228,7 @@ void clk_alpha_pll_configure(struct clk_alpha_pll *pll, 
struct regmap *regmap,
if (pll->flags & SUPPORTS_FSM_MODE)
qcom_pll_set_fsm_mode(regmap, PLL_MODE(pll), 6, 0);
 }
+EXPORT_SYMBOL_GPL(clk_alpha_pll_configure);
 
 static int clk_alpha_pll_hwfsm_enable(struct clk_hw *hw)
 {
-- 
2.11.0

[PATCH v13 8/8] clk: qcom: Add ACD path to CPU clock driver for msm8996

2018-06-14 Thread ilia . lin

From: Ilia Lin 

The PMUX for each duplex allows for selection of ACD clock source.
The DVM (Dynamic Variation Monitor) will flag an error
when a voltage droop event is detected. This flagged error
enables ACD to provide a div-by-2 clock, sourced from the primary PLL.
The duplex will be provided the divided clock
until a pre-programmed delay has expired.

This change configures ACD during the probe and switches
the PMUXes to the ACD clock source.

Signed-off-by: Ilia Lin 
Tested-by: Amit Kucheria 
---
 drivers/clk/qcom/clk-cpu-8996.c | 75 +++--
 1 file changed, 65 insertions(+), 10 deletions(-)

diff --git a/drivers/clk/qcom/clk-cpu-8996.c b/drivers/clk/qcom/clk-cpu-8996.c
index ff5c0a5740d2..0a908d849cda 100644
--- a/drivers/clk/qcom/clk-cpu-8996.c
+++ b/drivers/clk/qcom/clk-cpu-8996.c
@@ -53,9 +53,11 @@
  */
 
 #include 
+#include 
 #include 
 #include 
 #include 
+#include 
 
 #include "clk-alpha-pll.h"
 #include "clk-regmap.h"
@@ -69,6 +71,11 @@ enum _pmux_input {
 };
 
 #define DIV_2_THRESHOLD6
+#define PWRCL_REG_OFFSET 0x0
+#define PERFCL_REG_OFFSET 0x8
+#define MUX_OFFSET 0x40
+#define ALT_PLL_OFFSET 0x100
+#define SSSCTL_OFFSET 0x160
 
 static const u8 prim_pll_regs[PLL_OFF_MAX_REGS] = {
[PLL_OFF_L_VAL] = 0x04,
@@ -107,7 +114,7 @@ static const struct alpha_pll_config hfpll_config = {
 };
 
 static struct clk_alpha_pll perfcl_pll = {
-   .offset = 0x8,
+   .offset = PERFCL_REG_OFFSET,
.regs = prim_pll_regs,
.flags = SUPPORTS_DYNAMIC_UPDATE | SUPPORTS_FSM_MODE,
.clkr.hw.init = &(struct clk_init_data){
@@ -119,7 +126,7 @@ static struct clk_alpha_pll perfcl_pll = {
 };
 
 static struct clk_alpha_pll pwrcl_pll = {
-   .offset = 0x0,
+   .offset = PWRCL_REG_OFFSET,
.regs = prim_pll_regs,
.flags = SUPPORTS_DYNAMIC_UPDATE | SUPPORTS_FSM_MODE,
.clkr.hw.init = &(struct clk_init_data){
@@ -149,7 +156,7 @@ static const struct alpha_pll_config altpll_config = {
 };
 
 static struct clk_alpha_pll perfcl_alt_pll = {
-   .offset = 0x80100,
+   .offset = PERFCL_REG_OFFSET + ALT_PLL_OFFSET,
.regs = alt_pll_regs,
.vco_table = alt_pll_vco_modes,
.num_vco = ARRAY_SIZE(alt_pll_vco_modes),
@@ -163,7 +170,7 @@ static struct clk_alpha_pll perfcl_alt_pll = {
 };
 
 static struct clk_alpha_pll pwrcl_alt_pll = {
-   .offset = 0x100,
+   .offset = PWRCL_REG_OFFSET + ALT_PLL_OFFSET,
.regs = alt_pll_regs,
.vco_table = alt_pll_vco_modes,
.num_vco = ARRAY_SIZE(alt_pll_vco_modes),
@@ -176,6 +183,9 @@ static struct clk_alpha_pll pwrcl_alt_pll = {
},
 };
 
+void __iomem *base;
+static void qcom_cpu_clk_msm8996_acd_init(void __iomem *base);
+
 /* Mux'es */
 
 struct clk_cpu_8996_mux {
@@ -253,6 +263,7 @@ int cpu_clk_notifier_cb(struct notifier_block *nb, unsigned 
long event,
switch (event) {
case PRE_RATE_CHANGE:
ret = clk_cpu_8996_mux_set_parent(>clkr.hw, ALT_INDEX);
+   qcom_cpu_clk_msm8996_acd_init(base);
break;
case POST_RATE_CHANGE:
if (cnd->new_rate < DIV_2_THRESHOLD)
@@ -260,7 +271,7 @@ int cpu_clk_notifier_cb(struct notifier_block *nb, unsigned 
long event,
  DIV_2_INDEX);
else
ret = clk_cpu_8996_mux_set_parent(>clkr.hw,
- PLL_INDEX);
+ ACD_INDEX);
break;
default:
ret = 0;
@@ -276,7 +287,7 @@ const struct clk_ops clk_cpu_8996_mux_ops = {
 };
 
 static struct clk_cpu_8996_mux pwrcl_smux = {
-   .reg = 0x40,
+   .reg = PWRCL_REG_OFFSET + MUX_OFFSET,
.shift = 2,
.width = 2,
.clkr.hw.init = &(struct clk_init_data) {
@@ -292,7 +303,7 @@ static struct clk_cpu_8996_mux pwrcl_smux = {
 };
 
 static struct clk_cpu_8996_mux perfcl_smux = {
-   .reg = 0x80040,
+   .reg = PERFCL_REG_OFFSET + MUX_OFFSET,
.shift = 2,
.width = 2,
.clkr.hw.init = &(struct clk_init_data) {
@@ -308,7 +319,7 @@ static struct clk_cpu_8996_mux perfcl_smux = {
 };
 
 static struct clk_cpu_8996_mux pwrcl_pmux = {
-   .reg = 0x40,
+   .reg = PWRCL_REG_OFFSET + MUX_OFFSET,
.shift = 0,
.width = 2,
.pll = _pll.clkr.hw,
@@ -329,7 +340,7 @@ static struct clk_cpu_8996_mux pwrcl_pmux = {
 };
 
 static struct clk_cpu_8996_mux perfcl_pmux = {
-   .reg = 0x80040,
+   .reg = PERFCL_REG_OFFSET + MUX_OFFSET,
.shift = 0,
.width = 2,
.pll = _pll.clkr.hw,
@@ -393,6 +404,10 @@ qcom_cpu_clk_msm8996_register_clks(struct device *dev, 
struct regmap *regmap)
clk_alpha_pll_configure(_alt_pll, regmap, _config);
clk_alpha_pll_configure(_alt_pll, regmap, _config);
 
+   /* Enable alt PLLs */
+

Re: [PATCH 4.4 173/268] sched/rt: Fix rq->clock_update_flags < RQCF_ACT_SKIP warning

2018-06-14 Thread Ben Hutchings

On Mon, 2018-05-28 at 12:02 +0200, Greg Kroah-Hartman wrote:
> 4.4-stable review patch.  If anyone has any objections, please let me know.
> 
> --
> 
> From: Davidlohr Bueso 
> 
> [ Upstream commit d29a20645d5e929aa7e8616f28e5d8e1c49263ec ]
> 
> While running rt-tests' pi_stress program I got the following splat:
> 
>   rq->clock_update_flags < RQCF_ACT_SKIP
>   WARNING: CPU: 27 PID: 0 at kernel/sched/sched.h:960 
> assert_clock_updated.isra.38.part.39+0x13/0x20
> 
>   [...]
> 
>   
>   enqueue_top_rt_rq+0xf4/0x150
>   ? cpufreq_dbs_governor_start+0x170/0x170
>   sched_rt_rq_enqueue+0x65/0x80
>   sched_rt_period_timer+0x156/0x360
>   ? sched_rt_rq_enqueue+0x80/0x80
>   __hrtimer_run_queues+0xfa/0x260
>   hrtimer_interrupt+0xcb/0x220
>   smp_apic_timer_interrupt+0x62/0x120
>   apic_timer_interrupt+0xf/0x20
>   
> 
>   [...]
> 
>   do_idle+0x183/0x1e0
>   cpu_startup_entry+0x5f/0x70
>   start_secondary+0x192/0x1d0
>   secondary_startup_64+0xa5/0xb0
> 
> We can get rid of it by the "traditional" means of adding an
> update_rq_clock() call after acquiring the rq->lock in
> do_sched_rt_period_timer().
> 
> The case for the RT task throttling (which this workload also hits)
> can be ignored in that the skip_update call is actually bogus and
> quite the contrary (the request bits are removed/reverted).
> 
> By setting RQCF_UPDATED we really don't care if the skip is happening
> or not and will therefore make the assert_clock_updated() check happy.

There is no such flag or assertion in 4.4 or 4.9, so does this change
still make sense there?

Ben.

> Signed-off-by: Davidlohr Bueso 
> Reviewed-by: Matt Fleming 
> Acked-by: Peter Zijlstra (Intel) 
> Cc: Linus Torvalds 
> Cc: Mike Galbraith 
> Cc: Thomas Gleixner 
> Cc: d...@stgolabs.net
> Cc: linux-kernel@vger.kernel.org
> Cc: rost...@goodmis.org
> Link: http://lkml.kernel.org/r/20180402164954.16255-1-d...@stgolabs.net
> Signed-off-by: Ingo Molnar 
> Signed-off-by: Sasha Levin 
> Signed-off-by: Greg Kroah-Hartman 
> ---
>  kernel/sched/rt.c |2 ++
>  1 file changed, 2 insertions(+)
> 
> --- a/kernel/sched/rt.c
> +++ b/kernel/sched/rt.c
> @@ -822,6 +822,8 @@ static int do_sched_rt_period_timer(stru
>   struct rq *rq = rq_of_rt_rq(rt_rq);
>  
>   raw_spin_lock(>lock);
> + update_rq_clock(rq);
> +
>   if (rt_rq->rt_time) {
>   u64 runtime;
>  
> 
> 
> 
-- 
Ben Hutchings, Software Developer Codethink Ltd
https://www.codethink.co.uk/ Dale House, 35 Dale Street
 Manchester, M1 2HF, United Kingdom

< 1 2 3 4 5 6 7 8 9 10 >

101 - 200 of 1344 matches

Mail list logo