Re: Suspicious error for CMA stress test

2016-03-02 Thread Hanjun Guo
On 2016/3/3 15:42, Joonsoo Kim wrote:
> 2016-03-03 10:25 GMT+09:00 Laura Abbott :
>> (cc -mm and Joonsoo Kim)
>>
>>
>> On 03/02/2016 05:52 AM, Hanjun Guo wrote:
>>> Hi,
>>>
>>> I came across a suspicious error for CMA stress test:
>>>
>>> Before the test, I got:
>>> -bash-4.3# cat /proc/meminfo | grep Cma
>>> CmaTotal: 204800 kB
>>> CmaFree:  195044 kB
>>>
>>>
>>> After running the test:
>>> -bash-4.3# cat /proc/meminfo | grep Cma
>>> CmaTotal: 204800 kB
>>> CmaFree: 6602584 kB
>>>
>>> So the freed CMA memory is more than total..
>>>
>>> Also the MemFree is more than the MemTotal:
>>>
>>> -bash-4.3# cat /proc/meminfo
>>> MemTotal:   16342016 kB
>>> MemFree:22367268 kB
>>> MemAvailable:   22370528 kB
>>>
>>> Here is the kernel module doing the stress test below (if the test case
>>> is wrong, correct me), any help would be greatly appreciated.
>>>
>>> The test is running on ARM64 platform (hisilicon D02) with 4.4 kernel, I
>>> think
>>> the 4.5-rc is the same as I didn't notice the updates for it.
>>>
>>> int malloc_dma(void *data)
>>> {
>>>  void *vaddr;
>>>  struct platform_device * pdev=(struct platform_device*)data;
>>>  dma_addr_t dma_handle;
>>>  int i;
>>>
>>>  for(i=0; i<1000; i++) {
>>>  vaddr=dma_alloc_coherent(&pdev->dev, malloc_size, &dma_handle,
>>> GFP_KERNEL);
>>>  if (!vaddr)
>>>  pr_err("alloc cma memory failed!\n");
>>>
>>>  mdelay(1);
>>>
>>>  if (vaddr)
>>>  dma_free_coherent(&pdev->dev,malloc_size,vaddr,
>>> dma_handle);
>>>  }
>>>  pr_info("alloc free cma memory success return!\n");
>>>  return 0;
>>> }
>>>
>>> static int dma_alloc_coherent_init(struct platform_device *pdev)
>>> {
>>>  int i;
>>>
>>>  for(i=0; i<100; i++)   {
>>>  task[i] = kthread_create(malloc_dma,pdev,"malloc_dma_%d",i);
>>>  if(!task[i]) {
>>>  printk("kthread_create faile %d\n",i);
>>>  continue;
>>>  }
>>>  wake_up_process(task[i]);
>>>  }
>>>  return 0;
>>> }
>>>
>>> Thanks
>>> Hanjun
>>>
>>> The whole /proc/meminfo:
>>>
>>> -bash-4.3# cat /proc/meminfo
>>> MemTotal:   16342016 kB
>>> MemFree:22367268 kB
>>> MemAvailable:   22370528 kB
>>> Buffers:4292 kB
>>> Cached:36444 kB
>>> SwapCached:0 kB
>>> Active:23564 kB
>>> Inactive:  25360 kB
>>> Active(anon):   8424 kB
>>> Inactive(anon):   64 kB
>>> Active(file):  15140 kB
>>> Inactive(file):25296 kB
>>> Unevictable:   0 kB
>>> Mlocked:   0 kB
>>> SwapTotal: 0 kB
>>> SwapFree:  0 kB
>>> Dirty: 0 kB
>>> Writeback: 0 kB
>>> AnonPages:  8196 kB
>>> Mapped:16448 kB
>>> Shmem:   296 kB
>>> Slab:  26832 kB
>>> SReclaimable:   6300 kB
>>> SUnreclaim:20532 kB
>>> KernelStack:3088 kB
>>> PageTables:  404 kB
>>> NFS_Unstable:  0 kB
>>> Bounce:0 kB
>>> WritebackTmp:  0 kB
>>> CommitLimit: 8171008 kB
>>> Committed_AS:  34336 kB
>>> VmallocTotal:   258998208 kB
>>> VmallocUsed:   0 kB
>>> VmallocChunk:  0 kB
>>> AnonHugePages: 0 kB
>>> CmaTotal: 204800 kB
>>> CmaFree: 6602584 kB
>>> HugePages_Total:   0
>>> HugePages_Free:0
>>> HugePages_Rsvd:0
>>> HugePages_Surp:0
>>> Hugepagesize:   2048 kB
>>>
>>
>> I played with this a bit and can see the same problem. The sanity
>> check of CmaFree < CmaTotal generally triggers in
>> __move_zone_freepage_state in unset_migratetype_isolate.
>> This also seems to be present as far back as v4.0 which was the
>> first version to have the updated accounting from Joonsoo.
>> Were there known limitations with the new freepage accounting,
>> Joonsoo?
> I don't know. I also played with this and it looks like there is an
> accounting problem; however, in my case, the number of free pages is slightly less
> than the total. I will take a look.
>
> Hanjun, could you tell me your malloc_size? I tested with 1 and it doesn't
> look like your case.

The malloc_size is 1M, with 200M of CMA in total (passed via the boot command line as
cma=200M).
If any more information is needed, please let me know.

Thanks for the help!
Hanjun



Re: Suspicious error for CMA stress test

2016-03-02 Thread Hanjun Guo
On 2016/3/3 15:42, Joonsoo Kim wrote:
> 2016-03-03 10:25 GMT+09:00 Laura Abbott :
>> (cc -mm and Joonsoo Kim)
>>
>>
>> On 03/02/2016 05:52 AM, Hanjun Guo wrote:
>>> Hi,
>>>
>>> I came across a suspicious error for CMA stress test:
>>>
>>> Before the test, I got:
>>> -bash-4.3# cat /proc/meminfo | grep Cma
>>> CmaTotal: 204800 kB
>>> CmaFree:  195044 kB
>>>
>>>
>>> After running the test:
>>> -bash-4.3# cat /proc/meminfo | grep Cma
>>> CmaTotal: 204800 kB
>>> CmaFree: 6602584 kB
>>>
>>> So the freed CMA memory is more than total..
>>>
>>> Also the MemFree is more than the MemTotal:
>>>
>>> -bash-4.3# cat /proc/meminfo
>>> MemTotal:   16342016 kB
>>> MemFree:22367268 kB
>>> MemAvailable:   22370528 kB
>>>
>>> Here is the kernel module doing the stress test below (if the test case
>>> is wrong, correct me), any help would be greatly appreciated.
>>>
>>> The test is running on ARM64 platform (hisilicon D02) with 4.4 kernel, I
>>> think
>>> the 4.5-rc is the same as I didn't notice the updates for it.
>>>
>>> int malloc_dma(void *data)
>>> {
>>>  void *vaddr;
>>>  struct platform_device * pdev=(struct platform_device*)data;
>>>  dma_addr_t dma_handle;
>>>  int i;
>>>
>>>  for(i=0; i<1000; i++) {
>>>  vaddr=dma_alloc_coherent(&pdev->dev, malloc_size, &dma_handle,
>>> GFP_KERNEL);
>>>  if (!vaddr)
>>>  pr_err("alloc cma memory failed!\n");
>>>
>>>  mdelay(1);
>>>
>>>  if (vaddr)
>>>  dma_free_coherent(&pdev->dev,malloc_size,vaddr,
>>> dma_handle);
>>>  }
>>>  pr_info("alloc free cma memory success return!\n");
>>>  return 0;
>>> }
>>>
>>> static int dma_alloc_coherent_init(struct platform_device *pdev)
>>> {
>>>  int i;
>>>
>>>  for(i=0; i<100; i++)   {
>>>  task[i] = kthread_create(malloc_dma,pdev,"malloc_dma_%d",i);
>>>  if(!task[i]) {
>>>  printk("kthread_create faile %d\n",i);
>>>  continue;
>>>  }
>>>  wake_up_process(task[i]);
>>>  }
>>>  return 0;
>>> }
>>>
>>> Thanks
>>> Hanjun
>>>
>>> The whole /proc/meminfo:
>>>
>>> -bash-4.3# cat /proc/meminfo
>>> MemTotal:   16342016 kB
>>> MemFree:22367268 kB
>>> MemAvailable:   22370528 kB
>>> Buffers:4292 kB
>>> Cached:36444 kB
>>> SwapCached:0 kB
>>> Active:23564 kB
>>> Inactive:  25360 kB
>>> Active(anon):   8424 kB
>>> Inactive(anon):   64 kB
>>> Active(file):  15140 kB
>>> Inactive(file):25296 kB
>>> Unevictable:   0 kB
>>> Mlocked:   0 kB
>>> SwapTotal: 0 kB
>>> SwapFree:  0 kB
>>> Dirty: 0 kB
>>> Writeback: 0 kB
>>> AnonPages:  8196 kB
>>> Mapped:16448 kB
>>> Shmem:   296 kB
>>> Slab:  26832 kB
>>> SReclaimable:   6300 kB
>>> SUnreclaim:20532 kB
>>> KernelStack:3088 kB
>>> PageTables:  404 kB
>>> NFS_Unstable:  0 kB
>>> Bounce:0 kB
>>> WritebackTmp:  0 kB
>>> CommitLimit: 8171008 kB
>>> Committed_AS:  34336 kB
>>> VmallocTotal:   258998208 kB
>>> VmallocUsed:   0 kB
>>> VmallocChunk:  0 kB
>>> AnonHugePages: 0 kB
>>> CmaTotal: 204800 kB
>>> CmaFree: 6602584 kB
>>> HugePages_Total:   0
>>> HugePages_Free:0
>>> HugePages_Rsvd:0
>>> HugePages_Surp:0
>>> Hugepagesize:   2048 kB
>>>
>>
>> I played with this a bit and can see the same problem. The sanity
>> check of CmaFree < CmaTotal generally triggers in
>> __move_zone_freepage_state in unset_migratetype_isolate.
>> This also seems to be present as far back as v4.0 which was the
>> first version to have the updated accounting from Joonsoo.
>> Were there known limitations with the new freepage accounting,
>> Joonsoo?
> I don't know. I also played with this and it looks like there is an
> accounting problem; however, in my case, the number of free pages is slightly less
> than the total. I will take a look.
>
> Hanjun, could you tell me your malloc_size? I tested with 1 and it doesn't
> look like your case.

The malloc_size is 1M, with 200M of CMA in total (passed via the boot command line as
cma=200M).
If any more information is needed, please let me know.

Thanks for the help!
Hanjun



Re: [PATCH] objtool: Disable stack validation when CROSS_COMPILE is used

2016-03-02 Thread Stephen Rothwell
Hi Sedat,

On Thu, 3 Mar 2016 08:31:57 +0100 Sedat Dilek  wrote:
>
> Does Linux next-20160303 have this patch?
> On a quick look I could not find it.

It is applied as part of the merge commit that merges the tip tree, so
there is not a separate commit for it.

-- 
Cheers,
Stephen Rothwell


Re: [PATCH] objtool: Disable stack validation when CROSS_COMPILE is used

2016-03-02 Thread Stephen Rothwell
Hi Sedat,

On Thu, 3 Mar 2016 08:31:57 +0100 Sedat Dilek  wrote:
>
> Does Linux next-20160303 have this patch?
> On a quick look I could not find it.

It is applied as part of the merge commit that merges the tip tree, so
there is not a separate commit for it.

-- 
Cheers,
Stephen Rothwell


Re: [PATCHv9 1/3] rdmacg: Added rdma cgroup controller

2016-03-02 Thread Haggai Eran
On 03/03/2016 04:49, Parav Pandit wrote:
> Hi Tejun, Haggai,
> 
> On Thu, Mar 3, 2016 at 1:28 AM, Parav Pandit  wrote:
>>>> + rpool->refcnt--;
>>>> + if (rpool->refcnt == 0 && rpool->num_max_cnt ==
>>>> pool_info->table_len) {
>>>
>>> If the caller charges 2 and then uncharges 1 two times, the refcnt
>>> underflows?  Why not just track how many usages are zero?
>>>
>> This is certainly a must-fix bug. Changed refcnt to usage_sum and changed to do
>> usage_sum -= num during uncharging
>> and
>> usage_sum += num during charging.
> 
> This is not sufficient as css_get() and put are done only once per
> call, which leads to similar problem as of refcnt.
Are css_get_many() and css_put_many() relevant here?

> As I thought about it more, I realised that a test for this particular case is
> missing, which is how this bug slipped through. I also realize that we don't have
> a use case for the "num" field from the IB stack side.
> For bulk free IB stack will have to keep track of different or same
> rdmacg returned values to call uncharge() with right number of
> resources, all of that complexity just doesn't make sense and not
> required.
> So as first step to further simplify this, I am removing "num" input
> field from charge and uncharge API.

IIRC there are no instances in the RDMA subsystem today where userspace
allocates more than one resource at a time.

Yishai, in your proposed RSS patchset did you have a verb to allocate
multiple work queues at once?

Haggai



Re: [PATCHv9 1/3] rdmacg: Added rdma cgroup controller

2016-03-02 Thread Haggai Eran
On 03/03/2016 04:49, Parav Pandit wrote:
> Hi Tejun, Haggai,
> 
> On Thu, Mar 3, 2016 at 1:28 AM, Parav Pandit  wrote:
>>>> + rpool->refcnt--;
>>>> + if (rpool->refcnt == 0 && rpool->num_max_cnt ==
>>>> pool_info->table_len) {
>>>
>>> If the caller charges 2 and then uncharges 1 two times, the refcnt
>>> underflows?  Why not just track how many usages are zero?
>>>
>> This is certainly a must-fix bug. Changed refcnt to usage_sum and changed to do
>> usage_sum -= num during uncharging
>> and
>> usage_sum += num during charging.
> 
> This is not sufficient as css_get() and put are done only once per
> call, which leads to similar problem as of refcnt.
Are css_get_many() and css_put_many() relevant here?

> As I thought about it more, I realised that a test for this particular case is
> missing, which is how this bug slipped through. I also realize that we don't have
> a use case for the "num" field from the IB stack side.
> For bulk free IB stack will have to keep track of different or same
> rdmacg returned values to call uncharge() with right number of
> resources, all of that complexity just doesn't make sense and not
> required.
> So as first step to further simplify this, I am removing "num" input
> field from charge and uncharge API.

IIRC there are no instances in the RDMA subsystem today where userspace
allocates more than one resource at a time.

Yishai, in your proposed RSS patchset did you have a verb to allocate
multiple work queues at once?

Haggai



Re: [PULL] NBD for 4.6

2016-03-02 Thread Markus Pargmann
Hi Jens,

On Sunday, February 21, 2016 03:01:20 PM Markus Pargmann wrote:
> Hi Jens,
> 
> This pull request contains 7 patches for 4.6.

any news on this pull request?

Best Regards,

Markus

> 
> Patch 1 fixes some unnecessarily complicated code I introduced some versions
> ago for debugfs.
> 
> Patch 2 removes the criticised signal usage within NBD to kill the NBD threads
> after a timeout. This code was used for the last years and is now replaced by
> simply killing the tcp connection.
> 
> Patches 3-6 are some smaller cleanups.
> 
> Patch 7 adds uevents for userspace. This way udev/systemd can react to
> connected
> NBD devices.
> 
> Best Regards,
> 
> Markus
> 
> 
> 
> The following changes since commit 92e963f50fc74041b5e9e744c330dca48e04f08d:
> 
>   Linux 4.5-rc1 (2016-01-24 13:06:47 -0800)
> 
> are available in the git repository at:
> 
>   git://git.pengutronix.de/git/mpa/linux-nbd.git tags/nbd-for-4.6
> 
> for you to fetch changes up to 37091fdd831f28a6509008542174ed324dd645bc:
> 
>   nbd: Create size change events for userspace (2016-02-15 10:35:47 +0100)
> 
> 
> NBD for 4.6
> 
> 
> Dan Streetman (1):
>   nbd: ratelimit error msgs after socket close
> 
> Markus Pargmann (6):
>   nbd: Fix debugfs error handling
>   nbd: Remove signal usage
>   nbd: Timeouts are not user requested disconnects
>   nbd: Cleanup reset of nbd and bdev after a disconnect
>   nbd: Move flag parsing to a function
>   nbd: Create size change events for userspace
> 
>  drivers/block/nbd.c | 335 
> ++--
>  1 file changed, 170 insertions(+), 165 deletions(-)
> 
> 

-- 
Pengutronix e.K.   | |
Industrial Linux Solutions | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0|
Amtsgericht Hildesheim, HRA 2686   | Fax:   +49-5121-206917- |


signature.asc
Description: This is a digitally signed message part.


Re: [PULL] NBD for 4.6

2016-03-02 Thread Markus Pargmann
Hi Jens,

On Sunday, February 21, 2016 03:01:20 PM Markus Pargmann wrote:
> Hi Jens,
> 
> This pull request contains 7 patches for 4.6.

any news on this pull request?

Best Regards,

Markus

> 
> Patch 1 fixes some unnecessarily complicated code I introduced some versions
> ago for debugfs.
> 
> Patch 2 removes the criticised signal usage within NBD to kill the NBD threads
> after a timeout. This code was used for the last years and is now replaced by
> simply killing the tcp connection.
> 
> Patches 3-6 are some smaller cleanups.
> 
> Patch 7 adds uevents for userspace. This way udev/systemd can react to
> connected
> NBD devices.
> 
> Best Regards,
> 
> Markus
> 
> 
> 
> The following changes since commit 92e963f50fc74041b5e9e744c330dca48e04f08d:
> 
>   Linux 4.5-rc1 (2016-01-24 13:06:47 -0800)
> 
> are available in the git repository at:
> 
>   git://git.pengutronix.de/git/mpa/linux-nbd.git tags/nbd-for-4.6
> 
> for you to fetch changes up to 37091fdd831f28a6509008542174ed324dd645bc:
> 
>   nbd: Create size change events for userspace (2016-02-15 10:35:47 +0100)
> 
> 
> NBD for 4.6
> 
> 
> Dan Streetman (1):
>   nbd: ratelimit error msgs after socket close
> 
> Markus Pargmann (6):
>   nbd: Fix debugfs error handling
>   nbd: Remove signal usage
>   nbd: Timeouts are not user requested disconnects
>   nbd: Cleanup reset of nbd and bdev after a disconnect
>   nbd: Move flag parsing to a function
>   nbd: Create size change events for userspace
> 
>  drivers/block/nbd.c | 335 
> ++--
>  1 file changed, 170 insertions(+), 165 deletions(-)
> 
> 

-- 
Pengutronix e.K.   | |
Industrial Linux Solutions | http://www.pengutronix.de/  |
Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0|
Amtsgericht Hildesheim, HRA 2686   | Fax:   +49-5121-206917- |


signature.asc
Description: This is a digitally signed message part.


Re: [PATCH v4 1/2] mm: introduce page reference manipulation functions

2016-03-02 Thread Joonsoo Kim
2016-03-03 1:44 GMT+09:00 Vlastimil Babka :
> On 02/26/2016 01:58 AM, js1...@gmail.com wrote:
>>
>> From: Joonsoo Kim 
>>
>> Success of CMA allocation largely depends on success of migration
>> and key factor of it is page reference count. Until now, page reference
>> is manipulated by direct calling atomic functions so we cannot follow up
>> who and where manipulate it. Then, it is hard to find actual reason
>> of CMA allocation failure. CMA allocation should be guaranteed to succeed
>> so finding offending place is really important.
>>
>> In this patch, call sites where page reference is manipulated are
>> converted
>> to introduced wrapper function. This is preparation step to add tracepoint
>> to each page reference manipulation function. With this facility, we can
>> easily find reason of CMA allocation failure. There is no functional
>> change
>> in this patch.
>>
>> In addition, this patch also converts reference read sites. It will help
>> a second step that renames page._count to something else and prevents
>> later
>> attempt to direct access to it (Suggested by Andrew).
>>
>> Acked-by: Michal Nazarewicz 
>> Signed-off-by: Joonsoo Kim 
>
>
> Even without Patch 2/2 this is a nice improvement.
> Acked-by: Vlastimil Babka 
>
> Although somebody might be confused by page_ref_count() vs page_count(). Oh
> well.

Yes... it was pointed out by Kirill before, but consistency is not the purpose of
this patchset so I skipped it. There are too many sites (roughly 100), so I'm not
sure this code churn is worth doing now. If someone thinks it is really
important,
I will handle it after rc2.

Thanks.


Re: [PATCH v4 1/2] mm: introduce page reference manipulation functions

2016-03-02 Thread Joonsoo Kim
2016-03-03 1:44 GMT+09:00 Vlastimil Babka :
> On 02/26/2016 01:58 AM, js1...@gmail.com wrote:
>>
>> From: Joonsoo Kim 
>>
>> Success of CMA allocation largely depends on success of migration
>> and key factor of it is page reference count. Until now, page reference
>> is manipulated by direct calling atomic functions so we cannot follow up
>> who and where manipulate it. Then, it is hard to find actual reason
>> of CMA allocation failure. CMA allocation should be guaranteed to succeed
>> so finding offending place is really important.
>>
>> In this patch, call sites where page reference is manipulated are
>> converted
>> to introduced wrapper function. This is preparation step to add tracepoint
>> to each page reference manipulation function. With this facility, we can
>> easily find reason of CMA allocation failure. There is no functional
>> change
>> in this patch.
>>
>> In addition, this patch also converts reference read sites. It will help
>> a second step that renames page._count to something else and prevents
>> later
>> attempt to direct access to it (Suggested by Andrew).
>>
>> Acked-by: Michal Nazarewicz 
>> Signed-off-by: Joonsoo Kim 
>
>
> Even without Patch 2/2 this is a nice improvement.
> Acked-by: Vlastimil Babka 
>
> Although somebody might be confused by page_ref_count() vs page_count(). Oh
> well.

Yes... it was pointed out by Kirill before, but consistency is not the purpose of
this patchset so I skipped it. There are too many sites (roughly 100), so I'm not
sure this code churn is worth doing now. If someone thinks it is really
important,
I will handle it after rc2.

Thanks.


Re: fs: uninterruptible hang in handle_userfault

2016-03-02 Thread Sedat Dilek
On 3/3/16, Linus Torvalds  wrote:
> On Mar 2, 2016 23:14, "Sedat Dilek"  wrote:
>>
>> Is that commit [1] Linux-4.5 material or affects other versions, too?
>
> Hmm. I guess this affects anything with userfaultfd.
>

OK, Linux v4.4.y LTS has userfaultfd - is affected.

Just an organizational question:
As this commit has no CC:stable tag, how do the release managers get the
information needed to include it in their series of patches?

One technical question:
How do I find out which Linux version first shipped userfaultfd?
(Maybe there are more elegant ways than the one I use. I am always open to improving my
Git knowledge.)

- Sedat -


Re: fs: uninterruptible hang in handle_userfault

2016-03-02 Thread Sedat Dilek
On 3/3/16, Linus Torvalds  wrote:
> On Mar 2, 2016 23:14, "Sedat Dilek"  wrote:
>>
>> Is that commit [1] Linux-4.5 material or affects other versions, too?
>
> Hmm. I guess this affects anything with userfaultfd.
>

OK, Linux v4.4.y LTS has userfaultfd - is affected.

Just an organizational question:
As this commit has no CC:stable tag, how do the release managers get the
information needed to include it in their series of patches?

One technical question:
How do I find out which Linux version first shipped userfaultfd?
(Maybe there are more elegant ways than the one I use. I am always open to improving my
Git knowledge.)

- Sedat -


[PATCH v1 00/11] mm: page migration enhancement for thp

2016-03-02 Thread Naoya Horiguchi
Hi everyone,

This patchset enhances page migration functionality to handle thp migration
for various page migration's callers:
 - mbind(2)
 - move_pages(2)
 - migrate_pages(2)
 - cgroup/cpuset migration
 - memory hotremove
 - soft offline

The main benefit is that we can avoid unnecessary thp splits, which helps us
avoid a performance decrease when your applications handle NUMA optimization on
their own.

The implementation is similar to that of normal page migration, the key point
is that we modify a pmd to a pmd migration entry in swap-entry like format.
pmd_present() is not simple and it's not enough by itself to determine whether
a given pmd is a pmd migration entry. See patch 3/11 and 5/11 for details.

Here're topics which might be helpful to start discussion:

- at this point, this functionality is limited to x86_64.

- there's already an implementation of thp migration in the autonuma code, which
  this patchset doesn't touch at all because it works fine as it is.

- fallback to thp split: current implementation just fails a migration trial if
  thp migration fails. It's possible to retry migration after splitting the thp,
  but that's not included in this version.

Any comments or advice are welcome.

Thanks,
Naoya Horiguchi
---
Summary:

Naoya Horiguchi (11):
  mm: mempolicy: add queue_pages_node_check()
  mm: thp: introduce CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
  mm: thp: add helpers related to thp/pmd migration
  mm: thp: enable thp migration in generic path
  mm: thp: check pmd migration entry in common path
  mm: soft-dirty: keep soft-dirty bits over thp migration
  mm: hwpoison: fix race between unpoisoning and freeing migrate source page
  mm: hwpoison: soft offline supports thp migration
  mm: mempolicy: mbind and migrate_pages support thp migration
  mm: migrate: move_pages() supports thp migration
  mm: memory_hotplug: memory hotremove supports thp migration

 arch/x86/Kconfig |   4 +
 arch/x86/include/asm/pgtable.h   |  28 ++
 arch/x86/include/asm/pgtable_64.h|   2 +
 arch/x86/include/asm/pgtable_types.h |   8 +-
 arch/x86/mm/gup.c|   3 +
 fs/proc/task_mmu.c   |  25 +++--
 include/asm-generic/pgtable.h|  34 ++-
 include/linux/huge_mm.h  |  17 
 include/linux/swapops.h  |  64 +
 mm/Kconfig   |   3 +
 mm/gup.c |   8 ++
 mm/huge_memory.c | 175 +--
 mm/memcontrol.c  |   2 +
 mm/memory-failure.c  |  41 
 mm/memory.c  |   5 +
 mm/memory_hotplug.c  |   8 ++
 mm/mempolicy.c   | 110 --
 mm/migrate.c |  57 +---
 mm/page_isolation.c  |   8 ++
 mm/rmap.c|   7 +-
 20 files changed, 527 insertions(+), 82 deletions(-)


[PATCH v1 00/11] mm: page migration enhancement for thp

2016-03-02 Thread Naoya Horiguchi
Hi everyone,

This patchset enhances page migration functionality to handle thp migration
for various page migration's callers:
 - mbind(2)
 - move_pages(2)
 - migrate_pages(2)
 - cgroup/cpuset migration
 - memory hotremove
 - soft offline

The main benefit is that we can avoid unnecessary thp splits, which helps us
avoid a performance decrease when your applications handle NUMA optimization on
their own.

The implementation is similar to that of normal page migration, the key point
is that we modify a pmd to a pmd migration entry in swap-entry like format.
pmd_present() is not simple and it's not enough by itself to determine whether
a given pmd is a pmd migration entry. See patch 3/11 and 5/11 for details.

Here're topics which might be helpful to start discussion:

- at this point, this functionality is limited to x86_64.

- there's already an implementation of thp migration in the autonuma code, which
  this patchset doesn't touch at all because it works fine as it is.

- fallback to thp split: current implementation just fails a migration trial if
  thp migration fails. It's possible to retry migration after splitting the thp,
  but that's not included in this version.

Any comments or advice are welcome.

Thanks,
Naoya Horiguchi
---
Summary:

Naoya Horiguchi (11):
  mm: mempolicy: add queue_pages_node_check()
  mm: thp: introduce CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
  mm: thp: add helpers related to thp/pmd migration
  mm: thp: enable thp migration in generic path
  mm: thp: check pmd migration entry in common path
  mm: soft-dirty: keep soft-dirty bits over thp migration
  mm: hwpoison: fix race between unpoisoning and freeing migrate source page
  mm: hwpoison: soft offline supports thp migration
  mm: mempolicy: mbind and migrate_pages support thp migration
  mm: migrate: move_pages() supports thp migration
  mm: memory_hotplug: memory hotremove supports thp migration

 arch/x86/Kconfig |   4 +
 arch/x86/include/asm/pgtable.h   |  28 ++
 arch/x86/include/asm/pgtable_64.h|   2 +
 arch/x86/include/asm/pgtable_types.h |   8 +-
 arch/x86/mm/gup.c|   3 +
 fs/proc/task_mmu.c   |  25 +++--
 include/asm-generic/pgtable.h|  34 ++-
 include/linux/huge_mm.h  |  17 
 include/linux/swapops.h  |  64 +
 mm/Kconfig   |   3 +
 mm/gup.c |   8 ++
 mm/huge_memory.c | 175 +--
 mm/memcontrol.c  |   2 +
 mm/memory-failure.c  |  41 
 mm/memory.c  |   5 +
 mm/memory_hotplug.c  |   8 ++
 mm/mempolicy.c   | 110 --
 mm/migrate.c |  57 +---
 mm/page_isolation.c  |   8 ++
 mm/rmap.c|   7 +-
 20 files changed, 527 insertions(+), 82 deletions(-)


[PATCH v1 04/11] mm: thp: enable thp migration in generic path

2016-03-02 Thread Naoya Horiguchi
This patch makes it possible to support thp migration gradually. If you fail
to allocate a destination page as a thp, you just split the source thp as we
do now, and then enter the normal page migration. If you succeed in allocating
a destination thp, you enter thp migration. Subsequent patches actually enable
thp migration for each caller of page migration by allowing its get_new_page()
callback to allocate thps.

Signed-off-by: Naoya Horiguchi 
---
 mm/migrate.c | 2 +-
 mm/rmap.c| 7 +--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/migrate.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/migrate.c
index 14164f6..bd8bfa4 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/mm/migrate.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/migrate.c
@@ -969,7 +969,7 @@ static ICE_noinline int unmap_and_move(new_page_t 
get_new_page,
goto out;
}
 
-   if (unlikely(PageTransHuge(page))) {
+   if (unlikely(PageTransHuge(page) && !PageTransHuge(newpage))) {
lock_page(page);
rc = split_huge_page(page);
unlock_page(page);
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/rmap.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/rmap.c
index 02f0bfc..49198b8 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/mm/rmap.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/rmap.c
@@ -1427,6 +1427,11 @@ static int try_to_unmap_one(struct page *page, struct 
vm_area_struct *vma,
struct rmap_private *rp = arg;
enum ttu_flags flags = rp->flags;
 
+   if (!PageHuge(page) && PageTransHuge(page)) {
+   VM_BUG_ON_PAGE(!(flags & TTU_MIGRATION), page);
+   return set_pmd_migration_entry(page, mm, address);
+   }
+
/* munlock has nothing to gain from examining un-locked vmas */
if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
goto out;
@@ -1610,8 +1615,6 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
.anon_lock = page_lock_anon_vma_read,
};
 
-   VM_BUG_ON_PAGE(!PageHuge(page) && PageTransHuge(page), page);
-
/*
 * During exec, a temporary VMA is setup and later moved.
 * The VMA is moved under the anon_vma lock but not the
-- 
2.7.0



Re: Suspicious error for CMA stress test

2016-03-02 Thread Joonsoo Kim
2016-03-03 10:25 GMT+09:00 Laura Abbott :
> (cc -mm and Joonsoo Kim)
>
>
> On 03/02/2016 05:52 AM, Hanjun Guo wrote:
>>
>> Hi,
>>
>> I came across a suspicious error for CMA stress test:
>>
>> Before the test, I got:
>> -bash-4.3# cat /proc/meminfo | grep Cma
>> CmaTotal: 204800 kB
>> CmaFree:  195044 kB
>>
>>
>> After running the test:
>> -bash-4.3# cat /proc/meminfo | grep Cma
>> CmaTotal: 204800 kB
>> CmaFree: 6602584 kB
>>
>> So the freed CMA memory is more than total..
>>
>> Also the MemFree is more than the MemTotal:
>>
>> -bash-4.3# cat /proc/meminfo
>> MemTotal:   16342016 kB
>> MemFree:22367268 kB
>> MemAvailable:   22370528 kB
>>
>> Here is the kernel module doing the stress test below (if the test case
>> is wrong, correct me), any help would be greatly appreciated.
>>
>> The test is running on ARM64 platform (hisilicon D02) with 4.4 kernel, I
>> think
>> the 4.5-rc is the same as I didn't notice the updates for it.
>>
>> int malloc_dma(void *data)
>> {
>>  void *vaddr;
>>  struct platform_device * pdev=(struct platform_device*)data;
>>  dma_addr_t dma_handle;
>>  int i;
>>
>>  for(i=0; i<1000; i++) {
>>  vaddr=dma_alloc_coherent(&pdev->dev, malloc_size, &dma_handle,
>> GFP_KERNEL);
>>  if (!vaddr)
>>  pr_err("alloc cma memory failed!\n");
>>
>>  mdelay(1);
>>
>>  if (vaddr)
>>  dma_free_coherent(&pdev->dev,malloc_size,vaddr,
>> dma_handle);
>>  }
>>  pr_info("alloc free cma memory success return!\n");
>>  return 0;
>> }
>>
>> static int dma_alloc_coherent_init(struct platform_device *pdev)
>> {
>>  int i;
>>
>>  for(i=0; i<100; i++)   {
>>  task[i] = kthread_create(malloc_dma,pdev,"malloc_dma_%d",i);
>>  if(!task[i]) {
>>  printk("kthread_create faile %d\n",i);
>>  continue;
>>  }
>>  wake_up_process(task[i]);
>>  }
>>  return 0;
>> }
>>
>> Thanks
>> Hanjun
>>
>> The whole /proc/meminfo:
>>
>> -bash-4.3# cat /proc/meminfo
>> MemTotal:   16342016 kB
>> MemFree:22367268 kB
>> MemAvailable:   22370528 kB
>> Buffers:4292 kB
>> Cached:36444 kB
>> SwapCached:0 kB
>> Active:23564 kB
>> Inactive:  25360 kB
>> Active(anon):   8424 kB
>> Inactive(anon):   64 kB
>> Active(file):  15140 kB
>> Inactive(file):25296 kB
>> Unevictable:   0 kB
>> Mlocked:   0 kB
>> SwapTotal: 0 kB
>> SwapFree:  0 kB
>> Dirty: 0 kB
>> Writeback: 0 kB
>> AnonPages:  8196 kB
>> Mapped:16448 kB
>> Shmem:   296 kB
>> Slab:  26832 kB
>> SReclaimable:   6300 kB
>> SUnreclaim:20532 kB
>> KernelStack:3088 kB
>> PageTables:  404 kB
>> NFS_Unstable:  0 kB
>> Bounce:0 kB
>> WritebackTmp:  0 kB
>> CommitLimit: 8171008 kB
>> Committed_AS:  34336 kB
>> VmallocTotal:   258998208 kB
>> VmallocUsed:   0 kB
>> VmallocChunk:  0 kB
>> AnonHugePages: 0 kB
>> CmaTotal: 204800 kB
>> CmaFree: 6602584 kB
>> HugePages_Total:   0
>> HugePages_Free:0
>> HugePages_Rsvd:0
>> HugePages_Surp:0
>> Hugepagesize:   2048 kB
>>
>
>
> I played with this a bit and can see the same problem. The sanity
> check of CmaFree < CmaTotal generally triggers in
> __move_zone_freepage_state in unset_migratetype_isolate.
> This also seems to be present as far back as v4.0 which was the
> first version to have the updated accounting from Joonsoo.
> Were there known limitations with the new freepage accounting,
> Joonsoo?

I don't know. I also played with this and it looks like there is an
accounting problem; however, in my case, the number of free pages is slightly less
than the total. I will take a look.

Hanjun, could you tell me your malloc_size? I tested with 1 and it doesn't
look like your case.

Thanks.


[PATCH v1 04/11] mm: thp: enable thp migration in generic path

2016-03-02 Thread Naoya Horiguchi
This patch makes it possible to support thp migration gradually. If you fail
to allocate a destination page as a thp, you just split the source thp as we
do now, and then enter the normal page migration. If you succeed in allocating
a destination thp, you enter thp migration. Subsequent patches actually enable
thp migration for each caller of page migration by allowing its get_new_page()
callback to allocate thps.

Signed-off-by: Naoya Horiguchi 
---
 mm/migrate.c | 2 +-
 mm/rmap.c| 7 +--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/migrate.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/migrate.c
index 14164f6..bd8bfa4 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/mm/migrate.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/migrate.c
@@ -969,7 +969,7 @@ static ICE_noinline int unmap_and_move(new_page_t 
get_new_page,
goto out;
}
 
-   if (unlikely(PageTransHuge(page))) {
+   if (unlikely(PageTransHuge(page) && !PageTransHuge(newpage))) {
lock_page(page);
rc = split_huge_page(page);
unlock_page(page);
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/rmap.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/rmap.c
index 02f0bfc..49198b8 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/mm/rmap.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/rmap.c
@@ -1427,6 +1427,11 @@ static int try_to_unmap_one(struct page *page, struct 
vm_area_struct *vma,
struct rmap_private *rp = arg;
enum ttu_flags flags = rp->flags;
 
+   if (!PageHuge(page) && PageTransHuge(page)) {
+   VM_BUG_ON_PAGE(!(flags & TTU_MIGRATION), page);
+   return set_pmd_migration_entry(page, mm, address);
+   }
+
/* munlock has nothing to gain from examining un-locked vmas */
if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
goto out;
@@ -1610,8 +1615,6 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
.anon_lock = page_lock_anon_vma_read,
};
 
-   VM_BUG_ON_PAGE(!PageHuge(page) && PageTransHuge(page), page);
-
/*
 * During exec, a temporary VMA is setup and later moved.
 * The VMA is moved under the anon_vma lock but not the
-- 
2.7.0



Re: Suspicious error for CMA stress test

2016-03-02 Thread Joonsoo Kim
2016-03-03 10:25 GMT+09:00 Laura Abbott :
> (cc -mm and Joonsoo Kim)
>
>
> On 03/02/2016 05:52 AM, Hanjun Guo wrote:
>>
>> Hi,
>>
>> I came across a suspicious error for CMA stress test:
>>
>> Before the test, I got:
>> -bash-4.3# cat /proc/meminfo | grep Cma
>> CmaTotal: 204800 kB
>> CmaFree:  195044 kB
>>
>>
>> After running the test:
>> -bash-4.3# cat /proc/meminfo | grep Cma
>> CmaTotal: 204800 kB
>> CmaFree: 6602584 kB
>>
>> So the freed CMA memory is more than total..
>>
>> Also the the MemFree is more than mem total:
>>
>> -bash-4.3# cat /proc/meminfo
>> MemTotal:   16342016 kB
>> MemFree:22367268 kB
>> MemAvailable:   22370528 kB
>>
>> Here is the kernel module doing the stress test below (if the test case
>> is wrong, correct me), any help would be great appreciated.
>>
>> The test is running on ARM64 platform (hisilicon D02) with 4.4 kernel, I
>> think
>> the 4.5-rc is the same as I didn't notice the updates for it.
>>
>> int malloc_dma(void *data)
>> {
>>  void *vaddr;
>>  struct platform_device * pdev=(struct platform_device*)data;
>>  dma_addr_t dma_handle;
>>  int i;
>>
>>  for(i=0; i<1000; i++) {
>>  vaddr=dma_alloc_coherent(&pdev->dev, malloc_size, &dma_handle,
>> GFP_KERNEL);
>>  if (!vaddr)
>>  pr_err("alloc cma memory failed!\n");
>>
>>  mdelay(1);
>>
>>  if (vaddr)
>>  dma_free_coherent(&pdev->dev,malloc_size,vaddr,
>> dma_handle);
>>  }
>>  pr_info("alloc free cma memory success return!\n");
>>  return 0;
>> }
>>
>> static int dma_alloc_coherent_init(struct platform_device *pdev)
>> {
>>  int i;
>>
>>  for(i=0; i<100; i++)   {
>>  task[i] = kthread_create(malloc_dma,pdev,"malloc_dma_%d",i);
>>  if(!task[i]) {
>>  printk("kthread_create faile %d\n",i);
>>  continue;
>>  }
>>  wake_up_process(task[i]);
>>  }
>>  return 0;
>> }
>>
>> Thanks
>> Hanjun
>>
>> The whole /proc/meminfo:
>>
>> -bash-4.3# cat /proc/meminfo
>> MemTotal:   16342016 kB
>> MemFree:22367268 kB
>> MemAvailable:   22370528 kB
>> Buffers:4292 kB
>> Cached:36444 kB
>> SwapCached:0 kB
>> Active:23564 kB
>> Inactive:  25360 kB
>> Active(anon):   8424 kB
>> Inactive(anon):   64 kB
>> Active(file):  15140 kB
>> Inactive(file):25296 kB
>> Unevictable:   0 kB
>> Mlocked:   0 kB
>> SwapTotal: 0 kB
>> SwapFree:  0 kB
>> Dirty: 0 kB
>> Writeback: 0 kB
>> AnonPages:  8196 kB
>> Mapped:16448 kB
>> Shmem:   296 kB
>> Slab:  26832 kB
>> SReclaimable:   6300 kB
>> SUnreclaim:20532 kB
>> KernelStack:3088 kB
>> PageTables:  404 kB
>> NFS_Unstable:  0 kB
>> Bounce:0 kB
>> WritebackTmp:  0 kB
>> CommitLimit: 8171008 kB
>> Committed_AS:  34336 kB
>> VmallocTotal:   258998208 kB
>> VmallocUsed:   0 kB
>> VmallocChunk:  0 kB
>> AnonHugePages: 0 kB
>> CmaTotal: 204800 kB
>> CmaFree: 6602584 kB
>> HugePages_Total:   0
>> HugePages_Free:0
>> HugePages_Rsvd:0
>> HugePages_Surp:0
>> Hugepagesize:   2048 kB
>>
>
>
> I played with this a bit and can see the same problem. The sanity
> check of CmaFree < CmaTotal generally triggers in
> __move_zone_freepage_state in unset_migratetype_isolate.
> This also seems to be present as far back as v4.0 which was the
> first version to have the updated accounting from Joonsoo.
> Were there known limitations with the new freepage accounting,
> Joonsoo?

I don't know. I also played with this, and it looks like there is an
accounting problem; however, in my case, the number of free pages is slightly
less than the total. I will take a look.

Hanjun, could you tell me your malloc_size? I tested with 1 and it doesn't
look like your case.

Thanks.


Re: [PATCH v4 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2016-03-02 Thread Joonsoo Kim
2016-03-03 1:58 GMT+09:00 Vlastimil Babka :
> On 02/26/2016 01:58 AM, js1...@gmail.com wrote:
>>
>> From: Joonsoo Kim 
>>
>> CMA allocation should be guaranteed to succeed by definition, but,
>> unfortunately, it would be failed sometimes. It is hard to track down
>> the problem, because it is related to page reference manipulation and
>> we don't have any facility to analyze it.
>>
>> This patch adds tracepoints to track down page reference manipulation.
>> With it, we can find exact reason of failure and can fix the problem.
>> Following is an example of tracepoint output. (note: this example is
>> stale version that printing flags as the number. Recent version will
>> print it as human readable string.)
>>
>> Enabling this feature bloat kernel text 30 KB in my configuration.
>>
>>     text    data     bss      dec    hex filename
>> 12127327 2243616 1507328 15878271 f2487f vmlinux_disabled
>> 12157208 2258880 1507328 15923416 f2f8d8 vmlinux_enabled
>>
>
> That's not bad, and it's even configurable. Thanks for taking the extra care
> about overhead since v1.
>
>> Note that, due to header file dependency problem between mm.h and
>> tracepoint.h, this feature has to open code the static key functions
>> for tracepoints. Proposed by Steven Rostedt in following link.
>>
>> https://lkml.org/lkml/2015/12/9/699
>>
>> v3:
>> o Add commit description and code comment why this patch open code
>> the static key functions for tracepoints.
>> o Notify that example is stale version.
>> o Add "depends on TRACEPOINTS".
>>
>> v2:
>> o Use static key of each tracepoints to avoid function call overhead
>> when tracepoints are disabled.
>> o Print human-readable page flag thanks to newly introduced %pgp option.
>> o Add more description to Kconfig.debug.
>>
>> Acked-by: Michal Nazarewicz 
>> Signed-off-by: Joonsoo Kim 
>
>
> Acked-by: Vlastimil Babka 
>
>> +config DEBUG_PAGE_REF
>> +   bool "Enable tracepoint to track down page reference manipulation"
>> +   depends on DEBUG_KERNEL
>> +   depends on TRACEPOINTS
>> +   ---help---
>> + This is the feature to add tracepoint for tracking down page
>> reference
>> + manipulation. This tracking is useful to diagnosis functional
>> failure
>> + due to migration failure caused by page reference mismatch. Be
>
>
> OK.
>
>> + careful to turn on this feature because it could bloat some
>> kernel
>> + text. In my configuration, it bloats 30 KB. Although kernel text
>> will
>> + be bloated, there would be no runtime performance overhead if
>> + tracepoint isn't enabled thanks to jump label.
>
>
> I would just write something like:
>
> Enabling this feature adds about 30 KB to the kernel code, but runtime
> performance overhead is virtually none until the tracepoints are actually
> enabled.

Okay, better!
Andrew, do you want fixup patch from me or could you simply handle it?

Thanks.


[PATCH v1 07/11] mm: hwpoison: fix race between unpoisoning and freeing migrate source page

2016-03-02 Thread Naoya Horiguchi
During testing thp migration, I saw the BUG_ON triggered due to the race between
soft offline and unpoison (what I actually saw was "bad page" warning of freeing
page with PageActive set, then subsequent bug messages differ each time.)

I tried to solve similar problems a few times (see commit f4c18e6f7b5b ("mm:
check __PG_HWPOISON separately from PAGE_FLAGS_CHECK_AT_*")), but the new
workload brings out a new problem with the previous solution.

Let's say that unpoison never works well if the target page is not properly
contained, so now I'm going in the direction of limiting the unpoison function
(as commit 230ac719c500 ("mm/hwpoison: don't try to unpoison containment-failed
pages") does). This patch takes another step in that direction by ensuring that
the target page is kicked out from any pcplist. With this change, the dirty hack
of calling put_page() instead of putback_lru_page() when migration reason is
MR_MEMORY_FAILURE is not necessary any more, so it's reverted.

Signed-off-by: Naoya Horiguchi 
---
 mm/memory-failure.c | 10 +-
 mm/migrate.c|  8 +---
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/memory-failure.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/memory-failure.c
index 67c30eb..bfb63c6 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/mm/memory-failure.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/memory-failure.c
@@ -1431,6 +1431,13 @@ int unpoison_memory(unsigned long pfn)
return 0;
}
 
+   /*
+* Soft-offlined pages might stay in PCP list because it's freed via
+* putback_lru_page(), and such pages shouldn't be unpoisoned because
+* it could cause list corruption. So let's drain pages to avoid that.
+*/
+   shake_page(page, 0);
+
nr_pages = 1 << compound_order(page);
 
if (!get_hwpoison_page(p)) {
@@ -1674,7 +1681,8 @@ static int __soft_offline_page(struct page *page, int 
flags)
pfn, ret, page->flags);
if (ret > 0)
ret = -EIO;
-   }
+   } else if (!TestSetPageHWPoison(page))
+   num_poisoned_pages_inc();
} else {
pr_info("soft offline: %#lx: isolation failed: %d, page count 
%d, type %lx\n",
pfn, ret, page_count(page), page->flags);
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/migrate.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/migrate.c
index bd8bfa4..31bc724 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/mm/migrate.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/migrate.c
@@ -994,13 +994,7 @@ static ICE_noinline int unmap_and_move(new_page_t 
get_new_page,
list_del(>lru);
dec_zone_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
-   /* Soft-offlined page shouldn't go through lru cache list */
-   if (reason == MR_MEMORY_FAILURE) {
-   put_page(page);
-   if (!test_set_page_hwpoison(page))
-   num_poisoned_pages_inc();
-   } else
-   putback_lru_page(page);
+   putback_lru_page(page);
}
 
/*
-- 
2.7.0



Re: [PATCH v4 2/2] mm/page_ref: add tracepoint to track down page reference manipulation

2016-03-02 Thread Joonsoo Kim
2016-03-03 1:58 GMT+09:00 Vlastimil Babka :
> On 02/26/2016 01:58 AM, js1...@gmail.com wrote:
>>
>> From: Joonsoo Kim 
>>
>> CMA allocation should be guaranteed to succeed by definition, but,
>> unfortunately, it would be failed sometimes. It is hard to track down
>> the problem, because it is related to page reference manipulation and
>> we don't have any facility to analyze it.
>>
>> This patch adds tracepoints to track down page reference manipulation.
>> With it, we can find exact reason of failure and can fix the problem.
>> Following is an example of tracepoint output. (note: this example is
>> stale version that printing flags as the number. Recent version will
>> print it as human readable string.)
>>
>> Enabling this feature bloat kernel text 30 KB in my configuration.
>>
>>     text    data     bss      dec    hex filename
>> 12127327 2243616 1507328 15878271 f2487f vmlinux_disabled
>> 12157208 2258880 1507328 15923416 f2f8d8 vmlinux_enabled
>>
>
> That's not bad, and it's even configurable. Thanks for taking the extra care
> about overhead since v1.
>
>> Note that, due to header file dependency problem between mm.h and
>> tracepoint.h, this feature has to open code the static key functions
>> for tracepoints. Proposed by Steven Rostedt in following link.
>>
>> https://lkml.org/lkml/2015/12/9/699
>>
>> v3:
>> o Add commit description and code comment why this patch open code
>> the static key functions for tracepoints.
>> o Notify that example is stale version.
>> o Add "depends on TRACEPOINTS".
>>
>> v2:
>> o Use static key of each tracepoints to avoid function call overhead
>> when tracepoints are disabled.
>> o Print human-readable page flag thanks to newly introduced %pgp option.
>> o Add more description to Kconfig.debug.
>>
>> Acked-by: Michal Nazarewicz 
>> Signed-off-by: Joonsoo Kim 
>
>
> Acked-by: Vlastimil Babka 
>
>> +config DEBUG_PAGE_REF
>> +   bool "Enable tracepoint to track down page reference manipulation"
>> +   depends on DEBUG_KERNEL
>> +   depends on TRACEPOINTS
>> +   ---help---
>> + This is the feature to add tracepoint for tracking down page
>> reference
>> + manipulation. This tracking is useful to diagnosis functional
>> failure
>> + due to migration failure caused by page reference mismatch. Be
>
>
> OK.
>
>> + careful to turn on this feature because it could bloat some
>> kernel
>> + text. In my configuration, it bloats 30 KB. Although kernel text
>> will
>> + be bloated, there would be no runtime performance overhead if
>> + tracepoint isn't enabled thanks to jump label.
>
>
> I would just write something like:
>
> Enabling this feature adds about 30 KB to the kernel code, but runtime
> performance overhead is virtually none until the tracepoints are actually
> enabled.

Okay, better!
Andrew, do you want fixup patch from me or could you simply handle it?

Thanks.


[PATCH v1 07/11] mm: hwpoison: fix race between unpoisoning and freeing migrate source page

2016-03-02 Thread Naoya Horiguchi
During testing thp migration, I saw the BUG_ON triggered due to the race between
soft offline and unpoison (what I actually saw was "bad page" warning of freeing
page with PageActive set, then subsequent bug messages differ each time.)

I tried to solve similar problems a few times (see commit f4c18e6f7b5b ("mm:
check __PG_HWPOISON separately from PAGE_FLAGS_CHECK_AT_*")), but the new
workload brings out a new problem with the previous solution.

Let's say that unpoison never works well if the target page is not properly
contained, so now I'm going in the direction of limiting the unpoison function
(as commit 230ac719c500 ("mm/hwpoison: don't try to unpoison containment-failed
pages") does). This patch takes another step in that direction by ensuring that
the target page is kicked out from any pcplist. With this change, the dirty hack
of calling put_page() instead of putback_lru_page() when migration reason is
MR_MEMORY_FAILURE is not necessary any more, so it's reverted.

Signed-off-by: Naoya Horiguchi 
---
 mm/memory-failure.c | 10 +-
 mm/migrate.c|  8 +---
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/memory-failure.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/memory-failure.c
index 67c30eb..bfb63c6 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/mm/memory-failure.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/memory-failure.c
@@ -1431,6 +1431,13 @@ int unpoison_memory(unsigned long pfn)
return 0;
}
 
+   /*
+* Soft-offlined pages might stay in PCP list because it's freed via
+* putback_lru_page(), and such pages shouldn't be unpoisoned because
+* it could cause list corruption. So let's drain pages to avoid that.
+*/
+   shake_page(page, 0);
+
nr_pages = 1 << compound_order(page);
 
if (!get_hwpoison_page(p)) {
@@ -1674,7 +1681,8 @@ static int __soft_offline_page(struct page *page, int 
flags)
pfn, ret, page->flags);
if (ret > 0)
ret = -EIO;
-   }
+   } else if (!TestSetPageHWPoison(page))
+   num_poisoned_pages_inc();
} else {
pr_info("soft offline: %#lx: isolation failed: %d, page count 
%d, type %lx\n",
pfn, ret, page_count(page), page->flags);
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/migrate.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/migrate.c
index bd8bfa4..31bc724 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/mm/migrate.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/migrate.c
@@ -994,13 +994,7 @@ static ICE_noinline int unmap_and_move(new_page_t 
get_new_page,
list_del(>lru);
dec_zone_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
-   /* Soft-offlined page shouldn't go through lru cache list */
-   if (reason == MR_MEMORY_FAILURE) {
-   put_page(page);
-   if (!test_set_page_hwpoison(page))
-   num_poisoned_pages_inc();
-   } else
-   putback_lru_page(page);
+   putback_lru_page(page);
}
 
/*
-- 
2.7.0



[PATCH v1 05/11] mm: thp: check pmd migration entry in common path

2016-03-02 Thread Naoya Horiguchi
If one of the callers of page migration starts to handle thp, memory management
code starts to see pmd migration entries, so we need to prepare for them before
enabling it. This patch changes various code points which check the status of
given pmds in order to prevent races between thp migration and pmd-related work.

Signed-off-by: Naoya Horiguchi 
---
 arch/x86/mm/gup.c  |  3 +++
 fs/proc/task_mmu.c | 25 +
 mm/gup.c   |  8 +++
 mm/huge_memory.c   | 66 --
 mm/memcontrol.c|  2 ++
 mm/memory.c|  5 +
 6 files changed, 93 insertions(+), 16 deletions(-)

diff --git v4.5-rc5-mmotm-2016-02-24-16-18/arch/x86/mm/gup.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/arch/x86/mm/gup.c
index f8d0b5e..34c3d43 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/arch/x86/mm/gup.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/arch/x86/mm/gup.c
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -210,6 +211,8 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, 
unsigned long end,
if (pmd_none(pmd))
return 0;
if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) {
+   if (unlikely(is_pmd_migration_entry(pmd)))
+   return 0;
/*
 * NUMA hinting faults need to be handled in the GUP
 * slowpath for accounting purposes and so that they
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/fs/proc/task_mmu.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/fs/proc/task_mmu.c
index fa95ab2..20205d4 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/fs/proc/task_mmu.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/fs/proc/task_mmu.c
@@ -907,6 +907,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long 
addr,
 
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
+   if (unlikely(is_pmd_migration_entry(*pmd)))
+   goto out;
+
if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
clear_soft_dirty_pmd(vma, addr, pmd);
goto out;
@@ -1184,19 +1187,18 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long 
addr, unsigned long end,
if (ptl) {
u64 flags = 0, frame = 0;
pmd_t pmd = *pmdp;
+   struct page *page;
 
if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(pmd))
flags |= PM_SOFT_DIRTY;
 
-   /*
-* Currently pmd for thp is always present because thp
-* can not be swapped-out, migrated, or HWPOISONed
-* (split in such cases instead.)
-* This if-check is just to prepare for future implementation.
-*/
-   if (pmd_present(pmd)) {
-   struct page *page = pmd_page(pmd);
-
+   if (is_pmd_migration_entry(pmd)) {
+   swp_entry_t entry = pmd_to_swp_entry(pmd);
+   frame = swp_type(entry) |
+   (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
+   page = migration_entry_to_page(entry);
+   } else if (pmd_present(pmd)) {
+   page = pmd_page(pmd);
if (page_mapcount(page) == 1)
flags |= PM_MMAP_EXCLUSIVE;
 
@@ -1518,6 +1520,11 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long 
addr,
pte_t huge_pte = *(pte_t *)pmd;
struct page *page;
 
+   if (unlikely(is_pmd_migration_entry(*pmd))) {
+   spin_unlock(ptl);
+   return 0;
+   }
+
page = can_gather_numa_stats(huge_pte, vma, addr);
if (page)
gather_stats(page, md, pte_dirty(huge_pte),
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/gup.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/gup.c
index 36ca850..113930b 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/mm/gup.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/gup.c
@@ -271,6 +271,11 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
spin_unlock(ptl);
return follow_page_pte(vma, address, pmd, flags);
}
+   if (is_pmd_migration_entry(*pmd)) {
+   spin_unlock(ptl);
+   return no_page_table(vma, flags);
+   }
+
if (flags & FOLL_SPLIT) {
int ret;
page = pmd_page(*pmd);
@@ -1324,6 +1329,9 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, 
unsigned long end,
return 0;
 
if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd))) {
+   if (unlikely(is_pmd_migration_entry(pmd)))
+   return 0;
+
/*

[PATCH v1 05/11] mm: thp: check pmd migration entry in common path

2016-03-02 Thread Naoya Horiguchi
If one of the callers of page migration starts to handle thp, memory management
code starts to see pmd migration entries, so we need to prepare for them before
enabling it. This patch changes various code points which check the status of
given pmds in order to prevent races between thp migration and pmd-related work.

Signed-off-by: Naoya Horiguchi 
---
 arch/x86/mm/gup.c  |  3 +++
 fs/proc/task_mmu.c | 25 +
 mm/gup.c   |  8 +++
 mm/huge_memory.c   | 66 --
 mm/memcontrol.c|  2 ++
 mm/memory.c|  5 +
 6 files changed, 93 insertions(+), 16 deletions(-)

diff --git v4.5-rc5-mmotm-2016-02-24-16-18/arch/x86/mm/gup.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/arch/x86/mm/gup.c
index f8d0b5e..34c3d43 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/arch/x86/mm/gup.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/arch/x86/mm/gup.c
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -210,6 +211,8 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, 
unsigned long end,
if (pmd_none(pmd))
return 0;
if (unlikely(pmd_large(pmd) || !pmd_present(pmd))) {
+   if (unlikely(is_pmd_migration_entry(pmd)))
+   return 0;
/*
 * NUMA hinting faults need to be handled in the GUP
 * slowpath for accounting purposes and so that they
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/fs/proc/task_mmu.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/fs/proc/task_mmu.c
index fa95ab2..20205d4 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/fs/proc/task_mmu.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/fs/proc/task_mmu.c
@@ -907,6 +907,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long 
addr,
 
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
+   if (unlikely(is_pmd_migration_entry(*pmd)))
+   goto out;
+
if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
clear_soft_dirty_pmd(vma, addr, pmd);
goto out;
@@ -1184,19 +1187,18 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long 
addr, unsigned long end,
if (ptl) {
u64 flags = 0, frame = 0;
pmd_t pmd = *pmdp;
+   struct page *page;
 
if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(pmd))
flags |= PM_SOFT_DIRTY;
 
-   /*
-* Currently pmd for thp is always present because thp
-* can not be swapped-out, migrated, or HWPOISONed
-* (split in such cases instead.)
-* This if-check is just to prepare for future implementation.
-*/
-   if (pmd_present(pmd)) {
-   struct page *page = pmd_page(pmd);
-
+   if (is_pmd_migration_entry(pmd)) {
+   swp_entry_t entry = pmd_to_swp_entry(pmd);
+   frame = swp_type(entry) |
+   (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
+   page = migration_entry_to_page(entry);
+   } else if (pmd_present(pmd)) {
+   page = pmd_page(pmd);
if (page_mapcount(page) == 1)
flags |= PM_MMAP_EXCLUSIVE;
 
@@ -1518,6 +1520,11 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long 
addr,
pte_t huge_pte = *(pte_t *)pmd;
struct page *page;
 
+   if (unlikely(is_pmd_migration_entry(*pmd))) {
+   spin_unlock(ptl);
+   return 0;
+   }
+
page = can_gather_numa_stats(huge_pte, vma, addr);
if (page)
gather_stats(page, md, pte_dirty(huge_pte),
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/gup.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/gup.c
index 36ca850..113930b 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/mm/gup.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/gup.c
@@ -271,6 +271,11 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
spin_unlock(ptl);
return follow_page_pte(vma, address, pmd, flags);
}
+   if (is_pmd_migration_entry(*pmd)) {
+   spin_unlock(ptl);
+   return no_page_table(vma, flags);
+   }
+
if (flags & FOLL_SPLIT) {
int ret;
page = pmd_page(*pmd);
@@ -1324,6 +1329,9 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, 
unsigned long end,
return 0;
 
if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd))) {
+   if (unlikely(is_pmd_migration_entry(pmd)))
+   return 0;
+
/*
 * NUMA 

[PATCH v1 10/11] mm: migrate: move_pages() supports thp migration

2016-03-02 Thread Naoya Horiguchi
This patch enables thp migration for move_pages(2).

Signed-off-by: Naoya Horiguchi 
---
 mm/migrate.c | 24 +---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/migrate.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/migrate.c
index 31bc724..5653d49 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/mm/migrate.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/migrate.c
@@ -1240,7 +1240,17 @@ static struct page *new_page_node(struct page *p, 
unsigned long private,
if (PageHuge(p))
return alloc_huge_page_node(page_hstate(compound_head(p)),
pm->node);
-   else
+   else if (thp_migration_supported() && PageTransHuge(p)) {
+   struct page *thp;
+
+   thp = alloc_pages_node(pm->node,
+   (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM,
+   HPAGE_PMD_ORDER);
+   if (!thp)
+   return NULL;
+   prep_transhuge_page(thp);
+   return thp;
+   } else
return __alloc_pages_node(pm->node,
GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0);
 }
@@ -1267,6 +1277,7 @@ static int do_move_page_to_node_array(struct mm_struct 
*mm,
for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
struct vm_area_struct *vma;
struct page *page;
+   unsigned int follflags;
 
err = -EFAULT;
vma = find_vma(mm, pp->addr);
@@ -1274,8 +1285,10 @@ static int do_move_page_to_node_array(struct mm_struct 
*mm,
goto set_status;
 
/* FOLL_DUMP to ignore special (like zero) pages */
-   page = follow_page(vma, pp->addr,
-   FOLL_GET | FOLL_SPLIT | FOLL_DUMP);
+   follflags = FOLL_GET | FOLL_SPLIT | FOLL_DUMP;
+   if (thp_migration_supported())
+   follflags &= ~FOLL_SPLIT;
+   page = follow_page(vma, pp->addr, follflags);
 
err = PTR_ERR(page);
if (IS_ERR(page))
@@ -1303,6 +1316,11 @@ static int do_move_page_to_node_array(struct mm_struct 
*mm,
if (PageHead(page))
isolate_huge_page(page, );
goto put_and_set;
+   } else if (PageTransCompound(page)) {
+   if (PageTail(page)) {
+   err = pp->node;
+   goto put_and_set;
+   }
}
 
err = isolate_lru_page(page);
-- 
2.7.0



[PATCH v1 09/11] mm: mempolicy: mbind and migrate_pages support thp migration

2016-03-02 Thread Naoya Horiguchi
This patch enables thp migration for mbind(2) and migrate_pages(2).

Signed-off-by: Naoya Horiguchi 
---
 mm/mempolicy.c | 94 --
 1 file changed, 72 insertions(+), 22 deletions(-)

diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/mempolicy.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/mempolicy.c
index 840a0ad..a9754dd 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/mm/mempolicy.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/mempolicy.c
@@ -94,6 +94,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -487,6 +488,49 @@ static inline bool queue_pages_node_check(struct page 
*page,
return node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT);
 }
 
+static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
+   unsigned long end, struct mm_walk *walk)
+{
+   int ret = 0;
+   struct page *page;
+   struct queue_pages *qp = walk->private;
+   unsigned long flags;
+
+   if (unlikely(is_pmd_migration_entry(*pmd))) {
+   ret = 1;
+   goto unlock;
+   }
+   page = pmd_page(*pmd);
+   if (is_huge_zero_page(page)) {
+   spin_unlock(ptl);
+   split_huge_pmd(walk->vma, pmd, addr);
+   goto out;
+   }
+   if ((end - addr != HPAGE_PMD_SIZE) || !thp_migration_supported()) {
+   get_page(page);
+   spin_unlock(ptl);
+   lock_page(page);
+   ret = split_huge_page(page);
+   unlock_page(page);
+   put_page(page);
+   goto out;
+   }
+   if (queue_pages_node_check(page, qp)) {
+   ret = 1;
+   goto unlock;
+   }
+
+   ret = 1;
+   flags = qp->flags;
+   /* go to thp migration */
+   if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+   migrate_page_add(page, qp->pagelist, flags);
+unlock:
+   spin_unlock(ptl);
+out:
+   return ret;
+}
+
 /*
  * Scan through pages checking if pages follow certain conditions,
  * and move them to the pagelist if they do.
@@ -498,32 +542,19 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned 
long addr,
struct page *page;
struct queue_pages *qp = walk->private;
unsigned long flags = qp->flags;
-   int nid, ret;
+   int ret;
pte_t *pte;
spinlock_t *ptl;
 
-   if (pmd_trans_huge(*pmd)) {
-   ptl = pmd_lock(walk->mm, pmd);
-   if (pmd_trans_huge(*pmd)) {
-   page = pmd_page(*pmd);
-   if (is_huge_zero_page(page)) {
-   spin_unlock(ptl);
-   split_huge_pmd(vma, pmd, addr);
-   } else {
-   get_page(page);
-   spin_unlock(ptl);
-   lock_page(page);
-   ret = split_huge_page(page);
-   unlock_page(page);
-   put_page(page);
-   if (ret)
-   return 0;
-   }
-   } else {
-   spin_unlock(ptl);
-   }
+   ptl = pmd_trans_huge_lock(pmd, vma);
+   if (ptl) {
+   ret = queue_pages_pmd(pmd, ptl, addr, end, walk);
+   if (ret)
+   return 0;
}
 
+   if (pmd_trans_unstable(pmd))
+   return 0;
 retry:
pte = pte_offset_map_lock(walk->mm, pmd, addr, );
for (; addr != end; pte++, addr += PAGE_SIZE) {
@@ -980,7 +1011,17 @@ static struct page *new_node_page(struct page *page, 
unsigned long node, int **x
if (PageHuge(page))
return alloc_huge_page_node(page_hstate(compound_head(page)),
node);
-   else
+   else if (thp_migration_supported() && PageTransHuge(page)) {
+   struct page *thp;
+
+   thp = alloc_pages_node(node,
+   (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM,
+   HPAGE_PMD_ORDER);
+   if (!thp)
+   return NULL;
+   prep_transhuge_page(thp);
+   return thp;
+   } else
return __alloc_pages_node(node, GFP_HIGHUSER_MOVABLE |
__GFP_THISNODE, 0);
 }
@@ -1146,6 +1187,15 @@ static struct page *new_page(struct page *page, unsigned 
long start, int **x)
if (PageHuge(page)) {
BUG_ON(!vma);
return alloc_huge_page_noerr(vma, address, 1);
+   } else if (thp_migration_supported() && PageTransHuge(page)) {
+   struct page *thp;
+
+   thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
+   

[PATCH v1 11/11] mm: memory_hotplug: memory hotremove supports thp migration

2016-03-02 Thread Naoya Horiguchi
This patch enables thp migration for memory hotremove. Stub definition of
prep_transhuge_page() is added for CONFIG_TRANSPARENT_HUGEPAGE=n.

Signed-off-by: Naoya Horiguchi 
---
 include/linux/huge_mm.h | 3 +++
 mm/memory_hotplug.c | 8 
 mm/page_isolation.c | 8 
 3 files changed, 19 insertions(+)

diff --git v4.5-rc5-mmotm-2016-02-24-16-18/include/linux/huge_mm.h 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/include/linux/huge_mm.h
index 09b215d..7944346 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/include/linux/huge_mm.h
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/include/linux/huge_mm.h
@@ -175,6 +175,9 @@ static inline bool thp_migration_supported(void)
 #define transparent_hugepage_enabled(__vma) 0
 
 #define transparent_hugepage_flags 0UL
+static inline void prep_transhuge_page(struct page *page)
+{
+}
 static inline int
 split_huge_page_to_list(struct page *page, struct list_head *list)
 {
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/memory_hotplug.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/memory_hotplug.c
index e62aa07..b4b23d5 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/mm/memory_hotplug.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/memory_hotplug.c
@@ -1511,6 +1511,14 @@ do_migrate_range(unsigned long start_pfn, unsigned long 
end_pfn)
if (isolate_huge_page(page, ))
move_pages -= 1 << compound_order(head);
continue;
+   } else if (thp_migration_supported() && PageTransHuge(page)) {
+   struct page *head = compound_head(page);
+
+   pfn = page_to_pfn(head) + (1<<compound_order(head)) - 1;
+   if (compound_order(head) > PFN_SECTION_SHIFT) {
+   ret = -EBUSY;
+   break;
+   }
}
 
if (!get_page_unless_zero(page))
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/page_isolation.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/page_isolation.c
index 92c4c36..b2d22e8 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/mm/page_isolation.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/page_isolation.c
@@ -294,6 +294,14 @@ struct page *alloc_migrate_target(struct page *page, 
unsigned long private,
nodes_complement(dst, src);
return alloc_huge_page_node(page_hstate(compound_head(page)),
next_node(page_to_nid(page), dst));
+   } else if (thp_migration_supported() && PageTransHuge(page)) {
+   struct page *thp;
+
+   thp = alloc_pages(GFP_TRANSHUGE, HPAGE_PMD_ORDER);
+   if (!thp)
+   return NULL;
+   prep_transhuge_page(thp);
+   return thp;
}
 
if (PageHighMem(page))
-- 
2.7.0



[PATCH v1 03/11] mm: thp: add helpers related to thp/pmd migration

2016-03-02 Thread Naoya Horiguchi
This patch prepares the core code for thp migration. This code will take effect
once unmap_and_move() stops unconditionally splitting thp and get_new_page()
starts to allocate destination thps.

Signed-off-by: Naoya Horiguchi 
---
 arch/x86/include/asm/pgtable.h| 11 ++
 arch/x86/include/asm/pgtable_64.h |  2 +
 include/linux/swapops.h   | 62 +++
 mm/huge_memory.c  | 78 +++
 mm/migrate.c  | 23 
 5 files changed, 176 insertions(+)

diff --git v4.5-rc5-mmotm-2016-02-24-16-18/arch/x86/include/asm/pgtable.h 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/arch/x86/include/asm/pgtable.h
index 0687c47..0df9afe 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/arch/x86/include/asm/pgtable.h
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/arch/x86/include/asm/pgtable.h
@@ -515,6 +515,17 @@ static inline int pmd_present(pmd_t pmd)
return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE);
 }
 
+/*
+ * Unlike pmd_present(), __pmd_present() checks only _PAGE_PRESENT bit.
+ * Combined with is_migration_entry(), this routine is used to detect pmd
+ * migration entries. To make it work fine, callers should make sure that
+ * pmd_trans_huge() returns true beforehand.
+ */
+static inline int __pmd_present(pmd_t pmd)
+{
+   return pmd_flags(pmd) & _PAGE_PRESENT;
+}
+
 #ifdef CONFIG_NUMA_BALANCING
 /*
  * These work without NUMA balancing but the kernel does not care. See the
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/arch/x86/include/asm/pgtable_64.h 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/arch/x86/include/asm/pgtable_64.h
index 2ee7811..df869d0 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/arch/x86/include/asm/pgtable_64.h
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/arch/x86/include/asm/pgtable_64.h
@@ -153,7 +153,9 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
 ((type) << (_PAGE_BIT_PRESENT + 1)) \
 | ((offset) << SWP_OFFSET_SHIFT) })
 #define __pte_to_swp_entry(pte)((swp_entry_t) { pte_val((pte)) 
})
+#define __pmd_to_swp_entry(pte)((swp_entry_t) { pmd_val((pmd)) 
})
 #define __swp_entry_to_pte(x)  ((pte_t) { .pte = (x).val })
+#define __swp_entry_to_pmd(x)  ((pmd_t) { .pmd = (x).val })
 
 extern int kern_addr_valid(unsigned long addr);
 extern void cleanup_highmap(void);
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/include/linux/swapops.h 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/include/linux/swapops.h
index 5c3a5f3..b402a2c 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/include/linux/swapops.h
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/include/linux/swapops.h
@@ -163,6 +163,68 @@ static inline int is_write_migration_entry(swp_entry_t 
entry)
 
 #endif
 
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+extern int set_pmd_migration_entry(struct page *page,
+   struct mm_struct *mm, unsigned long address);
+
+extern int remove_migration_pmd(struct page *new,
+   struct vm_area_struct *vma, unsigned long addr, void *old);
+
+extern void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd);
+
+static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd)
+{
+   swp_entry_t arch_entry;
+
+   arch_entry = __pmd_to_swp_entry(pmd);
+   return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
+}
+
+static inline pmd_t swp_entry_to_pmd(swp_entry_t entry)
+{
+   swp_entry_t arch_entry;
+
+   arch_entry = __swp_entry(swp_type(entry), swp_offset(entry));
+   return __swp_entry_to_pmd(arch_entry);
+}
+
+static inline int is_pmd_migration_entry(pmd_t pmd)
+{
+   return !__pmd_present(pmd) && is_migration_entry(pmd_to_swp_entry(pmd));
+}
+#else
+static inline int set_pmd_migration_entry(struct page *page,
+   struct mm_struct *mm, unsigned long address)
+{
+   return 0;
+}
+
+static inline int remove_migration_pmd(struct page *new,
+   struct vm_area_struct *vma, unsigned long addr, void *old)
+{
+   return 0;
+}
+
+static inline void pmd_migration_entry_wait(struct mm_struct *m, pmd_t *p) { }
+
+static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd)
+{
+   return swp_entry(0, 0);
+}
+
+static inline pmd_t swp_entry_to_pmd(swp_entry_t entry)
+{
+   pmd_t pmd = {};
+
+   return pmd;
+}
+
+static inline int is_pmd_migration_entry(pmd_t pmd)
+{
+   return 0;
+}
+#endif
+
 #ifdef CONFIG_MEMORY_FAILURE
 
 extern atomic_long_t num_poisoned_pages __read_mostly;
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/huge_memory.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/huge_memory.c
index 46ad357..c6d5406 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/mm/huge_memory.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/huge_memory.c
@@ -3657,3 +3657,81 @@ static int __init split_huge_pages_debugfs(void)
 }
 late_initcall(split_huge_pages_debugfs);
 #endif
+
+#ifdef 

[PATCH v1 08/11] mm: hwpoison: soft offline supports thp migration

2016-03-02 Thread Naoya Horiguchi
This patch enables thp migration for soft offline.

Signed-off-by: Naoya Horiguchi 
---
 mm/memory-failure.c | 31 ---
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/memory-failure.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/memory-failure.c
index bfb63c6..9099e78 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/mm/memory-failure.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/memory-failure.c
@@ -1490,7 +1490,17 @@ static struct page *new_page(struct page *p, unsigned 
long private, int **x)
if (PageHuge(p))
return alloc_huge_page_node(page_hstate(compound_head(p)),
   nid);
-   else
+   else if (thp_migration_supported() && PageTransHuge(p)) {
+   struct page *thp;
+
+   thp = alloc_pages_node(nid,
+   (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM,
+   HPAGE_PMD_ORDER);
+   if (!thp)
+   return NULL;
+   prep_transhuge_page(thp);
+   return thp;
+   } else
return __alloc_pages_node(nid, GFP_HIGHUSER_MOVABLE, 0);
 }
 
@@ -1693,28 +1703,11 @@ static int __soft_offline_page(struct page *page, int 
flags)
 static int soft_offline_in_use_page(struct page *page, int flags)
 {
int ret;
-   struct page *hpage = compound_head(page);
-
-   if (!PageHuge(page) && PageTransHuge(hpage)) {
-   lock_page(hpage);
-   if (!PageAnon(hpage) || unlikely(split_huge_page(hpage))) {
-   unlock_page(hpage);
-   if (!PageAnon(hpage))
-   pr_info("soft offline: %#lx: non anonymous 
thp\n", page_to_pfn(page));
-   else
-   pr_info("soft offline: %#lx: thp split 
failed\n", page_to_pfn(page));
-   put_hwpoison_page(hpage);
-   return -EBUSY;
-   }
-   unlock_page(hpage);
-   get_hwpoison_page(page);
-   put_hwpoison_page(hpage);
-   }
 
if (PageHuge(page))
ret = soft_offline_huge_page(page, flags);
else
-   ret = __soft_offline_page(page, flags);
+   ret = __soft_offline_page(compound_head(page), flags);
 
return ret;
 }
-- 
2.7.0



[PATCH v1 10/11] mm: migrate: move_pages() supports thp migration

2016-03-02 Thread Naoya Horiguchi
This patch enables thp migration for move_pages(2).

Signed-off-by: Naoya Horiguchi 
---
 mm/migrate.c | 24 +---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/migrate.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/migrate.c
index 31bc724..5653d49 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/mm/migrate.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/migrate.c
@@ -1240,7 +1240,17 @@ static struct page *new_page_node(struct page *p, 
unsigned long private,
if (PageHuge(p))
return alloc_huge_page_node(page_hstate(compound_head(p)),
pm->node);
-   else
+   else if (thp_migration_supported() && PageTransHuge(p)) {
+   struct page *thp;
+
+   thp = alloc_pages_node(pm->node,
+   (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM,
+   HPAGE_PMD_ORDER);
+   if (!thp)
+   return NULL;
+   prep_transhuge_page(thp);
+   return thp;
+   } else
return __alloc_pages_node(pm->node,
GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0);
 }
@@ -1267,6 +1277,7 @@ static int do_move_page_to_node_array(struct mm_struct 
*mm,
for (pp = pm; pp->node != MAX_NUMNODES; pp++) {
struct vm_area_struct *vma;
struct page *page;
+   unsigned int follflags;
 
err = -EFAULT;
vma = find_vma(mm, pp->addr);
@@ -1274,8 +1285,10 @@ static int do_move_page_to_node_array(struct mm_struct 
*mm,
goto set_status;
 
/* FOLL_DUMP to ignore special (like zero) pages */
-   page = follow_page(vma, pp->addr,
-   FOLL_GET | FOLL_SPLIT | FOLL_DUMP);
+   follflags = FOLL_GET | FOLL_SPLIT | FOLL_DUMP;
+   if (thp_migration_supported())
+   follflags &= ~FOLL_SPLIT;
+   page = follow_page(vma, pp->addr, follflags);
 
err = PTR_ERR(page);
if (IS_ERR(page))
@@ -1303,6 +1316,11 @@ static int do_move_page_to_node_array(struct mm_struct 
*mm,
if (PageHead(page))
isolate_huge_page(page, &pagelist);
goto put_and_set;
+   } else if (PageTransCompound(page)) {
+   if (PageTail(page)) {
+   err = pp->node;
+   goto put_and_set;
+   }
}
 
err = isolate_lru_page(page);
-- 
2.7.0



[PATCH v1 09/11] mm: mempolicy: mbind and migrate_pages support thp migration

2016-03-02 Thread Naoya Horiguchi
This patch enables thp migration for mbind(2) and migrate_pages(2).

Signed-off-by: Naoya Horiguchi 
---
 mm/mempolicy.c | 94 --
 1 file changed, 72 insertions(+), 22 deletions(-)

diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/mempolicy.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/mempolicy.c
index 840a0ad..a9754dd 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/mm/mempolicy.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/mempolicy.c
@@ -94,6 +94,7 @@
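 #include <linux/mm_inline.h>
 #include <linux/mmu_notifier.h>
 #include <linux/printk.h>
+#include <linux/swapops.h>
 
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>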
@@ -487,6 +488,49 @@ static inline bool queue_pages_node_check(struct page 
*page,
return node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT);
 }
 
+static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
+   unsigned long end, struct mm_walk *walk)
+{
+   int ret = 0;
+   struct page *page;
+   struct queue_pages *qp = walk->private;
+   unsigned long flags;
+
+   if (unlikely(is_pmd_migration_entry(*pmd))) {
+   ret = 1;
+   goto unlock;
+   }
+   page = pmd_page(*pmd);
+   if (is_huge_zero_page(page)) {
+   spin_unlock(ptl);
+   split_huge_pmd(walk->vma, pmd, addr);
+   goto out;
+   }
+   if ((end - addr != HPAGE_PMD_SIZE) || !thp_migration_supported()) {
+   get_page(page);
+   spin_unlock(ptl);
+   lock_page(page);
+   ret = split_huge_page(page);
+   unlock_page(page);
+   put_page(page);
+   goto out;
+   }
+   if (queue_pages_node_check(page, qp)) {
+   ret = 1;
+   goto unlock;
+   }
+
+   ret = 1;
+   flags = qp->flags;
+   /* go to thp migration */
+   if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+   migrate_page_add(page, qp->pagelist, flags);
+unlock:
+   spin_unlock(ptl);
+out:
+   return ret;
+}
+
 /*
  * Scan through pages checking if pages follow certain conditions,
  * and move them to the pagelist if they do.
@@ -498,32 +542,19 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned 
long addr,
struct page *page;
struct queue_pages *qp = walk->private;
unsigned long flags = qp->flags;
-   int nid, ret;
+   int ret;
pte_t *pte;
spinlock_t *ptl;
 
-   if (pmd_trans_huge(*pmd)) {
-   ptl = pmd_lock(walk->mm, pmd);
-   if (pmd_trans_huge(*pmd)) {
-   page = pmd_page(*pmd);
-   if (is_huge_zero_page(page)) {
-   spin_unlock(ptl);
-   split_huge_pmd(vma, pmd, addr);
-   } else {
-   get_page(page);
-   spin_unlock(ptl);
-   lock_page(page);
-   ret = split_huge_page(page);
-   unlock_page(page);
-   put_page(page);
-   if (ret)
-   return 0;
-   }
-   } else {
-   spin_unlock(ptl);
-   }
+   ptl = pmd_trans_huge_lock(pmd, vma);
+   if (ptl) {
+   ret = queue_pages_pmd(pmd, ptl, addr, end, walk);
+   if (ret)
+   return 0;
}
 
+   if (pmd_trans_unstable(pmd))
+   return 0;
 retry:
pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
for (; addr != end; pte++, addr += PAGE_SIZE) {
@@ -980,7 +1011,17 @@ static struct page *new_node_page(struct page *page, 
unsigned long node, int **x
if (PageHuge(page))
return alloc_huge_page_node(page_hstate(compound_head(page)),
node);
-   else
+   else if (thp_migration_supported() && PageTransHuge(page)) {
+   struct page *thp;
+
+   thp = alloc_pages_node(node,
+   (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM,
+   HPAGE_PMD_ORDER);
+   if (!thp)
+   return NULL;
+   prep_transhuge_page(thp);
+   return thp;
+   } else
return __alloc_pages_node(node, GFP_HIGHUSER_MOVABLE |
__GFP_THISNODE, 0);
 }
@@ -1146,6 +1187,15 @@ static struct page *new_page(struct page *page, unsigned 
long start, int **x)
if (PageHuge(page)) {
BUG_ON(!vma);
return alloc_huge_page_noerr(vma, address, 1);
+   } else if (thp_migration_supported() && PageTransHuge(page)) {
+   struct page *thp;
+
+   thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
+HPAGE_PMD_ORDER);

[PATCH v1 11/11] mm: memory_hotplug: memory hotremove supports thp migration

2016-03-02 Thread Naoya Horiguchi
This patch enables thp migration for memory hotremove. A stub definition of
prep_transhuge_page() is added for CONFIG_TRANSPARENT_HUGEPAGE=n.

Signed-off-by: Naoya Horiguchi 
---
 include/linux/huge_mm.h | 3 +++
 mm/memory_hotplug.c | 8 
 mm/page_isolation.c | 8 
 3 files changed, 19 insertions(+)

diff --git v4.5-rc5-mmotm-2016-02-24-16-18/include/linux/huge_mm.h 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/include/linux/huge_mm.h
index 09b215d..7944346 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/include/linux/huge_mm.h
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/include/linux/huge_mm.h
@@ -175,6 +175,9 @@ static inline bool thp_migration_supported(void)
 #define transparent_hugepage_enabled(__vma) 0
 
 #define transparent_hugepage_flags 0UL
+static inline void prep_transhuge_page(struct page *page)
+{
+}
 static inline int
 split_huge_page_to_list(struct page *page, struct list_head *list)
 {
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/memory_hotplug.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/memory_hotplug.c
index e62aa07..b4b23d5 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/mm/memory_hotplug.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/memory_hotplug.c
@@ -1511,6 +1511,14 @@ do_migrate_range(unsigned long start_pfn, unsigned long 
end_pfn)
if (isolate_huge_page(page, &source))
move_pages -= 1 << compound_order(head);
continue;
+   } else if (thp_migration_supported() && PageTransHuge(page)) {
+   struct page *head = compound_head(page);
+
+   pfn = page_to_pfn(head) + (1<<compound_order(head)) - 1;
+   if (compound_order(head) > PFN_SECTION_SHIFT) {
+   ret = -EBUSY;
+   break;
+   }
}
 
if (!get_page_unless_zero(page))
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/page_isolation.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/page_isolation.c
index 92c4c36..b2d22e8 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/mm/page_isolation.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/page_isolation.c
@@ -294,6 +294,14 @@ struct page *alloc_migrate_target(struct page *page, 
unsigned long private,
nodes_complement(dst, src);
return alloc_huge_page_node(page_hstate(compound_head(page)),
next_node(page_to_nid(page), dst));
+   } else if (thp_migration_supported() && PageTransHuge(page)) {
+   struct page *thp;
+
+   thp = alloc_pages(GFP_TRANSHUGE, HPAGE_PMD_ORDER);
+   if (!thp)
+   return NULL;
+   prep_transhuge_page(thp);
+   return thp;
}
 
if (PageHighMem(page))
-- 
2.7.0



[PATCH v1 03/11] mm: thp: add helpers related to thp/pmd migration

2016-03-02 Thread Naoya Horiguchi
This patch prepares the core code for thp migration. This code will take effect
once unmap_and_move() stops unconditionally splitting thp and get_new_page()
starts to allocate destination thps.

Signed-off-by: Naoya Horiguchi 
---
 arch/x86/include/asm/pgtable.h| 11 ++
 arch/x86/include/asm/pgtable_64.h |  2 +
 include/linux/swapops.h   | 62 +++
 mm/huge_memory.c  | 78 +++
 mm/migrate.c  | 23 
 5 files changed, 176 insertions(+)

diff --git v4.5-rc5-mmotm-2016-02-24-16-18/arch/x86/include/asm/pgtable.h 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/arch/x86/include/asm/pgtable.h
index 0687c47..0df9afe 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/arch/x86/include/asm/pgtable.h
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/arch/x86/include/asm/pgtable.h
@@ -515,6 +515,17 @@ static inline int pmd_present(pmd_t pmd)
return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE);
 }
 
+/*
+ * Unlike pmd_present(), __pmd_present() checks only _PAGE_PRESENT bit.
+ * Combined with is_migration_entry(), this routine is used to detect pmd
+ * migration entries. To make it work fine, callers should make sure that
+ * pmd_trans_huge() returns true beforehand.
+ */
+static inline int __pmd_present(pmd_t pmd)
+{
+   return pmd_flags(pmd) & _PAGE_PRESENT;
+}
+
 #ifdef CONFIG_NUMA_BALANCING
 /*
  * These work without NUMA balancing but the kernel does not care. See the
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/arch/x86/include/asm/pgtable_64.h 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/arch/x86/include/asm/pgtable_64.h
index 2ee7811..df869d0 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/arch/x86/include/asm/pgtable_64.h
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/arch/x86/include/asm/pgtable_64.h
@@ -153,7 +153,9 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
 ((type) << (_PAGE_BIT_PRESENT + 1)) \
 | ((offset) << SWP_OFFSET_SHIFT) })
 #define __pte_to_swp_entry(pte)((swp_entry_t) { pte_val((pte)) 
})
+#define __pmd_to_swp_entry(pte)((swp_entry_t) { pmd_val((pmd)) 
})
 #define __swp_entry_to_pte(x)  ((pte_t) { .pte = (x).val })
+#define __swp_entry_to_pmd(x)  ((pmd_t) { .pmd = (x).val })
 
 extern int kern_addr_valid(unsigned long addr);
 extern void cleanup_highmap(void);
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/include/linux/swapops.h 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/include/linux/swapops.h
index 5c3a5f3..b402a2c 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/include/linux/swapops.h
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/include/linux/swapops.h
@@ -163,6 +163,68 @@ static inline int is_write_migration_entry(swp_entry_t 
entry)
 
 #endif
 
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+extern int set_pmd_migration_entry(struct page *page,
+   struct mm_struct *mm, unsigned long address);
+
+extern int remove_migration_pmd(struct page *new,
+   struct vm_area_struct *vma, unsigned long addr, void *old);
+
+extern void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd);
+
+static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd)
+{
+   swp_entry_t arch_entry;
+
+   arch_entry = __pmd_to_swp_entry(pmd);
+   return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
+}
+
+static inline pmd_t swp_entry_to_pmd(swp_entry_t entry)
+{
+   swp_entry_t arch_entry;
+
+   arch_entry = __swp_entry(swp_type(entry), swp_offset(entry));
+   return __swp_entry_to_pmd(arch_entry);
+}
+
+static inline int is_pmd_migration_entry(pmd_t pmd)
+{
+   return !__pmd_present(pmd) && is_migration_entry(pmd_to_swp_entry(pmd));
+}
+#else
+static inline int set_pmd_migration_entry(struct page *page,
+   struct mm_struct *mm, unsigned long address)
+{
+   return 0;
+}
+
+static inline int remove_migration_pmd(struct page *new,
+   struct vm_area_struct *vma, unsigned long addr, void *old)
+{
+   return 0;
+}
+
+static inline void pmd_migration_entry_wait(struct mm_struct *m, pmd_t *p) { }
+
+static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd)
+{
+   return swp_entry(0, 0);
+}
+
+static inline pmd_t swp_entry_to_pmd(swp_entry_t entry)
+{
+   pmd_t pmd = {};
+
+   return pmd;
+}
+
+static inline int is_pmd_migration_entry(pmd_t pmd)
+{
+   return 0;
+}
+#endif
+
 #ifdef CONFIG_MEMORY_FAILURE
 
 extern atomic_long_t num_poisoned_pages __read_mostly;
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/huge_memory.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/huge_memory.c
index 46ad357..c6d5406 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/mm/huge_memory.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/huge_memory.c
@@ -3657,3 +3657,81 @@ static int __init split_huge_pages_debugfs(void)
 }
 late_initcall(split_huge_pages_debugfs);
 #endif
+
+#ifdef 

[PATCH v1 08/11] mm: hwpoison: soft offline supports thp migration

2016-03-02 Thread Naoya Horiguchi
This patch enables thp migration for soft offline.

Signed-off-by: Naoya Horiguchi 
---
 mm/memory-failure.c | 31 ---
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/memory-failure.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/memory-failure.c
index bfb63c6..9099e78 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/mm/memory-failure.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/memory-failure.c
@@ -1490,7 +1490,17 @@ static struct page *new_page(struct page *p, unsigned 
long private, int **x)
if (PageHuge(p))
return alloc_huge_page_node(page_hstate(compound_head(p)),
   nid);
-   else
+   else if (thp_migration_supported() && PageTransHuge(p)) {
+   struct page *thp;
+
+   thp = alloc_pages_node(nid,
+   (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM,
+   HPAGE_PMD_ORDER);
+   if (!thp)
+   return NULL;
+   prep_transhuge_page(thp);
+   return thp;
+   } else
return __alloc_pages_node(nid, GFP_HIGHUSER_MOVABLE, 0);
 }
 
@@ -1693,28 +1703,11 @@ static int __soft_offline_page(struct page *page, int 
flags)
 static int soft_offline_in_use_page(struct page *page, int flags)
 {
int ret;
-   struct page *hpage = compound_head(page);
-
-   if (!PageHuge(page) && PageTransHuge(hpage)) {
-   lock_page(hpage);
-   if (!PageAnon(hpage) || unlikely(split_huge_page(hpage))) {
-   unlock_page(hpage);
-   if (!PageAnon(hpage))
-   pr_info("soft offline: %#lx: non anonymous 
thp\n", page_to_pfn(page));
-   else
-   pr_info("soft offline: %#lx: thp split 
failed\n", page_to_pfn(page));
-   put_hwpoison_page(hpage);
-   return -EBUSY;
-   }
-   unlock_page(hpage);
-   get_hwpoison_page(page);
-   put_hwpoison_page(hpage);
-   }
 
if (PageHuge(page))
ret = soft_offline_huge_page(page, flags);
else
-   ret = __soft_offline_page(page, flags);
+   ret = __soft_offline_page(compound_head(page), flags);
 
return ret;
 }
-- 
2.7.0



[PATCH v1 06/11] mm: soft-dirty: keep soft-dirty bits over thp migration

2016-03-02 Thread Naoya Horiguchi
The soft-dirty bit is designed to be preserved across page migration, so this
patch preserves it across thp migration too.

This patch also changes the bit used for _PAGE_SWP_SOFT_DIRTY, because bit 7
(_PAGE_PSE) is needed by thp migration: _PAGE_PSE and _PAGE_PRESENT are used
together to detect a pmd migration entry. When soft-dirty was introduced, bit 6
was used for nonlinear file mapping, but that feature has since been replaced
with emulation, so we can relocate _PAGE_SWP_SOFT_DIRTY to bit 6.

Signed-off-by: Naoya Horiguchi 
---
 arch/x86/include/asm/pgtable.h   | 17 +
 arch/x86/include/asm/pgtable_types.h |  8 
 include/asm-generic/pgtable.h| 34 +-
 include/linux/swapops.h  |  2 ++
 mm/huge_memory.c | 33 +++--
 5 files changed, 87 insertions(+), 7 deletions(-)

diff --git v4.5-rc5-mmotm-2016-02-24-16-18/arch/x86/include/asm/pgtable.h 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/arch/x86/include/asm/pgtable.h
index 0df9afe..e3da9fe 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/arch/x86/include/asm/pgtable.h
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/arch/x86/include/asm/pgtable.h
@@ -920,6 +920,23 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
 {
return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY);
 }
+
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
+{
+   return pmd_set_flags(pmd, _PAGE_SWP_SOFT_DIRTY);
+}
+
+static inline int pmd_swp_soft_dirty(pmd_t pmd)
+{
+   return pmd_flags(pmd) & _PAGE_SWP_SOFT_DIRTY;
+}
+
+static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
+{
+   return pmd_clear_flags(pmd, _PAGE_SWP_SOFT_DIRTY);
+}
+#endif
 #endif
 
 #include <asm-generic/pgtable.h>
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/arch/x86/include/asm/pgtable_types.h 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/arch/x86/include/asm/pgtable_types.h
index 4432ab7..a5d5e43 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/arch/x86/include/asm/pgtable_types.h
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/arch/x86/include/asm/pgtable_types.h
@@ -71,14 +71,14 @@
  * Tracking soft dirty bit when a page goes to a swap is tricky.
  * We need a bit which can be stored in pte _and_ not conflict
  * with swap entry format. On x86 bits 6 and 7 are *not* involved
- * into swap entry computation, but bit 6 is used for nonlinear
- * file mapping, so we borrow bit 7 for soft dirty tracking.
+ * into swap entry computation, but bit 7 is used for thp migration,
+ * so we borrow bit 6 for soft dirty tracking.
  *
  * Please note that this bit must be treated as swap dirty page
- * mark if and only if the PTE has present bit clear!
+ * mark if and only if the PTE/PMD has present bit clear!
  */
 #ifdef CONFIG_MEM_SOFT_DIRTY
-#define _PAGE_SWP_SOFT_DIRTY   _PAGE_PSE
+#define _PAGE_SWP_SOFT_DIRTY   _PAGE_DIRTY
 #else
 #define _PAGE_SWP_SOFT_DIRTY   (_AT(pteval_t, 0))
 #endif
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/include/asm-generic/pgtable.h 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/include/asm-generic/pgtable.h
index 9401f48..1b0d610 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/include/asm-generic/pgtable.h
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/include/asm-generic/pgtable.h
@@ -489,7 +489,24 @@ static inline void ptep_modify_prot_commit(struct 
mm_struct *mm,
 #define arch_start_context_switch(prev)do {} while (0)
 #endif
 
-#ifndef CONFIG_HAVE_ARCH_SOFT_DIRTY
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+#ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
+static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
+{
+   return pmd;
+}
+
+static inline int pmd_swp_soft_dirty(pmd_t pmd)
+{
+   return 0;
+}
+
+static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
+{
+   return pmd;
+}
+#endif
+#else /* !CONFIG_HAVE_ARCH_SOFT_DIRTY */
 static inline int pte_soft_dirty(pte_t pte)
 {
return 0;
@@ -534,6 +551,21 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
 {
return pte;
 }
+
+static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
+{
+   return pmd;
+}
+
+static inline int pmd_swp_soft_dirty(pmd_t pmd)
+{
+   return 0;
+}
+
+static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
+{
+   return pmd;
+}
 #endif
 
 #ifndef __HAVE_PFNMAP_TRACKING
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/include/linux/swapops.h 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/include/linux/swapops.h
index b402a2c..18f3744 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/include/linux/swapops.h
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/include/linux/swapops.h
@@ -176,6 +176,8 @@ static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd)
 {
swp_entry_t arch_entry;
 
+   if (pmd_swp_soft_dirty(pmd))
+   pmd = pmd_swp_clear_soft_dirty(pmd);
arch_entry = __pmd_to_swp_entry(pmd);
return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
 }
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/huge_memory.c 

[PATCH v1 06/11] mm: soft-dirty: keep soft-dirty bits over thp migration

2016-03-02 Thread Naoya Horiguchi
The soft-dirty bit is designed to be preserved across page migration, so this
patch preserves it across thp migration too.

This patch also changes the bit used for _PAGE_SWP_SOFT_DIRTY, because bit 7
(_PAGE_PSE) is needed by thp migration: _PAGE_PSE and _PAGE_PRESENT are used
together to detect a pmd migration entry. When soft-dirty was introduced, bit 6
was used for nonlinear file mapping, but that feature has since been replaced
with emulation, so we can relocate _PAGE_SWP_SOFT_DIRTY to bit 6.

Signed-off-by: Naoya Horiguchi 
---
 arch/x86/include/asm/pgtable.h   | 17 +
 arch/x86/include/asm/pgtable_types.h |  8 
 include/asm-generic/pgtable.h| 34 +-
 include/linux/swapops.h  |  2 ++
 mm/huge_memory.c | 33 +++--
 5 files changed, 87 insertions(+), 7 deletions(-)

diff --git v4.5-rc5-mmotm-2016-02-24-16-18/arch/x86/include/asm/pgtable.h 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/arch/x86/include/asm/pgtable.h
index 0df9afe..e3da9fe 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/arch/x86/include/asm/pgtable.h
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/arch/x86/include/asm/pgtable.h
@@ -920,6 +920,23 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
 {
return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY);
 }
+
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
+{
+   return pmd_set_flags(pmd, _PAGE_SWP_SOFT_DIRTY);
+}
+
+static inline int pmd_swp_soft_dirty(pmd_t pmd)
+{
+   return pmd_flags(pmd) & _PAGE_SWP_SOFT_DIRTY;
+}
+
+static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
+{
+   return pmd_clear_flags(pmd, _PAGE_SWP_SOFT_DIRTY);
+}
+#endif
 #endif
 
 #include <asm-generic/pgtable.h>
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/arch/x86/include/asm/pgtable_types.h 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/arch/x86/include/asm/pgtable_types.h
index 4432ab7..a5d5e43 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/arch/x86/include/asm/pgtable_types.h
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/arch/x86/include/asm/pgtable_types.h
@@ -71,14 +71,14 @@
  * Tracking soft dirty bit when a page goes to a swap is tricky.
  * We need a bit which can be stored in pte _and_ not conflict
  * with swap entry format. On x86 bits 6 and 7 are *not* involved
- * into swap entry computation, but bit 6 is used for nonlinear
- * file mapping, so we borrow bit 7 for soft dirty tracking.
+ * into swap entry computation, but bit 7 is used for thp migration,
+ * so we borrow bit 6 for soft dirty tracking.
  *
  * Please note that this bit must be treated as swap dirty page
- * mark if and only if the PTE has present bit clear!
+ * mark if and only if the PTE/PMD has present bit clear!
  */
 #ifdef CONFIG_MEM_SOFT_DIRTY
-#define _PAGE_SWP_SOFT_DIRTY   _PAGE_PSE
+#define _PAGE_SWP_SOFT_DIRTY   _PAGE_DIRTY
 #else
 #define _PAGE_SWP_SOFT_DIRTY   (_AT(pteval_t, 0))
 #endif
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/include/asm-generic/pgtable.h 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/include/asm-generic/pgtable.h
index 9401f48..1b0d610 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/include/asm-generic/pgtable.h
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/include/asm-generic/pgtable.h
@@ -489,7 +489,24 @@ static inline void ptep_modify_prot_commit(struct 
mm_struct *mm,
 #define arch_start_context_switch(prev)do {} while (0)
 #endif
 
-#ifndef CONFIG_HAVE_ARCH_SOFT_DIRTY
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+#ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
+static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
+{
+   return pmd;
+}
+
+static inline int pmd_swp_soft_dirty(pmd_t pmd)
+{
+   return 0;
+}
+
+static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
+{
+   return pmd;
+}
+#endif
+#else /* !CONFIG_HAVE_ARCH_SOFT_DIRTY */
 static inline int pte_soft_dirty(pte_t pte)
 {
return 0;
@@ -534,6 +551,21 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
 {
return pte;
 }
+
+static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
+{
+   return pmd;
+}
+
+static inline int pmd_swp_soft_dirty(pmd_t pmd)
+{
+   return 0;
+}
+
+static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
+{
+   return pmd;
+}
 #endif
 
 #ifndef __HAVE_PFNMAP_TRACKING
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/include/linux/swapops.h 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/include/linux/swapops.h
index b402a2c..18f3744 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/include/linux/swapops.h
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/include/linux/swapops.h
@@ -176,6 +176,8 @@ static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd)
 {
swp_entry_t arch_entry;
 
+   if (pmd_swp_soft_dirty(pmd))
+   pmd = pmd_swp_clear_soft_dirty(pmd);
arch_entry = __pmd_to_swp_entry(pmd);
return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
 }
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/huge_memory.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/huge_memory.c

[PATCH v1 02/11] mm: thp: introduce CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION

2016-03-02 Thread Naoya Horiguchi
Introduce CONFIG_ARCH_ENABLE_THP_MIGRATION to limit thp migration
functionality to x86_64, which should be safer as a first step.

Signed-off-by: Naoya Horiguchi 
---
 arch/x86/Kconfig|  4 
 include/linux/huge_mm.h | 14 ++
 mm/Kconfig  |  3 +++
 3 files changed, 21 insertions(+)

diff --git v4.5-rc5-mmotm-2016-02-24-16-18/arch/x86/Kconfig 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/arch/x86/Kconfig
index 993aca4..7a563cf 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/arch/x86/Kconfig
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/arch/x86/Kconfig
@@ -2198,6 +2198,10 @@ config ARCH_ENABLE_HUGEPAGE_MIGRATION
def_bool y
depends on X86_64 && HUGETLB_PAGE && MIGRATION
 
+config ARCH_ENABLE_THP_MIGRATION
+   def_bool y
+   depends on X86_64 && TRANSPARENT_HUGEPAGE && MIGRATION
+
 menu "Power management and ACPI options"
 
 config ARCH_HIBERNATION_HEADER
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/include/linux/huge_mm.h 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/include/linux/huge_mm.h
index 459fd25..09b215d 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/include/linux/huge_mm.h
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/include/linux/huge_mm.h
@@ -156,6 +156,15 @@ static inline bool is_huge_zero_pmd(pmd_t pmd)
 
 struct page *get_huge_zero_page(void);
 
+static inline bool thp_migration_supported(void)
+{
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+   return true;
+#else
+   return false;
+#endif
+}
+
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
 #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
@@ -213,6 +222,11 @@ static inline struct page *follow_devmap_pmd(struct 
vm_area_struct *vma,
 {
return NULL;
 }
+
+static inline bool thp_migration_supported(void)
+{
+   return false;
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #endif /* _LINUX_HUGE_MM_H */
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/Kconfig 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/Kconfig
index f2c1a07..64e7ab6 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/mm/Kconfig
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/Kconfig
@@ -265,6 +265,9 @@ config MIGRATION
 config ARCH_ENABLE_HUGEPAGE_MIGRATION
bool
 
+config ARCH_ENABLE_THP_MIGRATION
+   bool
+
 config PHYS_ADDR_T_64BIT
def_bool 64BIT || ARCH_PHYS_ADDR_T_64BIT
 
-- 
2.7.0



[PATCH v1 01/11] mm: mempolicy: add queue_pages_node_check()

2016-03-02 Thread Naoya Horiguchi
Introduce a separate check routine related to MPOL_MF_INVERT flag. This patch
just does cleanup, no behavioral change.

Signed-off-by: Naoya Horiguchi 
---
 mm/mempolicy.c | 16 +++-
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/mempolicy.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/mempolicy.c
index 8c5fd08..840a0ad 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/mm/mempolicy.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/mempolicy.c
@@ -478,6 +478,15 @@ struct queue_pages {
struct vm_area_struct *prev;
 };
 
+static inline bool queue_pages_node_check(struct page *page,
+   struct queue_pages *qp)
+{
+   int nid = page_to_nid(page);
+   unsigned long flags = qp->flags;
+
+   return node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT);
+}
+
 /*
  * Scan through pages checking if pages follow certain conditions,
  * and move them to the pagelist if they do.
@@ -529,8 +538,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long 
addr,
 */
if (PageReserved(page))
continue;
-   nid = page_to_nid(page);
-   if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT))
+   if (queue_pages_node_check(page, qp))
continue;
if (PageTail(page) && PageAnon(page)) {
get_page(page);
@@ -562,7 +570,6 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long 
hmask,
 #ifdef CONFIG_HUGETLB_PAGE
struct queue_pages *qp = walk->private;
unsigned long flags = qp->flags;
-   int nid;
struct page *page;
spinlock_t *ptl;
pte_t entry;
@@ -572,8 +579,7 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long 
hmask,
if (!pte_present(entry))
goto unlock;
page = pte_page(entry);
-   nid = page_to_nid(page);
-   if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT))
+   if (queue_pages_node_check(page, qp))
goto unlock;
/* With MPOL_MF_MOVE, we migrate only unshared hugepage. */
if (flags & (MPOL_MF_MOVE_ALL) ||
-- 
2.7.0



[PATCH v1 02/11] mm: thp: introduce CONFIG_ARCH_ENABLE_THP_MIGRATION

2016-03-02 Thread Naoya Horiguchi
Introduces CONFIG_ARCH_ENABLE_THP_MIGRATION to limit thp migration
functionality to x86_64, which should be safer as a first step.

Signed-off-by: Naoya Horiguchi 
---
 arch/x86/Kconfig|  4 
 include/linux/huge_mm.h | 14 ++
 mm/Kconfig  |  3 +++
 3 files changed, 21 insertions(+)

diff --git v4.5-rc5-mmotm-2016-02-24-16-18/arch/x86/Kconfig 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/arch/x86/Kconfig
index 993aca4..7a563cf 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/arch/x86/Kconfig
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/arch/x86/Kconfig
@@ -2198,6 +2198,10 @@ config ARCH_ENABLE_HUGEPAGE_MIGRATION
def_bool y
depends on X86_64 && HUGETLB_PAGE && MIGRATION
 
+config ARCH_ENABLE_THP_MIGRATION
+   def_bool y
+   depends on X86_64 && TRANSPARENT_HUGEPAGE && MIGRATION
+
 menu "Power management and ACPI options"
 
 config ARCH_HIBERNATION_HEADER
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/include/linux/huge_mm.h 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/include/linux/huge_mm.h
index 459fd25..09b215d 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/include/linux/huge_mm.h
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/include/linux/huge_mm.h
@@ -156,6 +156,15 @@ static inline bool is_huge_zero_pmd(pmd_t pmd)
 
 struct page *get_huge_zero_page(void);
 
+static inline bool thp_migration_supported(void)
+{
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+   return true;
+#else
+   return false;
+#endif
+}
+
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
 #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
@@ -213,6 +222,11 @@ static inline struct page *follow_devmap_pmd(struct 
vm_area_struct *vma,
 {
return NULL;
 }
+
+static inline bool thp_migration_supported(void)
+{
+   return false;
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #endif /* _LINUX_HUGE_MM_H */
diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/Kconfig 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/Kconfig
index f2c1a07..64e7ab6 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/mm/Kconfig
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/Kconfig
@@ -265,6 +265,9 @@ config MIGRATION
 config ARCH_ENABLE_HUGEPAGE_MIGRATION
bool
 
+config ARCH_ENABLE_THP_MIGRATION
+   bool
+
 config PHYS_ADDR_T_64BIT
def_bool 64BIT || ARCH_PHYS_ADDR_T_64BIT
 
-- 
2.7.0



[PATCH v1 01/11] mm: mempolicy: add queue_pages_node_check()

2016-03-02 Thread Naoya Horiguchi
Introduce a separate check routine related to MPOL_MF_INVERT flag. This patch
just does cleanup, no behavioral change.

Signed-off-by: Naoya Horiguchi 
---
 mm/mempolicy.c | 16 +++-
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git v4.5-rc5-mmotm-2016-02-24-16-18/mm/mempolicy.c 
v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/mempolicy.c
index 8c5fd08..840a0ad 100644
--- v4.5-rc5-mmotm-2016-02-24-16-18/mm/mempolicy.c
+++ v4.5-rc5-mmotm-2016-02-24-16-18_patched/mm/mempolicy.c
@@ -478,6 +478,15 @@ struct queue_pages {
struct vm_area_struct *prev;
 };
 
+static inline bool queue_pages_node_check(struct page *page,
+   struct queue_pages *qp)
+{
+   int nid = page_to_nid(page);
+   unsigned long flags = qp->flags;
+
+   return node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT);
+}
+
 /*
  * Scan through pages checking if pages follow certain conditions,
  * and move them to the pagelist if they do.
@@ -529,8 +538,7 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long 
addr,
 */
if (PageReserved(page))
continue;
-   nid = page_to_nid(page);
-   if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT))
+   if (queue_pages_node_check(page, qp))
continue;
if (PageTail(page) && PageAnon(page)) {
get_page(page);
@@ -562,7 +570,6 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long 
hmask,
 #ifdef CONFIG_HUGETLB_PAGE
struct queue_pages *qp = walk->private;
unsigned long flags = qp->flags;
-   int nid;
struct page *page;
spinlock_t *ptl;
pte_t entry;
@@ -572,8 +579,7 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long 
hmask,
if (!pte_present(entry))
goto unlock;
page = pte_page(entry);
-   nid = page_to_nid(page);
-   if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT))
+   if (queue_pages_node_check(page, qp))
goto unlock;
/* With MPOL_MF_MOVE, we migrate only unshared hugepage. */
if (flags & (MPOL_MF_MOVE_ALL) ||
-- 
2.7.0



Re: [PATCH] thermal: consistently use int for trip temp

2016-03-02 Thread kbuild test robot
Hi Wei,

[auto build test WARNING on thermal/next]
[also build test WARNING on v4.5-rc6 next-20160302]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Wei-Ni/thermal-consistently-use-int-for-trip-temp/20160303-151648
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rzhang/linux.git next
config: x86_64-randconfig-x015-201609 (attached as .config)
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64 

All warnings (new ones prefixed by >>):

   In file included from include/uapi/linux/stddef.h:1:0,
from include/linux/stddef.h:4,
from include/uapi/linux/posix_types.h:4,
from include/uapi/linux/types.h:13,
from include/linux/types.h:5,
from include/linux/list.h:4,
from include/linux/module.h:9,
from drivers/thermal/thermal_core.c:28:
   drivers/thermal/thermal_core.c: In function 'trip_point_temp_store':
   drivers/thermal/thermal_core.c:695:24: warning: passing argument 3 of 
'kstrtoul' from incompatible pointer type [-Wincompatible-pointer-types]
 if (kstrtoul(buf, 10, &temperature))
   ^
   include/linux/compiler.h:147:30: note: in definition of macro '__trace_if'
 if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
 ^
>> drivers/thermal/thermal_core.c:695:2: note: in expansion of macro 'if'
 if (kstrtoul(buf, 10, &temperature))
 ^
   In file included from include/linux/list.h:8:0,
from include/linux/module.h:9,
from drivers/thermal/thermal_core.c:28:
   include/linux/kernel.h:291:32: note: expected 'long unsigned int *' but 
argument is of type 'int *'
static inline int __must_check kstrtoul(const char *s, unsigned int base, 
unsigned long *res)
   ^
   In file included from include/uapi/linux/stddef.h:1:0,
from include/linux/stddef.h:4,
from include/uapi/linux/posix_types.h:4,
from include/uapi/linux/types.h:13,
from include/linux/types.h:5,
from include/linux/list.h:4,
from include/linux/module.h:9,
from drivers/thermal/thermal_core.c:28:
   drivers/thermal/thermal_core.c:695:24: warning: passing argument 3 of 
'kstrtoul' from incompatible pointer type [-Wincompatible-pointer-types]
 if (kstrtoul(buf, 10, &temperature))
   ^
   include/linux/compiler.h:147:42: note: in definition of macro '__trace_if'
 if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
 ^
>> drivers/thermal/thermal_core.c:695:2: note: in expansion of macro 'if'
 if (kstrtoul(buf, 10, &temperature))
 ^
   In file included from include/linux/list.h:8:0,
from include/linux/module.h:9,
from drivers/thermal/thermal_core.c:28:
   include/linux/kernel.h:291:32: note: expected 'long unsigned int *' but 
argument is of type 'int *'
static inline int __must_check kstrtoul(const char *s, unsigned int base, 
unsigned long *res)
   ^
   In file included from include/uapi/linux/stddef.h:1:0,
from include/linux/stddef.h:4,
from include/uapi/linux/posix_types.h:4,
from include/uapi/linux/types.h:13,
from include/linux/types.h:5,
from include/linux/list.h:4,
from include/linux/module.h:9,
from drivers/thermal/thermal_core.c:28:
   drivers/thermal/thermal_core.c:695:24: warning: passing argument 3 of 
'kstrtoul' from incompatible pointer type [-Wincompatible-pointer-types]
 if (kstrtoul(buf, 10, &temperature))
   ^
   include/linux/compiler.h:158:16: note: in definition of macro '__trace_if'
  __r = !!(cond); \
   ^
>> drivers/thermal/thermal_core.c:695:2: note: in expansion of macro 'if'
 if (kstrtoul(buf, 10, &temperature))
 ^
   In file included from include/linux/list.h:8:0,
from include/linux/module.h:9,
from drivers/thermal/thermal_core.c:28:
   include/linux/kernel.h:291:32: note: expected 'long unsigned int *' but 
argument is of type 'int *'
static inline int __must_check kstrtoul(const char *s, unsigned int base, 
unsigned long *res)
   ^
   In file included from include/uapi/linux/stddef.h:1:0,
from include/linux/stddef.h:4,
from include/uapi/linux/posix_types.h:4,
from include/uapi/linux/types.h:13,
from include/linux/types.h:5,
from include/linux/list.h:4,
from 

Re: [PATCH] thermal: consistently use int for trip temp

2016-03-02 Thread kbuild test robot
Hi Wei,

[auto build test WARNING on thermal/next]
[also build test WARNING on v4.5-rc6 next-20160302]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Wei-Ni/thermal-consistently-use-int-for-trip-temp/20160303-151648
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rzhang/linux.git next
config: x86_64-randconfig-x015-201609 (attached as .config)
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64 

All warnings (new ones prefixed by >>):

   In file included from include/uapi/linux/stddef.h:1:0,
from include/linux/stddef.h:4,
from include/uapi/linux/posix_types.h:4,
from include/uapi/linux/types.h:13,
from include/linux/types.h:5,
from include/linux/list.h:4,
from include/linux/module.h:9,
from drivers/thermal/thermal_core.c:28:
   drivers/thermal/thermal_core.c: In function 'trip_point_temp_store':
   drivers/thermal/thermal_core.c:695:24: warning: passing argument 3 of 
'kstrtoul' from incompatible pointer type [-Wincompatible-pointer-types]
 if (kstrtoul(buf, 10, &temperature))
   ^
   include/linux/compiler.h:147:30: note: in definition of macro '__trace_if'
 if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
 ^
>> drivers/thermal/thermal_core.c:695:2: note: in expansion of macro 'if'
 if (kstrtoul(buf, 10, &temperature))
 ^
   In file included from include/linux/list.h:8:0,
from include/linux/module.h:9,
from drivers/thermal/thermal_core.c:28:
   include/linux/kernel.h:291:32: note: expected 'long unsigned int *' but 
argument is of type 'int *'
static inline int __must_check kstrtoul(const char *s, unsigned int base, 
unsigned long *res)
   ^
   In file included from include/uapi/linux/stddef.h:1:0,
from include/linux/stddef.h:4,
from include/uapi/linux/posix_types.h:4,
from include/uapi/linux/types.h:13,
from include/linux/types.h:5,
from include/linux/list.h:4,
from include/linux/module.h:9,
from drivers/thermal/thermal_core.c:28:
   drivers/thermal/thermal_core.c:695:24: warning: passing argument 3 of 
'kstrtoul' from incompatible pointer type [-Wincompatible-pointer-types]
 if (kstrtoul(buf, 10, &temperature))
   ^
   include/linux/compiler.h:147:42: note: in definition of macro '__trace_if'
 if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
 ^
>> drivers/thermal/thermal_core.c:695:2: note: in expansion of macro 'if'
 if (kstrtoul(buf, 10, &temperature))
 ^
   In file included from include/linux/list.h:8:0,
from include/linux/module.h:9,
from drivers/thermal/thermal_core.c:28:
   include/linux/kernel.h:291:32: note: expected 'long unsigned int *' but 
argument is of type 'int *'
static inline int __must_check kstrtoul(const char *s, unsigned int base, 
unsigned long *res)
   ^
   In file included from include/uapi/linux/stddef.h:1:0,
from include/linux/stddef.h:4,
from include/uapi/linux/posix_types.h:4,
from include/uapi/linux/types.h:13,
from include/linux/types.h:5,
from include/linux/list.h:4,
from include/linux/module.h:9,
from drivers/thermal/thermal_core.c:28:
   drivers/thermal/thermal_core.c:695:24: warning: passing argument 3 of 
'kstrtoul' from incompatible pointer type [-Wincompatible-pointer-types]
 if (kstrtoul(buf, 10, &temperature))
   ^
   include/linux/compiler.h:158:16: note: in definition of macro '__trace_if'
  __r = !!(cond); \
   ^
>> drivers/thermal/thermal_core.c:695:2: note: in expansion of macro 'if'
 if (kstrtoul(buf, 10, &temperature))
 ^
   In file included from include/linux/list.h:8:0,
from include/linux/module.h:9,
from drivers/thermal/thermal_core.c:28:
   include/linux/kernel.h:291:32: note: expected 'long unsigned int *' but 
argument is of type 'int *'
static inline int __must_check kstrtoul(const char *s, unsigned int base, 
unsigned long *res)
   ^
   In file included from include/uapi/linux/stddef.h:1:0,
from include/linux/stddef.h:4,
from include/uapi/linux/posix_types.h:4,
from include/uapi/linux/types.h:13,
from include/linux/types.h:5,
from include/linux/list.h:4,
from 

Re: [PATCH 3/6] x86/mbm: Intel Memory B/W Monitoring enumeration and init

2016-03-02 Thread Thomas Gleixner
On Wed, 2 Mar 2016, Vikas Shivappa wrote:
> + if (cqm_enabled && mbm_enabled)
> + intel_cqm_events_group.attrs = intel_cmt_mbm_events_attr;
> + else if (!cqm_enabled && mbm_enabled)
> + intel_cqm_events_group.attrs = intel_mbm_events_attr;
> + else if (cqm_enabled && !mbm_enabled)
> + intel_cqm_events_group.attrs = intel_cqm_events_attr;
> +
>   ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
>   if (ret) {
>   pr_err("Intel CQM perf registration failed: %d\n", ret);
>   goto out;

So what cleans up mbm_local and mbm_total in that case?

Thanks,

tglx


Re: [PATCH 3/6] x86/mbm: Intel Memory B/W Monitoring enumeration and init

2016-03-02 Thread Thomas Gleixner
On Wed, 2 Mar 2016, Vikas Shivappa wrote:
> + if (cqm_enabled && mbm_enabled)
> + intel_cqm_events_group.attrs = intel_cmt_mbm_events_attr;
> + else if (!cqm_enabled && mbm_enabled)
> + intel_cqm_events_group.attrs = intel_mbm_events_attr;
> + else if (cqm_enabled && !mbm_enabled)
> + intel_cqm_events_group.attrs = intel_cqm_events_attr;
> +
>   ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
>   if (ret) {
>   pr_err("Intel CQM perf registration failed: %d\n", ret);
>   goto out;

So what cleans up mbm_local and mbm_total in that case?

Thanks,

tglx


Re: [PATCH 2/7] extcon: usb-gpio: add support for ACPI gpio interface

2016-03-02 Thread Lu Baolu


On 03/03/2016 03:24 PM, Chanwoo Choi wrote:
> Hi Lu,
>
> On 2016년 03월 03일 15:37, Lu Baolu wrote:
>> GPIO resource could be retrieved through ACPI as well.
>>
>> Signed-off-by: Lu Baolu 
>> Reviewed-by: Felipe Balbi 
>> ---
>>  drivers/extcon/extcon-usb-gpio.c | 3 ++-
>>  1 file changed, 2 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/extcon/extcon-usb-gpio.c 
>> b/drivers/extcon/extcon-usb-gpio.c
>> index af9c8b0..472c431 100644
>> --- a/drivers/extcon/extcon-usb-gpio.c
>> +++ b/drivers/extcon/extcon-usb-gpio.c
>> @@ -26,6 +26,7 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>  
>>  #define USB_GPIO_DEBOUNCE_MS20  /* ms */
>>  
>> @@ -91,7 +92,7 @@ static int usb_extcon_probe(struct platform_device *pdev)
>>  struct usb_extcon_info *info;
>>  int ret;
>>  
>> -if (!np)
>> +if (!np && !ACPI_HANDLE(dev))
>>  return -EINVAL;
>>  
>>  info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
>>
> Looks good to me.
>
> Acked-by: Chanwoo Choi 

Thank you!

>
> Best Regards,
> Chanwoo Choi
>



Re: [PATCH 2/7] extcon: usb-gpio: add support for ACPI gpio interface

2016-03-02 Thread Lu Baolu


On 03/03/2016 03:24 PM, Chanwoo Choi wrote:
> Hi Lu,
>
> On 2016년 03월 03일 15:37, Lu Baolu wrote:
>> GPIO resource could be retrieved through ACPI as well.
>>
>> Signed-off-by: Lu Baolu 
>> Reviewed-by: Felipe Balbi 
>> ---
>>  drivers/extcon/extcon-usb-gpio.c | 3 ++-
>>  1 file changed, 2 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/extcon/extcon-usb-gpio.c 
>> b/drivers/extcon/extcon-usb-gpio.c
>> index af9c8b0..472c431 100644
>> --- a/drivers/extcon/extcon-usb-gpio.c
>> +++ b/drivers/extcon/extcon-usb-gpio.c
>> @@ -26,6 +26,7 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>  
>>  #define USB_GPIO_DEBOUNCE_MS20  /* ms */
>>  
>> @@ -91,7 +92,7 @@ static int usb_extcon_probe(struct platform_device *pdev)
>>  struct usb_extcon_info *info;
>>  int ret;
>>  
>> -if (!np)
>> +if (!np && !ACPI_HANDLE(dev))
>>  return -EINVAL;
>>  
>>  info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
>>
> Looks good to me.
>
> Acked-by: Chanwoo Choi 

Thank you!

>
> Best Regards,
> Chanwoo Choi
>



Re: [PATCH 1/7] extcon: usb-gpio: add device binding for platform device

2016-03-02 Thread Lu Baolu


On 03/03/2016 03:24 PM, Chanwoo Choi wrote:
> Hello Lu,
>
> On 2016년 03월 03일 15:37, Lu Baolu wrote:
>> This is needed to handle the GPIO connected USB ID pin found on
>> Intel Baytrail devices.
>>
>> Signed-off-by: Lu Baolu 
>> Reviewed-by: Felipe Balbi 
>> ---
>>  drivers/extcon/extcon-usb-gpio.c | 7 +++
>>  1 file changed, 7 insertions(+)
>>
>> diff --git a/drivers/extcon/extcon-usb-gpio.c 
>> b/drivers/extcon/extcon-usb-gpio.c
>> index 2b2fecf..af9c8b0 100644
>> --- a/drivers/extcon/extcon-usb-gpio.c
>> +++ b/drivers/extcon/extcon-usb-gpio.c
>> @@ -206,6 +206,12 @@ static const struct of_device_id usb_extcon_dt_match[] 
>> = {
>>  };
>>  MODULE_DEVICE_TABLE(of, usb_extcon_dt_match);
>>  
>> +static const struct platform_device_id usb_extcon_platform_ids[] = {
>> +{ .name = "extcon-usb-gpio", },
>> +{ /* sentinel */ }
>> +};
>> +MODULE_DEVICE_TABLE(platform, usb_extcon_platform_ids);
>> +
>>  static struct platform_driver usb_extcon_driver = {
>>  .probe  = usb_extcon_probe,
>>  .remove = usb_extcon_remove,
>> @@ -214,6 +220,7 @@ static struct platform_driver usb_extcon_driver = {
>>  .pm = &usb_extcon_pm_ops,
>>  .of_match_table = usb_extcon_dt_match,
>>  },
>> +.id_table = usb_extcon_platform_ids,
>>  };
>>  
>>  module_platform_driver(usb_extcon_driver);
>>
> Looks good to me.
>
> Acked-by: Chanwoo Choi 

Thank you!

>
> Best Regards,
> Chanwoo Choi
>
>



Re: [PATCH 1/7] extcon: usb-gpio: add device binding for platform device

2016-03-02 Thread Lu Baolu


On 03/03/2016 03:24 PM, Chanwoo Choi wrote:
> Hello Lu,
>
> On 2016년 03월 03일 15:37, Lu Baolu wrote:
>> This is needed to handle the GPIO connected USB ID pin found on
>> Intel Baytrail devices.
>>
>> Signed-off-by: Lu Baolu 
>> Reviewed-by: Felipe Balbi 
>> ---
>>  drivers/extcon/extcon-usb-gpio.c | 7 +++
>>  1 file changed, 7 insertions(+)
>>
>> diff --git a/drivers/extcon/extcon-usb-gpio.c 
>> b/drivers/extcon/extcon-usb-gpio.c
>> index 2b2fecf..af9c8b0 100644
>> --- a/drivers/extcon/extcon-usb-gpio.c
>> +++ b/drivers/extcon/extcon-usb-gpio.c
>> @@ -206,6 +206,12 @@ static const struct of_device_id usb_extcon_dt_match[] 
>> = {
>>  };
>>  MODULE_DEVICE_TABLE(of, usb_extcon_dt_match);
>>  
>> +static const struct platform_device_id usb_extcon_platform_ids[] = {
>> +{ .name = "extcon-usb-gpio", },
>> +{ /* sentinel */ }
>> +};
>> +MODULE_DEVICE_TABLE(platform, usb_extcon_platform_ids);
>> +
>>  static struct platform_driver usb_extcon_driver = {
>>  .probe  = usb_extcon_probe,
>>  .remove = usb_extcon_remove,
>> @@ -214,6 +220,7 @@ static struct platform_driver usb_extcon_driver = {
>>  .pm = &usb_extcon_pm_ops,
>>  .of_match_table = usb_extcon_dt_match,
>>  },
>> +.id_table = usb_extcon_platform_ids,
>>  };
>>  
>>  module_platform_driver(usb_extcon_driver);
>>
> Looks good to me.
>
> Acked-by: Chanwoo Choi 

Thank you!

>
> Best Regards,
> Chanwoo Choi
>
>



Re: [tip:smp/hotplug] rcu: Make CPU_DYING_IDLE an explicit call

2016-03-02 Thread Thomas Gleixner
On Wed, 2 Mar 2016, Paul E. McKenney wrote:
> > --- a/kernel/cpu.c
> > +++ b/kernel/cpu.c
> > @@ -762,6 +762,7 @@ void cpuhp_report_idle_dead(void)
> > BUG_ON(st->state != CPUHP_AP_OFFLINE);
> > st->state = CPUHP_AP_IDLE_DEAD;
> > complete(&st->done);
> 
> Not to be repetitive or anything, but if we delay here, it can break
> RCU on a number of architectures.  Either the CPU can be killed holding
> one of RCU's locks or RCU can wrongly see the CPU as still being alive.
> Either can prevent future RCU grace periods from ever completing, thus
> OOMing the system.

Thanks for the reminder. I wanted to fix that, but then forgot again. Fix is
on the way.

Thanks,

tglx


Re: [tip:smp/hotplug] rcu: Make CPU_DYING_IDLE an explicit call

2016-03-02 Thread Thomas Gleixner
On Wed, 2 Mar 2016, Paul E. McKenney wrote:
> > --- a/kernel/cpu.c
> > +++ b/kernel/cpu.c
> > @@ -762,6 +762,7 @@ void cpuhp_report_idle_dead(void)
> > BUG_ON(st->state != CPUHP_AP_OFFLINE);
> > st->state = CPUHP_AP_IDLE_DEAD;
> > complete(&st->done);
> 
> Not to be repetitive or anything, but if we delay here, it can break
> RCU on a number of architectures.  Either the CPU can be killed holding
> one of RCU's locks or RCU can wrongly see the CPU as still being alive.
> Either can prevent future RCU grace periods from ever completing, thus
> OOMing the system.

Thanks for the reminder. I wanted to fix that, but then forgot again. Fix is
on the way.

Thanks,

tglx


Re: [PATCH] objtool: Disable stack validation when CROSS_COMPILE is used

2016-03-02 Thread Sedat Dilek
On 3/2/16, Stephen Rothwell  wrote:
> Hi Josh,
>
> On Tue, 1 Mar 2016 15:54:51 -0600 Josh Poimboeuf 
> wrote:
>>
>> Changing it to use the host compiler would probably be an easy fix, but
>> that would expose a harder bug related to endianness.
>
> Just by luck, my PowerPC host is little endian :-)
>
>> How about the below workaround patch to disable objtool and warn when
>> CROSS_COMPILE is used?  If anybody complains about lack of cross-compile
>> support later, we could try to fix it then.
>
> This seems reasonable.
>
>> From a3c65947011a420743f308b698171c4209105d3f Mon Sep 17 00:00:00 2001
>> Message-Id:
>> 
>> From: Josh Poimboeuf 
>> Date: Tue, 1 Mar 2016 13:35:51 -0600
>> Subject: [PATCH] objtool: Disable stack validation when CROSS_COMPILE is
>> used
>
> I have applied this to the merge of the tip tree in linux-next today
> and it compiles fine for me.  I will continue applying it until
> something better comes along or it is applied to the tip tree.
>

Does Linux next-20160303 have this patch?
On a quick view I could not find it.

- Sedat -

> Thanks for that.
> --
> Cheers,
> Stephen Rothwell
> --
> To unsubscribe from this list: send the line "unsubscribe linux-next" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>


Re: [PATCH] objtool: Disable stack validation when CROSS_COMPILE is used

2016-03-02 Thread Sedat Dilek
On 3/2/16, Stephen Rothwell  wrote:
> Hi Josh,
>
> On Tue, 1 Mar 2016 15:54:51 -0600 Josh Poimboeuf 
> wrote:
>>
>> Changing it to use the host compiler would probably be an easy fix, but
>> that would expose a harder bug related to endianness.
>
> Just by luck, my PowerPC host is little endian :-)
>
>> How about the below workaround patch to disable objtool and warn when
>> CROSS_COMPILE is used?  If anybody complains about lack of cross-compile
>> support later, we could try to fix it then.
>
> This seems reasonable.
>
>> From a3c65947011a420743f308b698171c4209105d3f Mon Sep 17 00:00:00 2001
>> Message-Id:
>> 
>> From: Josh Poimboeuf 
>> Date: Tue, 1 Mar 2016 13:35:51 -0600
>> Subject: [PATCH] objtool: Disable stack validation when CROSS_COMPILE is
>> used
>
> I have applied this to the merge of the tip tree in linux-next today
> and it compiles fine for me.  I will continue applying it until
> something better comes along or it is applied to the tip tree.
>

Does Linux next-20160303 have this patch?
On a quick view I could not find it.

- Sedat -

> Thanks for that.
> --
> Cheers,
> Stephen Rothwell
> --
> To unsubscribe from this list: send the line "unsubscribe linux-next" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>


Re: [PATCH] thermal: consistently use int for trip temp

2016-03-02 Thread kbuild test robot
Hi Wei,

[auto build test WARNING on thermal/next]
[also build test WARNING on v4.5-rc6 next-20160302]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Wei-Ni/thermal-consistently-use-int-for-trip-temp/20160303-151648
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rzhang/linux.git next
config: sparc64-allyesconfig (attached as .config)
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=sparc64 

All warnings (new ones prefixed by >>):

   drivers/thermal/thermal_core.c: In function 'trip_point_temp_store':
>> drivers/thermal/thermal_core.c:695:6: warning: passing argument 3 of 
>> 'kstrtoul' from incompatible pointer type
 if (kstrtoul(buf, 10, &temperature))
 ^
   In file included from include/linux/list.h:8:0,
from include/linux/module.h:9,
from drivers/thermal/thermal_core.c:28:
   include/linux/kernel.h:291:32: note: expected 'long unsigned int *' but 
argument is of type 'int *'
static inline int __must_check kstrtoul(const char *s, unsigned int base, 
unsigned long *res)
   ^
   drivers/thermal/thermal_core.c: In function 'emul_temp_store':
   drivers/thermal/thermal_core.c:900:6: warning: passing argument 3 of 
'kstrtoul' from incompatible pointer type
 if (kstrtoul(buf, 10, &temperature))
 ^
   In file included from include/linux/list.h:8:0,
from include/linux/module.h:9,
from drivers/thermal/thermal_core.c:28:
   include/linux/kernel.h:291:32: note: expected 'long unsigned int *' but 
argument is of type 'int *'
static inline int __must_check kstrtoul(const char *s, unsigned int base, 
unsigned long *res)
   ^

vim +/kstrtoul +695 drivers/thermal/thermal_core.c

203d3d4a drivers/thermal/thermal.c  Zhang Rui 2008-01-17  679  }
203d3d4a drivers/thermal/thermal.c  Zhang Rui 2008-01-17  680  
203d3d4a drivers/thermal/thermal.c  Zhang Rui 2008-01-17  681  
static ssize_t
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  682  
trip_point_temp_store(struct device *dev, struct device_attribute *attr,
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  683   
 const char *buf, size_t count)
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  684  {
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  685   
struct thermal_zone_device *tz = to_thermal_zone(dev);
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  686   
int trip, ret;
1afa53cf drivers/thermal/thermal_core.c Wei Ni2016-03-03  687   
int temperature;
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  688  
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  689   
if (!tz->ops->set_trip_temp)
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  690   
return -EPERM;
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  691  
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  692   
if (!sscanf(attr->attr.name, "trip_point_%d_temp", &trip))
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  693   
return -EINVAL;
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  694  
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25 @695   
if (kstrtoul(buf, 10, &temperature))
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  696   
return -EINVAL;
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  697  
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  698   
ret = tz->ops->set_trip_temp(tz, trip, temperature);
ad74e46c drivers/thermal/thermal_core.c Kuninori Morimoto 2015-12-15  699   
if (ret)
ad74e46c drivers/thermal/thermal_core.c Kuninori Morimoto 2015-12-15  700   
return ret;
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  701  
ad74e46c drivers/thermal/thermal_core.c Kuninori Morimoto 2015-12-15  702   
thermal_zone_device_update(tz);
ad74e46c drivers/thermal/thermal_core.c Kuninori Morimoto 2015-12-15  703  

:: The code at line 695 was first introduced by commit
:: c56f5c0342dfee11a1a13d2f5bb7618de5b17590 Thermal: Make Thermal trip 
points writeable

:: TO: Durgadoss R <dugardos...@intel.com>
:: CC: Len Brown <len.br...@intel.com>

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: Binary data


Re: [PATCH] thermal: consistently use int for trip temp

2016-03-02 Thread kbuild test robot
Hi Wei,

[auto build test WARNING on thermal/next]
[also build test WARNING on v4.5-rc6 next-20160302]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Wei-Ni/thermal-consistently-use-int-for-trip-temp/20160303-151648
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rzhang/linux.git next
config: sparc64-allyesconfig (attached as .config)
reproduce:
wget 
https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross
 -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=sparc64 

All warnings (new ones prefixed by >>):

   drivers/thermal/thermal_core.c: In function 'trip_point_temp_store':
>> drivers/thermal/thermal_core.c:695:6: warning: passing argument 3 of 
>> 'kstrtoul' from incompatible pointer type
 if (kstrtoul(buf, 10, &temperature))
 ^
   In file included from include/linux/list.h:8:0,
from include/linux/module.h:9,
from drivers/thermal/thermal_core.c:28:
   include/linux/kernel.h:291:32: note: expected 'long unsigned int *' but 
argument is of type 'int *'
static inline int __must_check kstrtoul(const char *s, unsigned int base, 
unsigned long *res)
   ^
   drivers/thermal/thermal_core.c: In function 'emul_temp_store':
   drivers/thermal/thermal_core.c:900:6: warning: passing argument 3 of 
'kstrtoul' from incompatible pointer type
 if (kstrtoul(buf, 10, &temperature))
 ^
   In file included from include/linux/list.h:8:0,
from include/linux/module.h:9,
from drivers/thermal/thermal_core.c:28:
   include/linux/kernel.h:291:32: note: expected 'long unsigned int *' but 
argument is of type 'int *'
static inline int __must_check kstrtoul(const char *s, unsigned int base, 
unsigned long *res)
   ^

vim +/kstrtoul +695 drivers/thermal/thermal_core.c

203d3d4a drivers/thermal/thermal.c  Zhang Rui 2008-01-17  679  }
203d3d4a drivers/thermal/thermal.c  Zhang Rui 2008-01-17  680  
203d3d4a drivers/thermal/thermal.c  Zhang Rui 2008-01-17  681  
static ssize_t
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  682  
trip_point_temp_store(struct device *dev, struct device_attribute *attr,
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  683   
 const char *buf, size_t count)
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  684  {
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  685   
struct thermal_zone_device *tz = to_thermal_zone(dev);
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  686   
int trip, ret;
1afa53cf drivers/thermal/thermal_core.c Wei Ni2016-03-03  687   
int temperature;
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  688  
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  689   
if (!tz->ops->set_trip_temp)
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  690   
return -EPERM;
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  691  
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  692   
if (!sscanf(attr->attr.name, "trip_point_%d_temp", &trip))
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  693   
return -EINVAL;
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  694  
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25 @695   
if (kstrtoul(buf, 10, &temperature))
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  696   
return -EINVAL;
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  697  
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  698   
ret = tz->ops->set_trip_temp(tz, trip, temperature);
ad74e46c drivers/thermal/thermal_core.c Kuninori Morimoto 2015-12-15  699   
if (ret)
ad74e46c drivers/thermal/thermal_core.c Kuninori Morimoto 2015-12-15  700   
return ret;
c56f5c03 drivers/thermal/thermal_sys.c  Durgadoss R   2012-07-25  701  
ad74e46c drivers/thermal/thermal_core.c Kuninori Morimoto 2015-12-15  702   
thermal_zone_device_update(tz);
ad74e46c drivers/thermal/thermal_core.c Kuninori Morimoto 2015-12-15  703  

:: The code at line 695 was first introduced by commit
:: c56f5c0342dfee11a1a13d2f5bb7618de5b17590 Thermal: Make Thermal trip 
points writeable

:: TO: Durgadoss R 
:: CC: Len Brown 

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: Binary data


Re: [PATCH 2/7] extcon: usb-gpio: add support for ACPI gpio interface

2016-03-02 Thread Chanwoo Choi
Hi Lu,

On 2016년 03월 03일 15:37, Lu Baolu wrote:
> GPIO resource could be retrieved through ACPI as well.
> 
> Signed-off-by: Lu Baolu 
> Reviewed-by: Felipe Balbi 
> ---
>  drivers/extcon/extcon-usb-gpio.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/extcon/extcon-usb-gpio.c 
> b/drivers/extcon/extcon-usb-gpio.c
> index af9c8b0..472c431 100644
> --- a/drivers/extcon/extcon-usb-gpio.c
> +++ b/drivers/extcon/extcon-usb-gpio.c
> @@ -26,6 +26,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #define USB_GPIO_DEBOUNCE_MS 20  /* ms */
>  
> @@ -91,7 +92,7 @@ static int usb_extcon_probe(struct platform_device *pdev)
>   struct usb_extcon_info *info;
>   int ret;
>  
> - if (!np)
> + if (!np && !ACPI_HANDLE(dev))
>   return -EINVAL;
>  
>   info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
> 

Looks good to me.

Acked-by: Chanwoo Choi 

Best Regards,
Chanwoo Choi


Re: [PATCH 2/7] extcon: usb-gpio: add support for ACPI gpio interface

2016-03-02 Thread Chanwoo Choi
Hi Lu,

On 2016년 03월 03일 15:37, Lu Baolu wrote:
> GPIO resource could be retrieved through ACPI as well.
> 
> Signed-off-by: Lu Baolu 
> Reviewed-by: Felipe Balbi 
> ---
>  drivers/extcon/extcon-usb-gpio.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/extcon/extcon-usb-gpio.c 
> b/drivers/extcon/extcon-usb-gpio.c
> index af9c8b0..472c431 100644
> --- a/drivers/extcon/extcon-usb-gpio.c
> +++ b/drivers/extcon/extcon-usb-gpio.c
> @@ -26,6 +26,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #define USB_GPIO_DEBOUNCE_MS 20  /* ms */
>  
> @@ -91,7 +92,7 @@ static int usb_extcon_probe(struct platform_device *pdev)
>   struct usb_extcon_info *info;
>   int ret;
>  
> - if (!np)
> + if (!np && !ACPI_HANDLE(dev))
>   return -EINVAL;
>  
>   info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL);
> 

Looks good to me.

Acked-by: Chanwoo Choi 

Best Regards,
Chanwoo Choi


Re: [PATCH v5 04/15] scsi: ufs: verify hba controller hce reg value

2016-03-02 Thread Hannes Reinecke
On 03/01/2016 09:32 PM, yga...@codeaurora.org wrote:
>> On 02/28/2016 09:32 PM, Yaniv Gardi wrote:
>>> Sometimes, due to hw issues, it takes some time for the
>>> host controller register to update. In order to verify that the register
>>> has updated, polling is done until its value is set.
>>>
>>> In addition, the functions ufshcd_hba_stop() and
>>> ufshcd_wait_for_register() were updated with an additional input
>>> parameter, indicating whether the wait between reads will
>>> be done by sleeping or spinning the cpu.
>>>
>>> Signed-off-by: Raviv Shvili 
>>> Signed-off-by: Yaniv Gardi 
>>>
>>> ---
>>>  drivers/scsi/ufs/ufshcd.c | 53
>>> ---
>>>  drivers/scsi/ufs/ufshcd.h | 12 +++
>>>  2 files changed, 35 insertions(+), 30 deletions(-)
>>>
>>> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
>>> index 3400ceb..80031e6 100644
>>> --- a/drivers/scsi/ufs/ufshcd.c
>>> +++ b/drivers/scsi/ufs/ufshcd.c
>>> @@ -240,11 +240,13 @@ static inline void ufshcd_disable_irq(struct
>>> ufs_hba *hba)
>>>   * @val - wait condition
>>>   * @interval_us - polling interval in microsecs
>>>   * @timeout_ms - timeout in millisecs
>>> + * @can_sleep - perform sleep or just spin
>>>   *
>>>   * Returns -ETIMEDOUT on error, zero on success
>>>   */
>>> -static int ufshcd_wait_for_register(struct ufs_hba *hba, u32 reg, u32
>>> mask,
>>> -   u32 val, unsigned long interval_us, unsigned long timeout_ms)
>>> +int ufshcd_wait_for_register(struct ufs_hba *hba, u32 reg, u32 mask,
>>> +   u32 val, unsigned long interval_us,
>>> +   unsigned long timeout_ms, bool can_sleep)
>>>  {
>>> int err = 0;
>>> unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
>>> @@ -253,9 +255,10 @@ static int ufshcd_wait_for_register(struct ufs_hba
>>> *hba, u32 reg, u32 mask,
>>> val = val & mask;
>>>
>>> while ((ufshcd_readl(hba, reg) & mask) != val) {
>>> -   /* wakeup within 50us of expiry */
>>> -   usleep_range(interval_us, interval_us + 50);
>>> -
>>> +   if (can_sleep)
>>> +   usleep_range(interval_us, interval_us + 50);
>>> +   else
>>> +   udelay(interval_us);
>>> if (time_after(jiffies, timeout)) {
>>> if ((ufshcd_readl(hba, reg) & mask) != val)
>>> err = -ETIMEDOUT;
>>> @@ -1459,7 +1462,7 @@ ufshcd_clear_cmd(struct ufs_hba *hba, int tag)
>>>  */
>>> err = ufshcd_wait_for_register(hba,
>>> REG_UTP_TRANSFER_REQ_DOOR_BELL,
>>> -   mask, ~mask, 1000, 1000);
>>> +   mask, ~mask, 1000, 1000, true);
>>>
>>> return err;
>>>  }
>>> @@ -2815,6 +2818,23 @@ out:
>>>  }
>>>
>>>  /**
>>> + * ufshcd_hba_stop - Send controller to reset state
>>> + * @hba: per adapter instance
>>> + * @can_sleep: perform sleep or just spin
>>> + */
>>> +static inline void ufshcd_hba_stop(struct ufs_hba *hba, bool can_sleep)
>>> +{
>>> +   int err;
>>> +
>>> +   ufshcd_writel(hba, CONTROLLER_DISABLE,  REG_CONTROLLER_ENABLE);
>>> +   err = ufshcd_wait_for_register(hba, REG_CONTROLLER_ENABLE,
>>> +   CONTROLLER_ENABLE, CONTROLLER_DISABLE,
>>> +   10, 1, can_sleep);
>>> +   if (err)
>>> +   dev_err(hba->dev, "%s: Controller disable failed\n", __func__);
>>> +}
>>> +
>> Shouldn't you return an error here?
>> If the controller disable failed you probably need a hard reset or
>> something, otherwise I would assume that every other command from that
>> point on will not work as expected.
>>
>> Cheers,
>>
>> Hannes
> 
> 
> Hello Hannes,
> The original routine signature is:
> void ufshcd_hba_stop(struct ufs_hba *hba);
> 
> as you can see, no return value, the reason is simple - there is nothing
> we can do if writing to the register fails.
> 
> All we wanted to do here was to add a grace period for the
> register value to change. Also, we decided to add an error msg in case the
> value has not changed within this timeout.
> We cannot do anything else, let alone return an error, as there is no
> error handling in such a case.
> 
> So, as far as I see it, we only improved the already existing logic, by
> adding a grace period for the register change, and also by adding an
> error message that was absent before.
> 
Thanks for the explanation.

Reviewed-by: Hannes Reinecke 

Cheers,

Hannes
-- 
Dr. Hannes Reinecke   zSeries & Storage
h...@suse.de  +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg)


Re: [PATCH 1/7] extcon: usb-gpio: add device binding for platform device

2016-03-02 Thread Chanwoo Choi
Hello Lu,

On 2016년 03월 03일 15:37, Lu Baolu wrote:
> This is needed to handle the GPIO connected USB ID pin found on
> Intel Baytrail devices.
> 
> Signed-off-by: Lu Baolu 
> Reviewed-by: Felipe Balbi 
> ---
>  drivers/extcon/extcon-usb-gpio.c | 7 +++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/drivers/extcon/extcon-usb-gpio.c 
> b/drivers/extcon/extcon-usb-gpio.c
> index 2b2fecf..af9c8b0 100644
> --- a/drivers/extcon/extcon-usb-gpio.c
> +++ b/drivers/extcon/extcon-usb-gpio.c
> @@ -206,6 +206,12 @@ static const struct of_device_id usb_extcon_dt_match[] = 
> {
>  };
>  MODULE_DEVICE_TABLE(of, usb_extcon_dt_match);
>  
> +static const struct platform_device_id usb_extcon_platform_ids[] = {
> + { .name = "extcon-usb-gpio", },
> + { /* sentinel */ }
> +};
> +MODULE_DEVICE_TABLE(platform, usb_extcon_platform_ids);
> +
>  static struct platform_driver usb_extcon_driver = {
>   .probe  = usb_extcon_probe,
>   .remove = usb_extcon_remove,
> @@ -214,6 +220,7 @@ static struct platform_driver usb_extcon_driver = {
>   .pm = &usb_extcon_pm_ops,
>   .of_match_table = usb_extcon_dt_match,
>   },
> + .id_table = usb_extcon_platform_ids,
>  };
>  
>  module_platform_driver(usb_extcon_driver);
> 

Looks good to me.

Acked-by: Chanwoo Choi 

Best Regards,
Chanwoo Choi



Re: [PATCH 1/7] extcon: usb-gpio: add device binding for platform device

2016-03-02 Thread Chanwoo Choi
Hello Lu,

On 2016년 03월 03일 15:37, Lu Baolu wrote:
> This is needed to handle the GPIO connected USB ID pin found on
> Intel Baytrail devices.
> 
> Signed-off-by: Lu Baolu 
> Reviewed-by: Felipe Balbi 
> ---
>  drivers/extcon/extcon-usb-gpio.c | 7 +++
>  1 file changed, 7 insertions(+)
> 
> diff --git a/drivers/extcon/extcon-usb-gpio.c 
> b/drivers/extcon/extcon-usb-gpio.c
> index 2b2fecf..af9c8b0 100644
> --- a/drivers/extcon/extcon-usb-gpio.c
> +++ b/drivers/extcon/extcon-usb-gpio.c
> @@ -206,6 +206,12 @@ static const struct of_device_id usb_extcon_dt_match[] = 
> {
>  };
>  MODULE_DEVICE_TABLE(of, usb_extcon_dt_match);
>  
> +static const struct platform_device_id usb_extcon_platform_ids[] = {
> + { .name = "extcon-usb-gpio", },
> + { /* sentinel */ }
> +};
> +MODULE_DEVICE_TABLE(platform, usb_extcon_platform_ids);
> +
>  static struct platform_driver usb_extcon_driver = {
>   .probe  = usb_extcon_probe,
>   .remove = usb_extcon_remove,
> @@ -214,6 +220,7 @@ static struct platform_driver usb_extcon_driver = {
>   .pm = &usb_extcon_pm_ops,
>   .of_match_table = usb_extcon_dt_match,
>   },
> + .id_table = usb_extcon_platform_ids,
>  };
>  
>  module_platform_driver(usb_extcon_driver);
> 

Looks good to me.

Acked-by: Chanwoo Choi 

Best Regards,
Chanwoo Choi



Re: [PATCH v5 04/15] scsi: ufs: verify hba controller hce reg value

2016-03-02 Thread Hannes Reinecke
On 03/01/2016 09:32 PM, yga...@codeaurora.org wrote:
>> On 02/28/2016 09:32 PM, Yaniv Gardi wrote:
>>> Sometimes, due to hw issues, it takes some time for the
>>> host controller register to update. In order to verify that the register
>>> has updated, polling is done until its value is set.
>>>
>>> In addition, the functions ufshcd_hba_stop() and
>>> ufshcd_wait_for_register() were updated with an additional input
>>> parameter, indicating whether the wait between reads will
>>> be done by sleeping or spinning the cpu.
>>>
>>> Signed-off-by: Raviv Shvili 
>>> Signed-off-by: Yaniv Gardi 
>>>
>>> ---
>>>  drivers/scsi/ufs/ufshcd.c | 53
>>> ---
>>>  drivers/scsi/ufs/ufshcd.h | 12 +++
>>>  2 files changed, 35 insertions(+), 30 deletions(-)
>>>
>>> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
>>> index 3400ceb..80031e6 100644
>>> --- a/drivers/scsi/ufs/ufshcd.c
>>> +++ b/drivers/scsi/ufs/ufshcd.c
>>> @@ -240,11 +240,13 @@ static inline void ufshcd_disable_irq(struct
>>> ufs_hba *hba)
>>>   * @val - wait condition
>>>   * @interval_us - polling interval in microsecs
>>>   * @timeout_ms - timeout in millisecs
>>> + * @can_sleep - perform sleep or just spin
>>>   *
>>>   * Returns -ETIMEDOUT on error, zero on success
>>>   */
>>> -static int ufshcd_wait_for_register(struct ufs_hba *hba, u32 reg, u32
>>> mask,
>>> -   u32 val, unsigned long interval_us, unsigned long timeout_ms)
>>> +int ufshcd_wait_for_register(struct ufs_hba *hba, u32 reg, u32 mask,
>>> +   u32 val, unsigned long interval_us,
>>> +   unsigned long timeout_ms, bool can_sleep)
>>>  {
>>> int err = 0;
>>> unsigned long timeout = jiffies + msecs_to_jiffies(timeout_ms);
>>> @@ -253,9 +255,10 @@ static int ufshcd_wait_for_register(struct ufs_hba
>>> *hba, u32 reg, u32 mask,
>>> val = val & mask;
>>>
>>> while ((ufshcd_readl(hba, reg) & mask) != val) {
>>> -   /* wakeup within 50us of expiry */
>>> -   usleep_range(interval_us, interval_us + 50);
>>> -
>>> +   if (can_sleep)
>>> +   usleep_range(interval_us, interval_us + 50);
>>> +   else
>>> +   udelay(interval_us);
>>> if (time_after(jiffies, timeout)) {
>>> if ((ufshcd_readl(hba, reg) & mask) != val)
>>> err = -ETIMEDOUT;
>>> @@ -1459,7 +1462,7 @@ ufshcd_clear_cmd(struct ufs_hba *hba, int tag)
>>>  */
>>> err = ufshcd_wait_for_register(hba,
>>> REG_UTP_TRANSFER_REQ_DOOR_BELL,
>>> -   mask, ~mask, 1000, 1000);
>>> +   mask, ~mask, 1000, 1000, true);
>>>
>>> return err;
>>>  }
>>> @@ -2815,6 +2818,23 @@ out:
>>>  }
>>>
>>>  /**
>>> + * ufshcd_hba_stop - Send controller to reset state
>>> + * @hba: per adapter instance
>>> + * @can_sleep: perform sleep or just spin
>>> + */
>>> +static inline void ufshcd_hba_stop(struct ufs_hba *hba, bool can_sleep)
>>> +{
>>> +   int err;
>>> +
>>> +   ufshcd_writel(hba, CONTROLLER_DISABLE,  REG_CONTROLLER_ENABLE);
>>> +   err = ufshcd_wait_for_register(hba, REG_CONTROLLER_ENABLE,
>>> +   CONTROLLER_ENABLE, CONTROLLER_DISABLE,
>>> +   10, 1, can_sleep);
>>> +   if (err)
>>> +   dev_err(hba->dev, "%s: Controller disable failed\n", __func__);
>>> +}
>>> +
>> Shouldn't you return an error here?
>> If the controller disable failed you probably need a hard reset or
>> something, otherwise I would assume that every other command from that
>> point on will not work as expected.
>>
>> Cheers,
>>
>> Hannes
> 
> 
> Hello Hannes,
> The original routine signature is:
> void ufshcd_hba_stop(struct ufs_hba *hba);
> 
> as you can see, no return value, the reason is simple - there is nothing
> we can do if writing to the register fails.
> 
> All we wanted to do here was to add a grace period for the
> register value to change. Also, we decided to add an error msg in case the
> value has not changed within this timeout.
> We cannot do anything else, let alone return an error, as there is no
> error handling in such a case.
> 
> So, as far as I see it, we only improved the already existing logic, by
> adding a grace period for the register change, and also by adding an
> error message that was absent before.
> 
Thanks for the explanation.

Reviewed-by: Hannes Reinecke 

Cheers,

Hannes
-- 
Dr. Hannes Reinecke   zSeries & Storage
h...@suse.de  +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg)


Re: [PATCH v5 03/15] scsi: ufs: implement scsi host timeout handler

2016-03-02 Thread Hannes Reinecke
On 03/01/2016 09:25 PM, yga...@codeaurora.org wrote:
>> On 02/28/2016 09:32 PM, Yaniv Gardi wrote:
>>> A race condition exists between request requeueing and scsi layer
>>> error handling:
>>> When UFS driver queuecommand returns a busy status for a request,
>>> it will be requeued and its tag will be freed and set to -1.
>>> At the same time it is possible that the request will timeout and
>>> scsi layer will start error handling for it. The scsi layer reuses
>>> the request and its tag to send error related commands to the device,
>>> however its tag is no longer valid.
>> Hmm. How can the host return a 'busy' status for a request?
>> From my understanding we have three possibilities:
>>
>> 1) queuecommand returns busy; however, that means that the command has
>> never been sent and this issue shouldn't occur
>> 2) The command returns with BUSY status. But in this case it has already
>> been returned, so there cannot be any timeout coming in.
>> 3) The host receives a command with a tag which is already in-use.
>> However, that should have been prevented by the block-layer, which
>> really should ensure that this situation never happens.
>>
>> So either way I look at it, it really looks like a bug and adding a
>> timeout handler will just paper over it.
>> (Not that a timeout handler is a bad idea, in fact I'm convinced that
>> you need one. Just not for this purpose.)
>>
>> So can you elaborate how this 'busy' status comes about?
>> Is the command sent to the device?
>>
>> Cheers,
>>
>> Hannes
> 
> 
> Hi Hannes,
> 
> it's going to be a bit long :)
> I think you are missing the point.
> I will describe a race condition happened to us a while ago, that was
> quite difficult to understand and fix.
> So, this patch is not about the "busy" returning to the scsi dispatch
> routine. it's about the abort triggered after 30 seconds.
> 
> imagine a request being queued and sent to the scsi, and then to the ufs.
> a timer, initialized to 30 seconds start ticking.
> but the request is never sent to the ufs device, as queuecommand() returns
> with "SCSI_MLQUEUE_HOST_BUSY"
> by looking at the code, this could happen, for example:
>   err = ufshcd_hold(hba, true);
>   if (err) {
>   err = SCSI_MLQUEUE_HOST_BUSY;
>   goto out;
>   }
> 
Uuhhh.
You probably should not have pointed me to that piece of code ...
open-coding loops in ufshcd_hold() ... shudder.
(Did I ever review that one? Must've ...)
_Anyway_: sleeping in queuecommand is always a bad idea, as then
precisely those issues you've just described will happen.

Couldn't you just call
ufshcd_hold(hba, false)
instead of
ufshcd_hold(hba, true)
?
The request will be requeued more-or-less immediately, avoiding the
issue with timeout handler kicking in.
And the queue will remain blocked until the ungate work item returns, at
which point I/O submission will continue.
As the request will be requeued to the head of the queue there won't be
other I/O competing with tags, so it shouldn't have any adverse effects.

Wouldn't that work?

Cheers,

Hannes
-- 
Dr. Hannes Reinecke   zSeries & Storage
h...@suse.de  +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg)


Re: [PATCH v5 03/15] scsi: ufs: implement scsi host timeout handler

2016-03-02 Thread Hannes Reinecke
On 03/01/2016 09:25 PM, yga...@codeaurora.org wrote:
>> On 02/28/2016 09:32 PM, Yaniv Gardi wrote:
>>> A race condition exists between request requeueing and scsi layer
>>> error handling:
>>> When UFS driver queuecommand returns a busy status for a request,
>>> it will be requeued and its tag will be freed and set to -1.
>>> At the same time it is possible that the request will timeout and
>>> scsi layer will start error handling for it. The scsi layer reuses
>>> the request and its tag to send error related commands to the device,
>>> however its tag is no longer valid.
>> Hmm. How can the host return a 'busy' status for a request?
>> From my understanding we have three possibilities:
>>
>> 1) queuecommand returns busy; however, that means that the command has
>> never been sent and this issue shouldn't occur
>> 2) The command returns with BUSY status. But in this case it has already
>> been returned, so there cannot be any timeout coming in.
>> 3) The host receives a command with a tag which is already in-use.
>> However, that should have been prevented by the block-layer, which
>> really should ensure that this situation never happens.
>>
>> So either way I look at it, it really looks like a bug and adding a
>> timeout handler will just paper over it.
>> (Not that a timeout handler is a bad idea, in fact I'm convinced that
>> you need one. Just not for this purpose.)
>>
>> So can you elaborate how this 'busy' status comes about?
>> Is the command sent to the device?
>>
>> Cheers,
>>
>> Hannes
> 
> 
> Hi Hannes,
> 
> it's going to be a bit long :)
> I think you are missing the point.
> I will describe a race condition happened to us a while ago, that was
> quite difficult to understand and fix.
> So, this patch is not about the "busy" returning to the scsi dispatch
> routine. it's about the abort triggered after 30 seconds.
> 
> imagine a request being queued and sent to the scsi, and then to the ufs.
> a timer, initialized to 30 seconds start ticking.
> but the request is never sent to the ufs device, as queuecommand() returns
> with "SCSI_MLQUEUE_HOST_BUSY"
> by looking at the code, this could happen, for example:
>   err = ufshcd_hold(hba, true);
>   if (err) {
>   err = SCSI_MLQUEUE_HOST_BUSY;
>   goto out;
>   }
> 
Uuhhh.
You probably should not have pointed me to that piece of code ...
open-coding loops in ufshcd_hold() ... shudder.
(Did I ever review that one? Must've ...)
_Anyway_: sleeping in queuecommand is always a bad idea, as then
precisely those issues you've just described will happen.

Couldn't you just call
ufshcd_hold(hba, false)
instead of
ufshcd_hold(hba, true)
?
The request will be requeued more-or-less immediately, avoiding the
issue with timeout handler kicking in.
And the queue will remain blocked until the ungate work item returns, at
which point I/O submission will continue.
As the request will be requeued to the head of the queue there won't be
other I/O competing with tags, so it shouldn't have any adverse effects.

Wouldn't that work?

Cheers,

Hannes
-- 
Dr. Hannes Reinecke   zSeries & Storage
h...@suse.de  +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg)


[PATCH v3 4/4] mtd: spi-nor: Disable Micron flash HW protection

2016-03-02 Thread Yunhui Cui
From: Yunhui Cui 

For the Micron family, the status register write enable/disable bit
provides hardware data protection for the device.
When the enable/disable bit is set to 1, the status register
nonvolatile bits become read-only and the WRITE STATUS REGISTER
operation will not execute.

Signed-off-by: Yunhui Cui 
---
 drivers/mtd/spi-nor/spi-nor.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index ed0c19c..917f814 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -39,6 +39,7 @@
 
 #define SPI_NOR_MAX_ID_LEN 6
 #define SPI_NOR_MAX_ADDR_WIDTH 4
+#define SPI_NOR_MICRON_WRITE_ENABLE 0x7f
 
 struct flash_info {
char*name;
@@ -1238,6 +1239,14 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, 
enum read_mode mode)
write_sr(nor, 0);
}
 
+   if (JEDEC_MFR(info) == SNOR_MFR_MICRON) {
+   ret = read_sr(nor);
+   ret &= SPI_NOR_MICRON_WRITE_ENABLE;
+
+   write_enable(nor);
+   write_sr(nor, ret);
+   }
+
if (!mtd->name)
mtd->name = dev_name(dev);
mtd->priv = nor;
-- 
2.1.0.27.g96db324



[PATCH v3 4/4] mtd: spi-nor: Disable Micron flash HW protection

2016-03-02 Thread Yunhui Cui
From: Yunhui Cui 

For the Micron family, the status register write enable/disable bit
provides hardware data protection for the device.
When the enable/disable bit is set to 1, the status register
nonvolatile bits become read-only and the WRITE STATUS REGISTER
operation will not execute.

Signed-off-by: Yunhui Cui 
---
 drivers/mtd/spi-nor/spi-nor.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index ed0c19c..917f814 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -39,6 +39,7 @@
 
 #define SPI_NOR_MAX_ID_LEN 6
 #define SPI_NOR_MAX_ADDR_WIDTH 4
+#define SPI_NOR_MICRON_WRITE_ENABLE 0x7f
 
 struct flash_info {
char*name;
@@ -1238,6 +1239,14 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, 
enum read_mode mode)
write_sr(nor, 0);
}
 
+   if (JEDEC_MFR(info) == SNOR_MFR_MICRON) {
+   ret = read_sr(nor);
+   ret &= SPI_NOR_MICRON_WRITE_ENABLE;
+
+   write_enable(nor);
+   write_sr(nor, ret);
+   }
+
if (!mtd->name)
mtd->name = dev_name(dev);
mtd->priv = nor;
-- 
2.1.0.27.g96db324



[PATCH v3 1/4] mtd:fsl-quadspi:use the property fields of SPI-NOR

2016-03-02 Thread Yunhui Cui
We can get the read/write/erase opcode from the spi nor framework
directly. This patch uses the information stored in the SPI-NOR to
remove the hardcode in the fsl_qspi_init_lut().

Signed-off-by: Yunhui Cui 
Signed-off-by: Yunhui Cui 
---
 drivers/mtd/spi-nor/fsl-quadspi.c | 40 ---
 1 file changed, 12 insertions(+), 28 deletions(-)

diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c 
b/drivers/mtd/spi-nor/fsl-quadspi.c
index 9ab2b51..517ffe2 100644
--- a/drivers/mtd/spi-nor/fsl-quadspi.c
+++ b/drivers/mtd/spi-nor/fsl-quadspi.c
@@ -373,9 +373,13 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
void __iomem *base = q->iobase;
int rxfifo = q->devtype_data->rxfifo;
u32 lut_base;
-   u8 cmd, addrlen, dummy;
int i;
 
+   struct spi_nor *nor = &q->nor[0];
+   u8 addrlen = (nor->addr_width == 3) ? ADDR24BIT : ADDR32BIT;
+   u8 read_op = nor->read_opcode;
+   u8 read_dm = nor->read_dummy;
+
fsl_qspi_unlock_lut(q);
 
/* Clear all the LUT table */
@@ -385,20 +389,10 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
/* Quad Read */
lut_base = SEQID_QUAD_READ * 4;
 
-   if (q->nor_size <= SZ_16M) {
-   cmd = SPINOR_OP_READ_1_1_4;
-   addrlen = ADDR24BIT;
-   dummy = 8;
-   } else {
-   /* use the 4-byte address */
-   cmd = SPINOR_OP_READ_1_1_4;
-   addrlen = ADDR32BIT;
-   dummy = 8;
-   }
-
-   qspi_writel(q, LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
+   qspi_writel(q, LUT0(CMD, PAD1, read_op) | LUT1(ADDR, PAD1, addrlen),
base + QUADSPI_LUT(lut_base));
-   qspi_writel(q, LUT0(DUMMY, PAD1, dummy) | LUT1(FSL_READ, PAD4, rxfifo),
+   qspi_writel(q, LUT0(DUMMY, PAD1, read_dm) |
+   LUT1(FSL_READ, PAD4, rxfifo),
base + QUADSPI_LUT(lut_base + 1));
 
/* Write enable */
@@ -409,16 +403,8 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
/* Page Program */
lut_base = SEQID_PP * 4;
 
-   if (q->nor_size <= SZ_16M) {
-   cmd = SPINOR_OP_PP;
-   addrlen = ADDR24BIT;
-   } else {
-   /* use the 4-byte address */
-   cmd = SPINOR_OP_PP;
-   addrlen = ADDR32BIT;
-   }
-
-   qspi_writel(q, LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
+   qspi_writel(q, LUT0(CMD, PAD1, nor->program_opcode) |
+   LUT1(ADDR, PAD1, addrlen),
base + QUADSPI_LUT(lut_base));
qspi_writel(q, LUT0(FSL_WRITE, PAD1, 0),
base + QUADSPI_LUT(lut_base + 1));
@@ -432,10 +418,8 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
/* Erase a sector */
lut_base = SEQID_SE * 4;
 
-   cmd = q->nor[0].erase_opcode;
-   addrlen = q->nor_size <= SZ_16M ? ADDR24BIT : ADDR32BIT;
-
-   qspi_writel(q, LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
+   qspi_writel(q, LUT0(CMD, PAD1, nor->erase_opcode) |
+   LUT1(ADDR, PAD1, addrlen),
base + QUADSPI_LUT(lut_base));
 
/* Erase the whole chip */
-- 
2.1.0.27.g96db324



[PATCH v3 1/4] mtd:fsl-quadspi:use the property fields of SPI-NOR

2016-03-02 Thread Yunhui Cui
We can get the read/write/erase opcode from the spi nor framework
directly. This patch uses the information stored in the SPI-NOR to
remove the hardcode in the fsl_qspi_init_lut().

Signed-off-by: Yunhui Cui 
Signed-off-by: Yunhui Cui 
---
 drivers/mtd/spi-nor/fsl-quadspi.c | 40 ---
 1 file changed, 12 insertions(+), 28 deletions(-)

diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c 
b/drivers/mtd/spi-nor/fsl-quadspi.c
index 9ab2b51..517ffe2 100644
--- a/drivers/mtd/spi-nor/fsl-quadspi.c
+++ b/drivers/mtd/spi-nor/fsl-quadspi.c
@@ -373,9 +373,13 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
void __iomem *base = q->iobase;
int rxfifo = q->devtype_data->rxfifo;
u32 lut_base;
-   u8 cmd, addrlen, dummy;
int i;
 
+   struct spi_nor *nor = &q->nor[0];
+   u8 addrlen = (nor->addr_width == 3) ? ADDR24BIT : ADDR32BIT;
+   u8 read_op = nor->read_opcode;
+   u8 read_dm = nor->read_dummy;
+
fsl_qspi_unlock_lut(q);
 
/* Clear all the LUT table */
@@ -385,20 +389,10 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
/* Quad Read */
lut_base = SEQID_QUAD_READ * 4;
 
-   if (q->nor_size <= SZ_16M) {
-   cmd = SPINOR_OP_READ_1_1_4;
-   addrlen = ADDR24BIT;
-   dummy = 8;
-   } else {
-   /* use the 4-byte address */
-   cmd = SPINOR_OP_READ_1_1_4;
-   addrlen = ADDR32BIT;
-   dummy = 8;
-   }
-
-   qspi_writel(q, LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
+   qspi_writel(q, LUT0(CMD, PAD1, read_op) | LUT1(ADDR, PAD1, addrlen),
base + QUADSPI_LUT(lut_base));
-   qspi_writel(q, LUT0(DUMMY, PAD1, dummy) | LUT1(FSL_READ, PAD4, rxfifo),
+   qspi_writel(q, LUT0(DUMMY, PAD1, read_dm) |
+   LUT1(FSL_READ, PAD4, rxfifo),
base + QUADSPI_LUT(lut_base + 1));
 
/* Write enable */
@@ -409,16 +403,8 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
/* Page Program */
lut_base = SEQID_PP * 4;
 
-   if (q->nor_size <= SZ_16M) {
-   cmd = SPINOR_OP_PP;
-   addrlen = ADDR24BIT;
-   } else {
-   /* use the 4-byte address */
-   cmd = SPINOR_OP_PP;
-   addrlen = ADDR32BIT;
-   }
-
-   qspi_writel(q, LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
+   qspi_writel(q, LUT0(CMD, PAD1, nor->program_opcode) |
+   LUT1(ADDR, PAD1, addrlen),
base + QUADSPI_LUT(lut_base));
qspi_writel(q, LUT0(FSL_WRITE, PAD1, 0),
base + QUADSPI_LUT(lut_base + 1));
@@ -432,10 +418,8 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
/* Erase a sector */
lut_base = SEQID_SE * 4;
 
-   cmd = q->nor[0].erase_opcode;
-   addrlen = q->nor_size <= SZ_16M ? ADDR24BIT : ADDR32BIT;
-
-   qspi_writel(q, LUT0(CMD, PAD1, cmd) | LUT1(ADDR, PAD1, addrlen),
+   qspi_writel(q, LUT0(CMD, PAD1, nor->erase_opcode) |
+   LUT1(ADDR, PAD1, addrlen),
base + QUADSPI_LUT(lut_base));
 
/* Erase the whole chip */
-- 
2.1.0.27.g96db324



[PATCH] thermal: consistently use int for trip temp

2016-03-02 Thread Wei Ni
Commit 17e8351a7739 made the code consistently use int for temperature,
however it missed a few places in trip temperature and thermal_core.

In the current code, trip->temperature uses "unsigned long"
and zone->temperature uses "int". If the temperature is a negative
value, comparing it with the trip temperature gives the wrong
result.

This patch can fix it.

Signed-off-by: Wei Ni 
---
 drivers/thermal/thermal_core.c | 4 ++--
 include/linux/thermal.h| 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index a0a8fd1235e2..2cde55474e34 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -684,7 +684,7 @@ trip_point_temp_store(struct device *dev, struct 
device_attribute *attr,
 {
struct thermal_zone_device *tz = to_thermal_zone(dev);
int trip, ret;
-   unsigned long temperature;
+   int temperature;
 
if (!tz->ops->set_trip_temp)
return -EPERM;
@@ -895,7 +895,7 @@ emul_temp_store(struct device *dev, struct device_attribute 
*attr,
 {
struct thermal_zone_device *tz = to_thermal_zone(dev);
int ret = 0;
-   unsigned long temperature;
+   int temperature;
 
if (kstrtoul(buf, 10, ))
return -EINVAL;
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index e13a1ace50e9..eee0b7ddd2c1 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -350,8 +350,8 @@ struct thermal_zone_of_device_ops {
 
 struct thermal_trip {
struct device_node *np;
-   unsigned long int temperature;
-   unsigned long int hysteresis;
+   int temperature;
+   int hysteresis;
enum thermal_trip_type type;
 };
 
-- 
1.9.1



[PATCH] thermal: consistently use int for trip temp

2016-03-02 Thread Wei Ni
Commit 17e8351a7739 consistently uses int for temperature;
however, it missed a few places in trip temperature and thermal_core.

In the current code, trip->temperature uses "unsigned long"
and zone->temperature uses "int". If the temperature is a negative
value, comparing the temperature with the trip temperature
gives the wrong result.

This patch can fix it.

Signed-off-by: Wei Ni 
---
 drivers/thermal/thermal_core.c | 4 ++--
 include/linux/thermal.h| 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index a0a8fd1235e2..2cde55474e34 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -684,7 +684,7 @@ trip_point_temp_store(struct device *dev, struct 
device_attribute *attr,
 {
struct thermal_zone_device *tz = to_thermal_zone(dev);
int trip, ret;
-   unsigned long temperature;
+   int temperature;
 
if (!tz->ops->set_trip_temp)
return -EPERM;
@@ -895,7 +895,7 @@ emul_temp_store(struct device *dev, struct device_attribute 
*attr,
 {
struct thermal_zone_device *tz = to_thermal_zone(dev);
int ret = 0;
-   unsigned long temperature;
+   int temperature;
 
if (kstrtoul(buf, 10, ))
return -EINVAL;
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index e13a1ace50e9..eee0b7ddd2c1 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -350,8 +350,8 @@ struct thermal_zone_of_device_ops {
 
 struct thermal_trip {
struct device_node *np;
-   unsigned long int temperature;
-   unsigned long int hysteresis;
+   int temperature;
+   int hysteresis;
enum thermal_trip_type type;
 };
 
-- 
1.9.1



Re: fs: uninterruptible hang in handle_userfault

2016-03-02 Thread Sedat Dilek
On 3/2/16, Linus Torvalds  wrote:
> On Wed, Mar 2, 2016 at 6:55 AM, Andrea Arcangeli 
> wrote:
>>
>> Running page faults that late in the exit path with signal disabled
>> was frankly unexpected.
>
> I agree that it's less than wonderful.
>
>>Apparently it's not just
>> PF_EXITING that prevents SIGKILL to reach handle_userfault(). The
>> below change still didn't allow to kill the task:
>>
>> +   exit_futex(tsk); /* run before setting PF_EXITING */
>> exit_signals(tsk);  /* sets PF_EXITING */
>
> It's not just "exit_futex()" (what is that? I assume you mean
> exit_robust_list()) that triggers the problem, it's also the
>
> put_user(0, tsk->clear_child_tid);
>
> in mm_release().
>
> So it's not just about futexes.
>
> There might be other final user space accesses lurking too that I didn't
> even think about.
>
> Anyway, I committed (a) as the safest version with the least side
> effects. If people think some more about this and come up with
> solutions how to avoid these kinds of "very late user space accesses"
> cleanly, I think that would be great.
>

Is that commit [1] Linux-4.5 material, or does it affect other versions, too?

commit 39680f50ae54cbbb6e72ac38b8329dd3eb9105f4
"userfaultfd: don't block on the last VM updates at exit time"

- Sedat -

[1] 
http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=39680f50ae54cbbb6e72ac38b8329dd3eb9105f4


Re: fs: uninterruptible hang in handle_userfault

2016-03-02 Thread Sedat Dilek
On 3/2/16, Linus Torvalds  wrote:
> On Wed, Mar 2, 2016 at 6:55 AM, Andrea Arcangeli 
> wrote:
>>
>> Running page faults that late in the exit path with signal disabled
>> was frankly unexpected.
>
> I agree that it's less than wonderful.
>
>>Apparently it's not just
>> PF_EXITING that prevents SIGKILL to reach handle_userfault(). The
>> below change still didn't allow to kill the task:
>>
>> +   exit_futex(tsk); /* run before setting PF_EXITING */
>> exit_signals(tsk);  /* sets PF_EXITING */
>
> It's not just "exit_futex()" (what is that? I assume you mean
> exit_robust_list()) that triggers the problem, it's also the
>
> put_user(0, tsk->clear_child_tid);
>
> in mm_release().
>
> So it's not just about futexes.
>
> There might be other final user space accesses lurking too that I didn't
> even think about.
>
> Anyway, I committed (a) as the safest version with the least side
> effects. If people think some more about this and come up with
> solutions how to avoid these kinds of "very late user space accesses"
> cleanly, I think that would be great.
>

Is that commit [1] Linux-4.5 material, or does it affect other versions, too?

commit 39680f50ae54cbbb6e72ac38b8329dd3eb9105f4
"userfaultfd: don't block on the last VM updates at exit time"

- Sedat -

[1] 
http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=39680f50ae54cbbb6e72ac38b8329dd3eb9105f4


[PATCH v3 2/4] mtd: fsl-quadspi: Rename SEQID_QUAD_READ to SEQID_READ

2016-03-02 Thread Yunhui Cui
There are several read modes for flash, such as NORMAL, FAST,
QUAD, and DDR QUAD. These modes all use the same LUT base,
so rename SEQID_QUAD_READ to SEQID_READ.

Signed-off-by: Yunhui Cui 
Signed-off-by: Yunhui Cui 
---
 drivers/mtd/spi-nor/fsl-quadspi.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c 
b/drivers/mtd/spi-nor/fsl-quadspi.c
index 517ffe2..9861290 100644
--- a/drivers/mtd/spi-nor/fsl-quadspi.c
+++ b/drivers/mtd/spi-nor/fsl-quadspi.c
@@ -193,7 +193,7 @@
 #define QUADSPI_LUT_NUM64
 
 /* SEQID -- we can have 16 seqids at most. */
-#define SEQID_QUAD_READ0
+#define SEQID_READ 0
 #define SEQID_WREN 1
 #define SEQID_WRDI 2
 #define SEQID_RDSR 3
@@ -386,8 +386,8 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
for (i = 0; i < QUADSPI_LUT_NUM; i++)
qspi_writel(q, 0, base + QUADSPI_LUT_BASE + i * 4);
 
-   /* Quad Read */
-   lut_base = SEQID_QUAD_READ * 4;
+   /* Read */
+   lut_base = SEQID_READ * 4;
 
qspi_writel(q, LUT0(CMD, PAD1, read_op) | LUT1(ADDR, PAD1, addrlen),
base + QUADSPI_LUT(lut_base));
@@ -468,7 +468,7 @@ static int fsl_qspi_get_seqid(struct fsl_qspi *q, u8 cmd)
 {
switch (cmd) {
case SPINOR_OP_READ_1_1_4:
-   return SEQID_QUAD_READ;
+   return SEQID_READ;
case SPINOR_OP_WREN:
return SEQID_WREN;
case SPINOR_OP_WRDI:
-- 
2.1.0.27.g96db324



[PATCH v3 2/4] mtd: fsl-quadspi: Rename SEQID_QUAD_READ to SEQID_READ

2016-03-02 Thread Yunhui Cui
There are several read modes for flash, such as NORMAL, FAST,
QUAD, and DDR QUAD. These modes all use the same LUT base,
so rename SEQID_QUAD_READ to SEQID_READ.

Signed-off-by: Yunhui Cui 
Signed-off-by: Yunhui Cui 
---
 drivers/mtd/spi-nor/fsl-quadspi.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c 
b/drivers/mtd/spi-nor/fsl-quadspi.c
index 517ffe2..9861290 100644
--- a/drivers/mtd/spi-nor/fsl-quadspi.c
+++ b/drivers/mtd/spi-nor/fsl-quadspi.c
@@ -193,7 +193,7 @@
 #define QUADSPI_LUT_NUM64
 
 /* SEQID -- we can have 16 seqids at most. */
-#define SEQID_QUAD_READ0
+#define SEQID_READ 0
 #define SEQID_WREN 1
 #define SEQID_WRDI 2
 #define SEQID_RDSR 3
@@ -386,8 +386,8 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
for (i = 0; i < QUADSPI_LUT_NUM; i++)
qspi_writel(q, 0, base + QUADSPI_LUT_BASE + i * 4);
 
-   /* Quad Read */
-   lut_base = SEQID_QUAD_READ * 4;
+   /* Read */
+   lut_base = SEQID_READ * 4;
 
qspi_writel(q, LUT0(CMD, PAD1, read_op) | LUT1(ADDR, PAD1, addrlen),
base + QUADSPI_LUT(lut_base));
@@ -468,7 +468,7 @@ static int fsl_qspi_get_seqid(struct fsl_qspi *q, u8 cmd)
 {
switch (cmd) {
case SPINOR_OP_READ_1_1_4:
-   return SEQID_QUAD_READ;
+   return SEQID_READ;
case SPINOR_OP_WREN:
return SEQID_WREN;
case SPINOR_OP_WRDI:
-- 
2.1.0.27.g96db324



[PATCH v3 3/4] mtd: spi-nor: fsl-quadspi: add fast-read mode support

2016-03-02 Thread Yunhui Cui
From: Yunhui Cui 

Add generic fast-read mode support to the qspi driver for different
flash vendors. The flash chips on different boards work in
different modes, such as fast-read or quad mode.

Signed-off-by: Yunhui Cui 
---
 drivers/mtd/spi-nor/fsl-quadspi.c | 21 -
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c 
b/drivers/mtd/spi-nor/fsl-quadspi.c
index 9861290..09adaa4 100644
--- a/drivers/mtd/spi-nor/fsl-quadspi.c
+++ b/drivers/mtd/spi-nor/fsl-quadspi.c
@@ -389,11 +389,21 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
/* Read */
lut_base = SEQID_READ * 4;
 
-   qspi_writel(q, LUT0(CMD, PAD1, read_op) | LUT1(ADDR, PAD1, addrlen),
-   base + QUADSPI_LUT(lut_base));
-   qspi_writel(q, LUT0(DUMMY, PAD1, read_dm) |
-   LUT1(FSL_READ, PAD4, rxfifo),
-   base + QUADSPI_LUT(lut_base + 1));
+   if (nor->flash_read == SPI_NOR_FAST) {
+   qspi_writel(q, LUT0(CMD, PAD1, read_op) |
+   LUT1(ADDR, PAD1, addrlen),
+   base + QUADSPI_LUT(lut_base));
+   qspi_writel(q,  LUT0(DUMMY, PAD1, read_dm) |
+   LUT1(FSL_READ, PAD1, rxfifo),
+   base + QUADSPI_LUT(lut_base + 1));
+   } else if (nor->flash_read == SPI_NOR_QUAD) {
+   qspi_writel(q, LUT0(CMD, PAD1, read_op) |
+   LUT1(ADDR, PAD1, addrlen),
+   base + QUADSPI_LUT(lut_base));
+   qspi_writel(q, LUT0(DUMMY, PAD1, read_dm) |
+   LUT1(FSL_READ, PAD4, rxfifo),
+   base + QUADSPI_LUT(lut_base + 1));
+   }
 
/* Write enable */
lut_base = SEQID_WREN * 4;
@@ -468,6 +478,7 @@ static int fsl_qspi_get_seqid(struct fsl_qspi *q, u8 cmd)
 {
switch (cmd) {
case SPINOR_OP_READ_1_1_4:
+   case SPINOR_OP_READ_FAST:
return SEQID_READ;
case SPINOR_OP_WREN:
return SEQID_WREN;
-- 
2.1.0.27.g96db324



[PATCH v3 3/4] mtd: spi-nor: fsl-quadspi: add fast-read mode support

2016-03-02 Thread Yunhui Cui
From: Yunhui Cui 

Add generic fast-read mode support to the qspi driver for different
flash vendors. The flash chips on different boards work in
different modes, such as fast-read or quad mode.

Signed-off-by: Yunhui Cui 
---
 drivers/mtd/spi-nor/fsl-quadspi.c | 21 -
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/drivers/mtd/spi-nor/fsl-quadspi.c 
b/drivers/mtd/spi-nor/fsl-quadspi.c
index 9861290..09adaa4 100644
--- a/drivers/mtd/spi-nor/fsl-quadspi.c
+++ b/drivers/mtd/spi-nor/fsl-quadspi.c
@@ -389,11 +389,21 @@ static void fsl_qspi_init_lut(struct fsl_qspi *q)
/* Read */
lut_base = SEQID_READ * 4;
 
-   qspi_writel(q, LUT0(CMD, PAD1, read_op) | LUT1(ADDR, PAD1, addrlen),
-   base + QUADSPI_LUT(lut_base));
-   qspi_writel(q, LUT0(DUMMY, PAD1, read_dm) |
-   LUT1(FSL_READ, PAD4, rxfifo),
-   base + QUADSPI_LUT(lut_base + 1));
+   if (nor->flash_read == SPI_NOR_FAST) {
+   qspi_writel(q, LUT0(CMD, PAD1, read_op) |
+   LUT1(ADDR, PAD1, addrlen),
+   base + QUADSPI_LUT(lut_base));
+   qspi_writel(q,  LUT0(DUMMY, PAD1, read_dm) |
+   LUT1(FSL_READ, PAD1, rxfifo),
+   base + QUADSPI_LUT(lut_base + 1));
+   } else if (nor->flash_read == SPI_NOR_QUAD) {
+   qspi_writel(q, LUT0(CMD, PAD1, read_op) |
+   LUT1(ADDR, PAD1, addrlen),
+   base + QUADSPI_LUT(lut_base));
+   qspi_writel(q, LUT0(DUMMY, PAD1, read_dm) |
+   LUT1(FSL_READ, PAD4, rxfifo),
+   base + QUADSPI_LUT(lut_base + 1));
+   }
 
/* Write enable */
lut_base = SEQID_WREN * 4;
@@ -468,6 +478,7 @@ static int fsl_qspi_get_seqid(struct fsl_qspi *q, u8 cmd)
 {
switch (cmd) {
case SPINOR_OP_READ_1_1_4:
+   case SPINOR_OP_READ_FAST:
return SEQID_READ;
case SPINOR_OP_WREN:
return SEQID_WREN;
-- 
2.1.0.27.g96db324



[PATCH RFC 0/2] mm: Enable page parallel initialisation for Power

2016-03-02 Thread Li Zhang
From: Li Zhang 

Upstream already supports parallel page initialisation for X86, and the
boot time is improved greatly. Some tests have been done for Power.

Here is the result I have done with different memory size.

* 4GB memory:
boot time is as the following: 
with patch vs without patch: 10.4s vs 24.5s
boot time is improved 57%
* 200GB memory: 
   boot time looks the same with and without patches.
   boot time is about 38s
* 32TB memory: 
   boot time looks the same with and without patches 
   boot time is about 160s.
   The boot time is much shorter than X86 with 24TB memory.
   From community discussion, it costs about 694s for X86 24T system.

From a code point of view, parallel initialisation improves performance by
deferring memory initialisation to kswapd with N kthreads, so it should
improve performance theoretically.

From the test results, on X86 performance is improved greatly with huge
memory. On the Power platform, however, it is improved greatly with less than
100GB of memory; for huge memory, it is not improved greatly. It still saves
some time by using several threads, as the following information
shows (32TB system log):

[   22.648169] node 9 initialised, 16607461 pages in 280ms
[   22.783772] node 3 initialised, 23937243 pages in 410ms
[   22.858877] node 6 initialised, 29179347 pages in 490ms
[   22.863252] node 2 initialised, 29179347 pages in 490ms
[   22.907545] node 0 initialised, 32049614 pages in 540ms
[   22.920891] node 15 initialised, 32212280 pages in 550ms
[   22.923236] node 4 initialised, 32306127 pages in 550ms
[   22.923384] node 12 initialised, 32314319 pages in 550ms
[   22.924754] node 8 initialised, 32314319 pages in 550ms
[   22.940780] node 13 initialised, 33353677 pages in 570ms
[   22.940796] node 11 initialised, 33353677 pages in 570ms
[   22.941700] node 5 initialised, 33353677 pages in 570ms
[   22.941721] node 10 initialised, 33353677 pages in 570ms
[   22.941876] node 7 initialised, 33353677 pages in 570ms
[   22.944946] node 14 initialised, 33353677 pages in 570ms
[   22.946063] node 1 initialised, 33345485 pages in 580ms

It saves about 550*16 ms at least, although that is negligible compared with
the total boot time of about 160 seconds. What's more, the boot time is much
shorter on Power, even without the patches, than on x86 for huge-memory machines.

So this patchset is still necessary to be enabled for Power. 

Li Zhang (2):
  mm: meminit: initialise more memory for inode/dentry hash tables in
early boot
  Enable page parallel initialisation

 arch/powerpc/Kconfig |  1 +
 mm/page_alloc.c  | 11 +--
 2 files changed, 10 insertions(+), 2 deletions(-)

-- 
2.1.0



[PATCH RFC 2/2] powerpc/mm: Enable page parallel initialisation

2016-03-02 Thread Li Zhang
From: Li Zhang 

Parallel initialisation has been enabled for X86,
boot time is improved greatly.
On Power8, for small memory, it is improved greatly.
Here is the result from my test on Power8 platform:

For 4GB memory: improved by 57%
For 50GB memory: improved by 22%

Signed-off-by: Li Zhang 
---
 arch/powerpc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e4824fd..83073c2 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -158,6 +158,7 @@ config PPC
select ARCH_HAS_DEVMEM_IS_ALLOWED
select HAVE_ARCH_SECCOMP_FILTER
select ARCH_HAS_UBSAN_SANITIZE_ALL
+   select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
 
 config GENERIC_CSUM
def_bool CPU_LITTLE_ENDIAN
-- 
2.1.0



[PATCH RFC 1/2] mm: meminit: initialise more memory for inode/dentry hash tables in early boot

2016-03-02 Thread Li Zhang
From: Li Zhang 

This patch is based on Mel Gorman's old patch from the mailing list,
https://lkml.org/lkml/2015/5/5/280, which was discussed, but the problem was
instead fixed with a completion that waits for all memory to be initialised in
page_alloc_init_late(). That was to fix the OOM problem on X86
with 24TB memory, which allocates memory in late initialisation.
But on the Power platform with 32TB memory, it causes a call trace
in vfs_caches_init->inode_init(), and the inode hash table needs more
memory.
So this patch allocates 1GB for 0.25TB/node for large systems,
as mentioned in https://lkml.org/lkml/2015/5/1/627

This call trace is found on Power with 32TB memory, 1024CPUs, 16nodes.
The log from dmesg as the following:

[0.091780] Dentry cache hash table entries: 2147483648 (order: 18,
17179869184 bytes)
[2.891012] vmalloc: allocation failure, allocated 16021913600 of
17179934720 bytes
[2.891034] swapper/0: page allocation failure: order:0,
mode:0x2080020
[2.891038] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.4.0-0-ppc64
[2.891041] Call Trace:
[2.891046] [c12bfa00] [c07c4a50]
.dump_stack+0xb4/0xb664 (unreliable)
[2.891051] [c12bfa80] [c01f93d4]
.warn_alloc_failed+0x114/0x160
[2.891054] [c12bfb30] [c023c204]
.__vmalloc_area_node+0x1a4/0x2b0
[2.891058] [c12bfbf0] [c023c3f4]
.__vmalloc_node_range+0xe4/0x110
[2.891061] [c12bfc90] [c023c460]
.__vmalloc_node+0x40/0x50
[2.891065] [c12bfd10] [c0b67d60]
.alloc_large_system_hash+0x134/0x2a4
[2.891068] [c12bfdd0] [c0b70924]
.inode_init+0xa4/0xf0
[2.891071] [c12bfe60] [c0b706a0]
.vfs_caches_init+0x80/0x144
[2.891074] [c12bfef0] [c0b35208]
.start_kernel+0x40c/0x4e0
[2.891078] [c12bff90] [c0008cfc]
start_here_common+0x20/0x4a4
[2.891080] Mem-Info:

Signed-off-by: Li Zhang 
---
 mm/page_alloc.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 838ca8bb..4847f25 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -293,13 +293,20 @@ static inline bool update_defer_init(pg_data_t *pgdat,
unsigned long pfn, unsigned long zone_end,
unsigned long *nr_initialised)
 {
+   unsigned long max_initialise;
+
/* Always populate low zones for address-contrained allocations */
if (zone_end < pgdat_end_pfn(pgdat))
return true;
+   /*
+   * Initialise at least 2G of a node but also take into account that
+   * two large system hashes that can take up 1GB for 0.25TB/node.
+   */
+   max_initialise = max(2UL << (30 - PAGE_SHIFT),
+   (pgdat->node_spanned_pages >> 8));
 
-   /* Initialise at least 2G of the highest zone */
(*nr_initialised)++;
-   if (*nr_initialised > (2UL << (30 - PAGE_SHIFT)) &&
+   if ((*nr_initialised > max_initialise) &&
(pfn & (PAGES_PER_SECTION - 1)) == 0) {
pgdat->first_deferred_pfn = pfn;
return false;
-- 
2.1.0



[PATCH RFC 0/2] mm: Enable page parallel initialisation for Power

2016-03-02 Thread Li Zhang
From: Li Zhang 

Upstream already supports parallel page initialisation for X86, and the
boot time is improved greatly. Some tests have been done for Power.

Here is the result I have done with different memory size.

* 4GB memory:
boot time is as the following: 
with patch vs without patch: 10.4s vs 24.5s
boot time is improved 57%
* 200GB memory: 
   boot time looks the same with and without patches.
   boot time is about 38s
* 32TB memory: 
   boot time looks the same with and without patches 
   boot time is about 160s.
   The boot time is much shorter than X86 with 24TB memory.
   From community discussion, it costs about 694s for X86 24T system.

From a code point of view, parallel initialisation improves performance by
deferring memory initialisation to kswapd with N kthreads, so it should
improve performance theoretically.

From the test results, on X86 performance is improved greatly with huge
memory. On the Power platform, however, it is improved greatly with less than
100GB of memory; for huge memory, it is not improved greatly. It still saves
some time by using several threads, as the following information
shows (32TB system log):

[   22.648169] node 9 initialised, 16607461 pages in 280ms
[   22.783772] node 3 initialised, 23937243 pages in 410ms
[   22.858877] node 6 initialised, 29179347 pages in 490ms
[   22.863252] node 2 initialised, 29179347 pages in 490ms
[   22.907545] node 0 initialised, 32049614 pages in 540ms
[   22.920891] node 15 initialised, 32212280 pages in 550ms
[   22.923236] node 4 initialised, 32306127 pages in 550ms
[   22.923384] node 12 initialised, 32314319 pages in 550ms
[   22.924754] node 8 initialised, 32314319 pages in 550ms
[   22.940780] node 13 initialised, 33353677 pages in 570ms
[   22.940796] node 11 initialised, 33353677 pages in 570ms
[   22.941700] node 5 initialised, 33353677 pages in 570ms
[   22.941721] node 10 initialised, 33353677 pages in 570ms
[   22.941876] node 7 initialised, 33353677 pages in 570ms
[   22.944946] node 14 initialised, 33353677 pages in 570ms
[   22.946063] node 1 initialised, 33345485 pages in 580ms

It saves about 550*16 ms at least, although that is negligible compared with
the total boot time of about 160 seconds. What's more, the boot time is much
shorter on Power, even without the patches, than on x86 for huge-memory machines.

So this patchset is still necessary to be enabled for Power. 

Li Zhang (2):
  mm: meminit: initialise more memory for inode/dentry hash tables in
early boot
  Enable page parallel initialisation

 arch/powerpc/Kconfig |  1 +
 mm/page_alloc.c  | 11 +--
 2 files changed, 10 insertions(+), 2 deletions(-)

-- 
2.1.0



[PATCH RFC 2/2] powerpc/mm: Enable page parallel initialisation

2016-03-02 Thread Li Zhang
From: Li Zhang 

Parallel initialisation has been enabled for X86,
boot time is improved greatly.
On Power8, for small memory, it is improved greatly.
Here is the result from my test on Power8 platform:

For 4GB memory: improved by 57%
For 50GB memory: improved by 22%

Signed-off-by: Li Zhang 
---
 arch/powerpc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e4824fd..83073c2 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -158,6 +158,7 @@ config PPC
select ARCH_HAS_DEVMEM_IS_ALLOWED
select HAVE_ARCH_SECCOMP_FILTER
select ARCH_HAS_UBSAN_SANITIZE_ALL
+   select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT
 
 config GENERIC_CSUM
def_bool CPU_LITTLE_ENDIAN
-- 
2.1.0



[PATCH RFC 1/2] mm: meminit: initialise more memory for inode/dentry hash tables in early boot

2016-03-02 Thread Li Zhang
From: Li Zhang 

This patch is based on Mel Gorman's old patch from the mailing list,
https://lkml.org/lkml/2015/5/5/280, which was discussed, but the problem was
instead fixed with a completion that waits for all memory to be initialised in
page_alloc_init_late(). That was to fix the OOM problem on X86
with 24TB memory, which allocates memory in late initialisation.
But on the Power platform with 32TB memory, it causes a call trace
in vfs_caches_init->inode_init(), and the inode hash table needs more
memory.
So this patch allocates 1GB for 0.25TB/node for large systems,
as mentioned in https://lkml.org/lkml/2015/5/1/627

This call trace is found on Power with 32TB memory, 1024CPUs, 16nodes.
The log from dmesg as the following:

[0.091780] Dentry cache hash table entries: 2147483648 (order: 18,
17179869184 bytes)
[2.891012] vmalloc: allocation failure, allocated 16021913600 of
17179934720 bytes
[2.891034] swapper/0: page allocation failure: order:0,
mode:0x2080020
[2.891038] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.4.0-0-ppc64
[2.891041] Call Trace:
[2.891046] [c12bfa00] [c07c4a50]
.dump_stack+0xb4/0xb664 (unreliable)
[2.891051] [c12bfa80] [c01f93d4]
.warn_alloc_failed+0x114/0x160
[2.891054] [c12bfb30] [c023c204]
.__vmalloc_area_node+0x1a4/0x2b0
[2.891058] [c12bfbf0] [c023c3f4]
.__vmalloc_node_range+0xe4/0x110
[2.891061] [c12bfc90] [c023c460]
.__vmalloc_node+0x40/0x50
[2.891065] [c12bfd10] [c0b67d60]
.alloc_large_system_hash+0x134/0x2a4
[2.891068] [c12bfdd0] [c0b70924]
.inode_init+0xa4/0xf0
[2.891071] [c12bfe60] [c0b706a0]
.vfs_caches_init+0x80/0x144
[2.891074] [c12bfef0] [c0b35208]
.start_kernel+0x40c/0x4e0
[2.891078] [c12bff90] [c0008cfc]
start_here_common+0x20/0x4a4
[2.891080] Mem-Info:

Signed-off-by: Li Zhang 
---
 mm/page_alloc.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 838ca8bb..4847f25 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -293,13 +293,20 @@ static inline bool update_defer_init(pg_data_t *pgdat,
unsigned long pfn, unsigned long zone_end,
unsigned long *nr_initialised)
 {
+   unsigned long max_initialise;
+
/* Always populate low zones for address-contrained allocations */
if (zone_end < pgdat_end_pfn(pgdat))
return true;
+   /*
+   * Initialise at least 2G of a node but also take into account that
+   * two large system hashes that can take up 1GB for 0.25TB/node.
+   */
+   max_initialise = max(2UL << (30 - PAGE_SHIFT),
+   (pgdat->node_spanned_pages >> 8));
 
-   /* Initialise at least 2G of the highest zone */
(*nr_initialised)++;
-   if (*nr_initialised > (2UL << (30 - PAGE_SHIFT)) &&
+   if ((*nr_initialised > max_initialise) &&
(pfn & (PAGES_PER_SECTION - 1)) == 0) {
pgdat->first_deferred_pfn = pfn;
return false;
-- 
2.1.0



[RFC][PATCH][v2] Enable livepatching for powerpc

2016-03-02 Thread Balbir Singh
Changelog:
1. Implement review comments by Michael
2. The previous version compared _NIP from the
   wrong location to check for whether we
   are going to a patched location

This applies on top of the patches posted by Michael 
https://patchwork.ozlabs.org/patch/589791/

It enables livepatching. This takes patch 6/8 and 7/8 of v8 as the base.
Removes the extra strict check in gcc-profile-kernel-notrace.sh
and adds logic for checking offsets in livepatch. The patch
for HAVE_C_RECORDMCOUNT is not required and not used here.

Depending on whether or not a TOC is generated, the offset
for _mcount can be +16,+12,+8,+4. The changes are such that the
offset checks are specific to powerpc.

TODOs
1. Build a version with offsets removed and rebuild
   ftrace_location() sort of functionality
2. Make livepatching experimental on powerpc

Comments? Testing? I tested the sample in the livepatch
directory

References

1. https://patchwork.ozlabs.org/patch/581521/
2. https://patchwork.ozlabs.org/patch/587464/

Signed-off-by: Torsten Duwe 
Signed-off-by: Balbir Singh 
---
 arch/powerpc/Kconfig |  3 ++
 arch/powerpc/include/asm/livepatch.h | 41 +++
 arch/powerpc/kernel/Makefile |  1 +
 arch/powerpc/kernel/entry_64.S   | 50 +
 arch/powerpc/kernel/kgdb.c   |  4 +++
 arch/powerpc/kernel/livepatch.c  | 54 
 include/linux/livepatch.h|  2 ++
 kernel/livepatch/core.c  | 12 ++--
 8 files changed, 164 insertions(+), 3 deletions(-)
 create mode 100644 arch/powerpc/include/asm/livepatch.h
 create mode 100644 arch/powerpc/kernel/livepatch.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 792e169..8278e5e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -159,6 +159,7 @@ config PPC
select ARCH_HAS_DEVMEM_IS_ALLOWED
select HAVE_ARCH_SECCOMP_FILTER
select ARCH_HAS_UBSAN_SANITIZE_ALL
+   select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS
 
 config GENERIC_CSUM
def_bool CPU_LITTLE_ENDIAN
@@ -1109,3 +1110,5 @@ config PPC_LIB_RHEAP
bool
 
 source "arch/powerpc/kvm/Kconfig"
+
+source "kernel/livepatch/Kconfig"
diff --git a/arch/powerpc/include/asm/livepatch.h 
b/arch/powerpc/include/asm/livepatch.h
new file mode 100644
index 000..9ecd879
--- /dev/null
+++ b/arch/powerpc/include/asm/livepatch.h
@@ -0,0 +1,41 @@
+/*
+ * livepatch.h - powerpc-specific Kernel Live Patching Core
+ *
+ * Copyright (C) 2015 SUSE
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see .
+ */
+#ifndef _ASM_POWERPC64_LIVEPATCH_H
+#define _ASM_POWERPC64_LIVEPATCH_H
+
+#include 
+#include 
+
+#ifdef CONFIG_LIVEPATCH
+
+static inline int klp_check_compiler_support(void)
+{
+   return 0;
+}
+
+extern int klp_write_module_reloc(struct module *mod, unsigned long type,
+  unsigned long loc, unsigned long value);
+
+static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip)
+{
+   regs->nip = ip;
+}
+
+#endif /* CONFIG_LIVEPATCH */
+#endif /* _ASM_POWERPC64_LIVEPATCH_H */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 2da380f..b767e14 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -119,6 +119,7 @@ obj-$(CONFIG_DYNAMIC_FTRACE)+= ftrace.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER)+= ftrace.o
 obj-$(CONFIG_FTRACE_SYSCALLS)  += ftrace.o
 obj-$(CONFIG_TRACING)  += trace_clock.o
+obj-$(CONFIG_LIVEPATCH)+= livepatch.o
 
 ifneq ($(CONFIG_PPC_INDIRECT_PIO),y)
 obj-y  += iomap.o
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index ec7f8aa..2d5333c 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -1224,6 +1224,9 @@ _GLOBAL(ftrace_caller)
addir3,r3,function_trace_op@toc@l
ld  r5,0(r3)
 
+#ifdef CONFIG_LIVEPATCH
+   mr  r14,r7  /* remember old NIP */
+#endif
/* Calculate ip from nip-4 into r3 for call below */
subir3, r7, MCOUNT_INSN_SIZE
 
@@ -1248,6 +1251,9 @@ ftrace_call:
/* Load ctr with the possibly modified NIP */
ld  r3, _NIP(r1)
mtctr   r3
+#ifdef 

[RFC][PATCH][v2] Enable livepatching for powerpc

2016-03-02 Thread Balbir Singh
Changelog:
1. Implement review comments by Michael
2. The previous version compared _NIP from the
   wrong location to check for whether we
   are going to a patched location

This applies on top of the patches posted by Michael 
https://patchwork.ozlabs.org/patch/589791/

It enables livepatching. This takes patch 6/8 and 7/8 of v8 as the base.
Removes the extra strict check in gcc-profile-kernel-notrace.sh
and adds logic for checking offsets in livepatch. The patch
for HAVE_C_RECORDMCOUNT is not required and not used here.

Depending on whether or not a TOC is generated, the offset
for _mcount can be +16,+12,+8,+4. The changes are such that the
offset checks are specific to powerpc.

TODOs
1. Build a version with offsets removed and rebuild
   ftrace_location() sort of functionality
2. Make livepatching experimental on powerpc

Comments? Testing? I tested the sample in the livepatch
directory

References

1. https://patchwork.ozlabs.org/patch/581521/
2. https://patchwork.ozlabs.org/patch/587464/

Signed-off-by: Torsten Duwe 
Signed-off-by: Balbir Singh 
---
 arch/powerpc/Kconfig |  3 ++
 arch/powerpc/include/asm/livepatch.h | 41 +++
 arch/powerpc/kernel/Makefile |  1 +
 arch/powerpc/kernel/entry_64.S   | 50 +
 arch/powerpc/kernel/kgdb.c   |  4 +++
 arch/powerpc/kernel/livepatch.c  | 54 
 include/linux/livepatch.h|  2 ++
 kernel/livepatch/core.c  | 12 ++--
 8 files changed, 164 insertions(+), 3 deletions(-)
 create mode 100644 arch/powerpc/include/asm/livepatch.h
 create mode 100644 arch/powerpc/kernel/livepatch.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 792e169..8278e5e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -159,6 +159,7 @@ config PPC
select ARCH_HAS_DEVMEM_IS_ALLOWED
select HAVE_ARCH_SECCOMP_FILTER
select ARCH_HAS_UBSAN_SANITIZE_ALL
+   select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS
 
 config GENERIC_CSUM
def_bool CPU_LITTLE_ENDIAN
@@ -1109,3 +1110,5 @@ config PPC_LIB_RHEAP
bool
 
 source "arch/powerpc/kvm/Kconfig"
+
+source "kernel/livepatch/Kconfig"
diff --git a/arch/powerpc/include/asm/livepatch.h 
b/arch/powerpc/include/asm/livepatch.h
new file mode 100644
index 000..9ecd879
--- /dev/null
+++ b/arch/powerpc/include/asm/livepatch.h
@@ -0,0 +1,41 @@
+/*
+ * livepatch.h - powerpc-specific Kernel Live Patching Core
+ *
+ * Copyright (C) 2015 SUSE
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _ASM_POWERPC64_LIVEPATCH_H
+#define _ASM_POWERPC64_LIVEPATCH_H
+
+#include 
+#include 
+
+#ifdef CONFIG_LIVEPATCH
+
+static inline int klp_check_compiler_support(void)
+{
+   return 0;
+}
+
+extern int klp_write_module_reloc(struct module *mod, unsigned long type,
+  unsigned long loc, unsigned long value);
+
+static inline void klp_arch_set_pc(struct pt_regs *regs, unsigned long ip)
+{
+   regs->nip = ip;
+}
+
+#endif /* CONFIG_LIVEPATCH */
+#endif /* _ASM_POWERPC64_LIVEPATCH_H */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 2da380f..b767e14 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -119,6 +119,7 @@ obj-$(CONFIG_DYNAMIC_FTRACE)+= ftrace.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER)+= ftrace.o
 obj-$(CONFIG_FTRACE_SYSCALLS)  += ftrace.o
 obj-$(CONFIG_TRACING)  += trace_clock.o
+obj-$(CONFIG_LIVEPATCH)+= livepatch.o
 
 ifneq ($(CONFIG_PPC_INDIRECT_PIO),y)
 obj-y  += iomap.o
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index ec7f8aa..2d5333c 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -1224,6 +1224,9 @@ _GLOBAL(ftrace_caller)
addir3,r3,function_trace_op@toc@l
ld  r5,0(r3)
 
+#ifdef CONFIG_LIVEPATCH
+   mr  r14,r7  /* remember old NIP */
+#endif
/* Calculate ip from nip-4 into r3 for call below */
subir3, r7, MCOUNT_INSN_SIZE
 
@@ -1248,6 +1251,9 @@ ftrace_call:
/* Load ctr with the possibly modified NIP */
ld  r3, _NIP(r1)
mtctr   r3
+#ifdef CONFIG_LIVEPATCH
+   cmpdr14,r3  

Re: [PATCH v3 1/8] powerpc: Create a helper for getting the kernel toc value

2016-03-02 Thread Kamalesh Babulal
* Michael Ellerman  [2016-03-03 15:26:53]:

> Move the logic to work out the kernel toc pointer into a header. This is
> a good cleanup, and also means we can use it elsewhere in future.
> 
> Reviewed-by: Kamalesh Babulal 
> Reviewed-by: Torsten Duwe 
> Signed-off-by: Michael Ellerman 

For the patchset,

Tested-by: Kamalesh Babulal 




Re: [PATCH v3 1/8] powerpc: Create a helper for getting the kernel toc value

2016-03-02 Thread Kamalesh Babulal
* Michael Ellerman  [2016-03-03 15:26:53]:

> Move the logic to work out the kernel toc pointer into a header. This is
> a good cleanup, and also means we can use it elsewhere in future.
> 
> Reviewed-by: Kamalesh Babulal 
> Reviewed-by: Torsten Duwe 
> Signed-off-by: Michael Ellerman 

For the patchset,

Tested-by: Kamalesh Babulal 




[PATCH] driver: input :touchscreen : add Raydium I2C touch driver

2016-03-02 Thread jeffrey.lin
Raydium I2C touch driver.

Signed-off-by: jeffrey.lin 
---
 drivers/input/touchscreen/Kconfig  |  13 +
 drivers/input/touchscreen/Makefile |   1 +
 drivers/input/touchscreen/raydium_i2c_ts.c | 953 +
 3 files changed, 967 insertions(+)
 create mode 100644 drivers/input/touchscreen/raydium_i2c_ts.c

diff --git a/drivers/input/touchscreen/Kconfig 
b/drivers/input/touchscreen/Kconfig
index 3f3f6ee..9adacf6 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -915,6 +915,19 @@ config TOUCHSCREEN_PCAP
  To compile this driver as a module, choose M here: the
  module will be called pcap_ts.
 
+config TOUCHSCREEN_RM_TS
+   tristate "Raydium I2C Touchscreen"
+   depends on I2C
+   help
+ Say Y here if you have Raydium series I2C touchscreen,
+ such as RM31100 , connected to your system.
+
+ If unsure, say N.
+
+ To compile this driver as a module, choose M here: the
+ module will be called raydium_i2c_ts.
+
+
 config TOUCHSCREEN_ST1232
tristate "Sitronix ST1232 touchscreen controllers"
depends on I2C
diff --git a/drivers/input/touchscreen/Makefile 
b/drivers/input/touchscreen/Makefile
index 4941f2d..99e08cf 100644
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile
@@ -55,6 +55,7 @@ obj-$(CONFIG_TOUCHSCREEN_USB_COMPOSITE)   += 
usbtouchscreen.o
 obj-$(CONFIG_TOUCHSCREEN_PCAP) += pcap_ts.o
 obj-$(CONFIG_TOUCHSCREEN_PENMOUNT) += penmount.o
 obj-$(CONFIG_TOUCHSCREEN_PIXCIR)   += pixcir_i2c_ts.o
+obj-$(CONFIG_TOUCHSCREEN_RM_TS)+= raydium_i2c_ts.o
 obj-$(CONFIG_TOUCHSCREEN_S3C2410)  += s3c2410_ts.o
 obj-$(CONFIG_TOUCHSCREEN_ST1232)   += st1232.o
 obj-$(CONFIG_TOUCHSCREEN_STMPE)+= stmpe-ts.o
diff --git a/drivers/input/touchscreen/raydium_i2c_ts.c 
b/drivers/input/touchscreen/raydium_i2c_ts.c
new file mode 100644
index 000..7ba681e
--- /dev/null
+++ b/drivers/input/touchscreen/raydium_i2c_ts.c
@@ -0,0 +1,953 @@
+/*
+ * Raydium touchscreen I2C driver.
+ *
+ * Copyright (C) 2012-2014, Raydium Semiconductor Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2, and only version 2, as published by the
+ * Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Raydium reserves the right to make changes without further notice
+ * to the materials described herein. Raydium does not assume any
+ * liability arising out of the application described herein.
+ *
+ * Contact Raydium Semiconductor Corporation at www.rad-ic.com
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* Device, Driver information */
+#define DEVICE_NAME"raydium_i2c"
+
+/* Slave I2C mode*/
+#define RM_BOOT_BLDR   0x02
+#define RM_BOOT_MAIN   0x03
+
+/*I2C command */
+#define CMD_QUERY_BANK 0x2B
+#define CMD_DATA_BANK  0x4D
+#define CMD_ENTER_SLEEP0x4E
+#define CMD_BOOT_ACK   0x0A
+#define CMD_BOOT_WRT   0x5B
+#define CMD_BOOT_CHK   0x0C
+#define CMD_BANK_SWITCH0xAA
+
+/* Touch relative info */
+#define MAX_RETRIES3
+#define MAX_FW_UPDATE_RETRIES  30
+#define MAX_TOUCH_NUM  10
+#define MAX_PACKET_SIZE60
+#define BOOT_DELAY_MS  100
+
+#define RAYDIUM_FW_PAGESIZE128
+#define RAYDIUM_POWERON_DELAY_USEC 500
+#define RAYDIUM_RESET_DELAY_MSEC   50
+
+#define ADDR_INDEX 0x03
+#define HEADER_SIZE4
+
+enum raydium_boot_mode {
+   RAYDIUM_TS_MAIN,
+   RAYDIUM_TS_BLDR,
+};
+
+struct raydium_info {
+   u32 hw_ver;
+   u8 main_ver;
+   u8 sub_ver;
+   u16 ft_ver;
+   u8 x_num;
+   u8 y_num;
+   u16 x_max;
+   u16 y_max;
+   u8 x_res;   /* units/mm */
+   u8 y_res;   /* units/mm */
+};
+
+struct raydium_abs_info {
+   u8 state;/*1:touch, 0:no touch*/
+   u8 x_pos_lsb;
+   u8 x_pos_msb;
+   u8 y_pos_lsb;
+   u8 y_pos_msb;
+   u8 pressure;
+   u8 x_width;
+   u8 y_width;
+};
+
+struct raydium_object {
+   u32 data_bank_addr;
+   u8 pkg_size;
+};
+
+/* struct raydium_data - represents state of Raydium touchscreen device */
+struct raydium_data {
+   struct i2c_client *client;
+   struct input_dev *input;
+
+   struct regulator *vcc33;
+   struct regulator *vccio;
+   struct gpio_desc *reset_gpio;
+
+   u32 query_bank_info;
+
+   struct 

[PATCH] driver: input :touchscreen : add Raydium I2C touch driver

2016-03-02 Thread jeffrey.lin
Raydium I2C touch driver.

Signed-off-by: jeffrey.lin 
---
 drivers/input/touchscreen/Kconfig  |  13 +
 drivers/input/touchscreen/Makefile |   1 +
 drivers/input/touchscreen/raydium_i2c_ts.c | 953 +
 3 files changed, 967 insertions(+)
 create mode 100644 drivers/input/touchscreen/raydium_i2c_ts.c

diff --git a/drivers/input/touchscreen/Kconfig 
b/drivers/input/touchscreen/Kconfig
index 3f3f6ee..9adacf6 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -915,6 +915,19 @@ config TOUCHSCREEN_PCAP
  To compile this driver as a module, choose M here: the
  module will be called pcap_ts.
 
+config TOUCHSCREEN_RM_TS
+   tristate "Raydium I2C Touchscreen"
+   depends on I2C
+   help
+ Say Y here if you have Raydium series I2C touchscreen,
+ such as RM31100 , connected to your system.
+
+ If unsure, say N.
+
+ To compile this driver as a module, choose M here: the
+ module will be called raydium_i2c_ts.
+
+
 config TOUCHSCREEN_ST1232
tristate "Sitronix ST1232 touchscreen controllers"
depends on I2C
diff --git a/drivers/input/touchscreen/Makefile 
b/drivers/input/touchscreen/Makefile
index 4941f2d..99e08cf 100644
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile
@@ -55,6 +55,7 @@ obj-$(CONFIG_TOUCHSCREEN_USB_COMPOSITE)   += 
usbtouchscreen.o
 obj-$(CONFIG_TOUCHSCREEN_PCAP) += pcap_ts.o
 obj-$(CONFIG_TOUCHSCREEN_PENMOUNT) += penmount.o
 obj-$(CONFIG_TOUCHSCREEN_PIXCIR)   += pixcir_i2c_ts.o
+obj-$(CONFIG_TOUCHSCREEN_RM_TS)+= raydium_i2c_ts.o
 obj-$(CONFIG_TOUCHSCREEN_S3C2410)  += s3c2410_ts.o
 obj-$(CONFIG_TOUCHSCREEN_ST1232)   += st1232.o
 obj-$(CONFIG_TOUCHSCREEN_STMPE)+= stmpe-ts.o
diff --git a/drivers/input/touchscreen/raydium_i2c_ts.c 
b/drivers/input/touchscreen/raydium_i2c_ts.c
new file mode 100644
index 000..7ba681e
--- /dev/null
+++ b/drivers/input/touchscreen/raydium_i2c_ts.c
@@ -0,0 +1,953 @@
+/*
+ * Raydium touchscreen I2C driver.
+ *
+ * Copyright (C) 2012-2014, Raydium Semiconductor Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2, and only version 2, as published by the
+ * Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Raydium reserves the right to make changes without further notice
+ * to the materials described herein. Raydium does not assume any
+ * liability arising out of the application described herein.
+ *
+ * Contact Raydium Semiconductor Corporation at www.rad-ic.com
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* Device, Driver information */
+#define DEVICE_NAME"raydium_i2c"
+
+/* Slave I2C mode*/
+#define RM_BOOT_BLDR   0x02
+#define RM_BOOT_MAIN   0x03
+
+/*I2C command */
+#define CMD_QUERY_BANK 0x2B
+#define CMD_DATA_BANK  0x4D
+#define CMD_ENTER_SLEEP0x4E
+#define CMD_BOOT_ACK   0x0A
+#define CMD_BOOT_WRT   0x5B
+#define CMD_BOOT_CHK   0x0C
+#define CMD_BANK_SWITCH0xAA
+
+/* Touch relative info */
+#define MAX_RETRIES3
+#define MAX_FW_UPDATE_RETRIES  30
+#define MAX_TOUCH_NUM  10
+#define MAX_PACKET_SIZE60
+#define BOOT_DELAY_MS  100
+
+#define RAYDIUM_FW_PAGESIZE128
+#define RAYDIUM_POWERON_DELAY_USEC 500
+#define RAYDIUM_RESET_DELAY_MSEC   50
+
+#define ADDR_INDEX 0x03
+#define HEADER_SIZE4
+
+enum raydium_boot_mode {
+   RAYDIUM_TS_MAIN,
+   RAYDIUM_TS_BLDR,
+};
+
+struct raydium_info {
+   u32 hw_ver;
+   u8 main_ver;
+   u8 sub_ver;
+   u16 ft_ver;
+   u8 x_num;
+   u8 y_num;
+   u16 x_max;
+   u16 y_max;
+   u8 x_res;   /* units/mm */
+   u8 y_res;   /* units/mm */
+};
+
+struct raydium_abs_info {
+   u8 state;/*1:touch, 0:no touch*/
+   u8 x_pos_lsb;
+   u8 x_pos_msb;
+   u8 y_pos_lsb;
+   u8 y_pos_msb;
+   u8 pressure;
+   u8 x_width;
+   u8 y_width;
+};
+
+struct raydium_object {
+   u32 data_bank_addr;
+   u8 pkg_size;
+};
+
+/* struct raydium_data - represents state of Raydium touchscreen device */
+struct raydium_data {
+   struct i2c_client *client;
+   struct input_dev *input;
+
+   struct regulator *vcc33;
+   struct regulator *vccio;
+   struct gpio_desc *reset_gpio;
+
+   u32 query_bank_info;
+
+   struct raydium_info info;
+   

[PATCH 1/7] extcon: usb-gpio: add device binding for platform device

2016-03-02 Thread Lu Baolu
This is needed to handle the GPIO connected USB ID pin found on
Intel Baytrail devices.

Signed-off-by: Lu Baolu 
Reviewed-by: Felipe Balbi 
---
 drivers/extcon/extcon-usb-gpio.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/extcon/extcon-usb-gpio.c b/drivers/extcon/extcon-usb-gpio.c
index 2b2fecf..af9c8b0 100644
--- a/drivers/extcon/extcon-usb-gpio.c
+++ b/drivers/extcon/extcon-usb-gpio.c
@@ -206,6 +206,12 @@ static const struct of_device_id usb_extcon_dt_match[] = {
 };
 MODULE_DEVICE_TABLE(of, usb_extcon_dt_match);
 
+static const struct platform_device_id usb_extcon_platform_ids[] = {
+   { .name = "extcon-usb-gpio", },
+   { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(platform, usb_extcon_platform_ids);
+
 static struct platform_driver usb_extcon_driver = {
.probe  = usb_extcon_probe,
.remove = usb_extcon_remove,
@@ -214,6 +220,7 @@ static struct platform_driver usb_extcon_driver = {
.pm = _extcon_pm_ops,
.of_match_table = usb_extcon_dt_match,
},
+   .id_table = usb_extcon_platform_ids,
 };
 
 module_platform_driver(usb_extcon_driver);
-- 
2.1.4



[PATCH 1/7] extcon: usb-gpio: add device binding for platform device

2016-03-02 Thread Lu Baolu
This is needed to handle the GPIO connected USB ID pin found on
Intel Baytrail devices.

Signed-off-by: Lu Baolu 
Reviewed-by: Felipe Balbi 
---
 drivers/extcon/extcon-usb-gpio.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/extcon/extcon-usb-gpio.c b/drivers/extcon/extcon-usb-gpio.c
index 2b2fecf..af9c8b0 100644
--- a/drivers/extcon/extcon-usb-gpio.c
+++ b/drivers/extcon/extcon-usb-gpio.c
@@ -206,6 +206,12 @@ static const struct of_device_id usb_extcon_dt_match[] = {
 };
 MODULE_DEVICE_TABLE(of, usb_extcon_dt_match);
 
+static const struct platform_device_id usb_extcon_platform_ids[] = {
+   { .name = "extcon-usb-gpio", },
+   { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(platform, usb_extcon_platform_ids);
+
 static struct platform_driver usb_extcon_driver = {
.probe  = usb_extcon_probe,
.remove = usb_extcon_remove,
@@ -214,6 +220,7 @@ static struct platform_driver usb_extcon_driver = {
.pm = _extcon_pm_ops,
.of_match_table = usb_extcon_dt_match,
},
+   .id_table = usb_extcon_platform_ids,
 };
 
 module_platform_driver(usb_extcon_driver);
-- 
2.1.4



[PATCH 2/7] extcon: usb-gpio: add support for ACPI gpio interface

2016-03-02 Thread Lu Baolu
GPIO resource could be retrieved through ACPI as well.

Signed-off-by: Lu Baolu 
Reviewed-by: Felipe Balbi 
---
 drivers/extcon/extcon-usb-gpio.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/extcon/extcon-usb-gpio.c b/drivers/extcon/extcon-usb-gpio.c
index af9c8b0..472c431 100644
--- a/drivers/extcon/extcon-usb-gpio.c
+++ b/drivers/extcon/extcon-usb-gpio.c
@@ -26,6 +26,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define USB_GPIO_DEBOUNCE_MS   20  /* ms */
 
@@ -91,7 +92,7 @@ static int usb_extcon_probe(struct platform_device *pdev)
struct usb_extcon_info *info;
int ret;
 
-   if (!np)
+   if (!np && !ACPI_HANDLE(dev))
return -EINVAL;
 
info = devm_kzalloc(>dev, sizeof(*info), GFP_KERNEL);
-- 
2.1.4



[PATCH 4/7] usb: misc: add driver for Intel gpio controlled port mux

2016-03-02 Thread Lu Baolu
In some Intel platforms, a single usb port is shared between USB host
and device controller. The shared port is under control of GPIO pins.

This patch adds the support for USB GPIO controlled port mux.

Signed-off-by: David Cohen 
Signed-off-by: Lu Baolu 
Reviewed-by: Heikki Krogerus 
Reviewed-by: Felipe Balbi 
---
 MAINTAINERS   |   1 +
 drivers/usb/misc/Kconfig  |   9 +++
 drivers/usb/misc/Makefile |   1 +
 drivers/usb/misc/intel-mux-gpio.c | 126 ++
 4 files changed, 137 insertions(+)
 create mode 100644 drivers/usb/misc/intel-mux-gpio.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 45f1e1e..0f321e4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11395,6 +11395,7 @@ L:  linux-...@vger.kernel.org
 S: Supported
 F: drivers/usb/misc/mux.c
 F: include/linux/usb/mux.h
+F: drivers/usb/misc/intel-mux-gpio.c
 
 USB PRINTER DRIVER (usblp)
 M: Pete Zaitcev 
diff --git a/drivers/usb/misc/Kconfig b/drivers/usb/misc/Kconfig
index 6496d17..33e6386 100644
--- a/drivers/usb/misc/Kconfig
+++ b/drivers/usb/misc/Kconfig
@@ -272,3 +272,12 @@ config USB_CHAOSKEY
 
  To compile this driver as a module, choose M here: the
  module will be called chaoskey.
+
+config INTEL_MUX_GPIO
+   tristate "Intel dual role port mux controlled by GPIOs"
+   depends on GPIOLIB
+   depends on ACPI
+   select USB_MUX
+   help
+ Say Y here to enable support for Intel dual role port mux
+ controlled by GPIOs.
diff --git a/drivers/usb/misc/Makefile b/drivers/usb/misc/Makefile
index fd79dd5..da4fb4e 100644
--- a/drivers/usb/misc/Makefile
+++ b/drivers/usb/misc/Makefile
@@ -31,3 +31,4 @@ obj-$(CONFIG_USB_SISUSBVGA)   += sisusbvga/
 obj-$(CONFIG_USB_LINK_LAYER_TEST)  += lvstest.o
 
 obj-$(CONFIG_USB_MUX)  += mux.o
+obj-$(CONFIG_INTEL_MUX_GPIO)   += intel-mux-gpio.o
diff --git a/drivers/usb/misc/intel-mux-gpio.c 
b/drivers/usb/misc/intel-mux-gpio.c
new file mode 100644
index 000..ae109e3
--- /dev/null
+++ b/drivers/usb/misc/intel-mux-gpio.c
@@ -0,0 +1,126 @@
+/*
+ * USB Dual Role Port Mux driver controlled by gpios
+ *
+ * Copyright (c) 2016, Intel Corporation.
+ * Author: David Cohen 
+ * Author: Lu Baolu 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+struct vuport {
+   struct usb_mux_dev umdev;
+   struct gpio_desc *gpio_vbus_en;
+   struct gpio_desc *gpio_usb_mux;
+};
+
+/*
+ * id == 0, HOST connected, USB port should be set to peripheral
+ * id == 1, HOST disconnected, USB port should be set to host
+ *
+ * Peripheral: set USB mux to peripheral and disable VBUS
+ * Host: set USB mux to host and enable VBUS
+ */
+static inline int vuport_set_port(struct usb_mux_dev *umdev, int id)
+{
+   struct vuport *vup = container_of(umdev, struct vuport, umdev);
+
+   dev_dbg(umdev->dev, "USB PORT ID: %s\n", id ? "HOST" : "PERIPHERAL");
+
+   gpiod_set_value_cansleep(vup->gpio_usb_mux, !id);
+   gpiod_set_value_cansleep(vup->gpio_vbus_en, id);
+
+   return 0;
+}
+
+static int vuport_cable_set(struct usb_mux_dev *umdev)
+{
+   return vuport_set_port(umdev, 1);
+}
+
+static int vuport_cable_unset(struct usb_mux_dev *umdev)
+{
+   return vuport_set_port(umdev, 0);
+}
+
+static int vuport_probe(struct platform_device *pdev)
+{
+   struct usb_mux_dev *umdev;
+   struct device *dev = >dev;
+   struct vuport *vup;
+
+   vup = devm_kzalloc(dev, sizeof(*vup), GFP_KERNEL);
+   if (!vup)
+   return -ENOMEM;
+
+   /* retrieve vbus and mux gpios */
+   vup->gpio_vbus_en = devm_gpiod_get_optional(dev,
+   "vbus_en", GPIOD_ASIS);
+   if (IS_ERR(vup->gpio_vbus_en))
+   return PTR_ERR(vup->gpio_vbus_en);
+
+   vup->gpio_usb_mux = devm_gpiod_get_optional(dev,
+   "usb_mux", GPIOD_ASIS);
+   if (IS_ERR(vup->gpio_usb_mux))
+   return PTR_ERR(vup->gpio_usb_mux);
+
+   /* populate the mux generic structure */
+   umdev = >umdev;
+   umdev->dev = dev;
+   umdev->cable_name = "USB-HOST";
+   umdev->cable_set_cb = vuport_cable_set;
+   umdev->cable_unset_cb = vuport_cable_unset;
+
+   return usb_mux_register(umdev);
+}
+
+static int vuport_remove(struct platform_device *pdev)
+{
+   return usb_mux_unregister(>dev);
+}
+
+#ifdef CONFIG_PM_SLEEP
+/*
+ * In case a micro A cable was plugged in while device was sleeping,
+ * we missed the interrupt. We need to poll usb id gpio when waking the
+ * driver to detect the missed 

[PATCH 2/7] extcon: usb-gpio: add support for ACPI gpio interface

2016-03-02 Thread Lu Baolu
GPIO resource could be retrieved through ACPI as well.

Signed-off-by: Lu Baolu 
Reviewed-by: Felipe Balbi 
---
 drivers/extcon/extcon-usb-gpio.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/extcon/extcon-usb-gpio.c b/drivers/extcon/extcon-usb-gpio.c
index af9c8b0..472c431 100644
--- a/drivers/extcon/extcon-usb-gpio.c
+++ b/drivers/extcon/extcon-usb-gpio.c
@@ -26,6 +26,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define USB_GPIO_DEBOUNCE_MS   20  /* ms */
 
@@ -91,7 +92,7 @@ static int usb_extcon_probe(struct platform_device *pdev)
struct usb_extcon_info *info;
int ret;
 
-   if (!np)
+   if (!np && !ACPI_HANDLE(dev))
return -EINVAL;
 
info = devm_kzalloc(>dev, sizeof(*info), GFP_KERNEL);
-- 
2.1.4



[PATCH 4/7] usb: misc: add driver for Intel gpio controlled port mux

2016-03-02 Thread Lu Baolu
In some Intel platforms, a single usb port is shared between USB host
and device controller. The shared port is under control of GPIO pins.

This patch adds the support for USB GPIO controlled port mux.

Signed-off-by: David Cohen 
Signed-off-by: Lu Baolu 
Reviewed-by: Heikki Krogerus 
Reviewed-by: Felipe Balbi 
---
 MAINTAINERS   |   1 +
 drivers/usb/misc/Kconfig  |   9 +++
 drivers/usb/misc/Makefile |   1 +
 drivers/usb/misc/intel-mux-gpio.c | 126 ++
 4 files changed, 137 insertions(+)
 create mode 100644 drivers/usb/misc/intel-mux-gpio.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 45f1e1e..0f321e4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11395,6 +11395,7 @@ L:  linux-...@vger.kernel.org
 S: Supported
 F: drivers/usb/misc/mux.c
 F: include/linux/usb/mux.h
+F: drivers/usb/misc/intel-mux-gpio.c
 
 USB PRINTER DRIVER (usblp)
 M: Pete Zaitcev 
diff --git a/drivers/usb/misc/Kconfig b/drivers/usb/misc/Kconfig
index 6496d17..33e6386 100644
--- a/drivers/usb/misc/Kconfig
+++ b/drivers/usb/misc/Kconfig
@@ -272,3 +272,12 @@ config USB_CHAOSKEY
 
  To compile this driver as a module, choose M here: the
  module will be called chaoskey.
+
+config INTEL_MUX_GPIO
+   tristate "Intel dual role port mux controlled by GPIOs"
+   depends on GPIOLIB
+   depends on ACPI
+   select USB_MUX
+   help
+ Say Y here to enable support for Intel dual role port mux
+ controlled by GPIOs.
diff --git a/drivers/usb/misc/Makefile b/drivers/usb/misc/Makefile
index fd79dd5..da4fb4e 100644
--- a/drivers/usb/misc/Makefile
+++ b/drivers/usb/misc/Makefile
@@ -31,3 +31,4 @@ obj-$(CONFIG_USB_SISUSBVGA)   += sisusbvga/
 obj-$(CONFIG_USB_LINK_LAYER_TEST)  += lvstest.o
 
 obj-$(CONFIG_USB_MUX)  += mux.o
+obj-$(CONFIG_INTEL_MUX_GPIO)   += intel-mux-gpio.o
diff --git a/drivers/usb/misc/intel-mux-gpio.c 
b/drivers/usb/misc/intel-mux-gpio.c
new file mode 100644
index 000..ae109e3
--- /dev/null
+++ b/drivers/usb/misc/intel-mux-gpio.c
@@ -0,0 +1,126 @@
+/*
+ * USB Dual Role Port Mux driver controlled by gpios
+ *
+ * Copyright (c) 2016, Intel Corporation.
+ * Author: David Cohen 
+ * Author: Lu Baolu 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+struct vuport {
+   struct usb_mux_dev umdev;
+   struct gpio_desc *gpio_vbus_en;
+   struct gpio_desc *gpio_usb_mux;
+};
+
+/*
+ * id == 0, HOST connected, USB port should be set to peripheral
+ * id == 1, HOST disconnected, USB port should be set to host
+ *
+ * Peripheral: set USB mux to peripheral and disable VBUS
+ * Host: set USB mux to host and enable VBUS
+ */
+static inline int vuport_set_port(struct usb_mux_dev *umdev, int id)
+{
+   struct vuport *vup = container_of(umdev, struct vuport, umdev);
+
+   dev_dbg(umdev->dev, "USB PORT ID: %s\n", id ? "HOST" : "PERIPHERAL");
+
+   gpiod_set_value_cansleep(vup->gpio_usb_mux, !id);
+   gpiod_set_value_cansleep(vup->gpio_vbus_en, id);
+
+   return 0;
+}
+
+static int vuport_cable_set(struct usb_mux_dev *umdev)
+{
+   return vuport_set_port(umdev, 1);
+}
+
+static int vuport_cable_unset(struct usb_mux_dev *umdev)
+{
+   return vuport_set_port(umdev, 0);
+}
+
+static int vuport_probe(struct platform_device *pdev)
+{
+   struct usb_mux_dev *umdev;
+   struct device *dev = >dev;
+   struct vuport *vup;
+
+   vup = devm_kzalloc(dev, sizeof(*vup), GFP_KERNEL);
+   if (!vup)
+   return -ENOMEM;
+
+   /* retrieve vbus and mux gpios */
+   vup->gpio_vbus_en = devm_gpiod_get_optional(dev,
+   "vbus_en", GPIOD_ASIS);
+   if (IS_ERR(vup->gpio_vbus_en))
+   return PTR_ERR(vup->gpio_vbus_en);
+
+   vup->gpio_usb_mux = devm_gpiod_get_optional(dev,
+   "usb_mux", GPIOD_ASIS);
+   if (IS_ERR(vup->gpio_usb_mux))
+   return PTR_ERR(vup->gpio_usb_mux);
+
+   /* populate the mux generic structure */
+   umdev = >umdev;
+   umdev->dev = dev;
+   umdev->cable_name = "USB-HOST";
+   umdev->cable_set_cb = vuport_cable_set;
+   umdev->cable_unset_cb = vuport_cable_unset;
+
+   return usb_mux_register(umdev);
+}
+
+static int vuport_remove(struct platform_device *pdev)
+{
+   return usb_mux_unregister(>dev);
+}
+
+#ifdef CONFIG_PM_SLEEP
+/*
+ * In case a micro A cable was plugged in while device was sleeping,
+ * we missed the interrupt. We need to poll usb id gpio when waking the
+ * driver to detect the missed event.
+ * We use 'complete' callback to give time to all extcon listeners to
+ * resume before we send new events.
+ */
+static const struct dev_pm_ops vuport_pm_ops = {
+   

[PATCH 5/7] usb: misc: add driver for Intel drcfg controlled port mux

2016-03-02 Thread Lu Baolu
Several Intel PCHs and SOCs have an internal mux that is used to
share one USB port between device controller and host controller.
The mux is handled through the Dual Role Configuration Register.

Signed-off-by: Heikki Krogerus 
Signed-off-by: Lu Baolu 
Signed-off-by: Wu Hao 
Reviewed-by: Felipe Balbi 
---
 MAINTAINERS|   1 +
 drivers/usb/misc/Kconfig   |   7 ++
 drivers/usb/misc/Makefile  |   1 +
 drivers/usb/misc/intel-mux-drcfg.c | 174 +
 4 files changed, 183 insertions(+)
 create mode 100644 drivers/usb/misc/intel-mux-drcfg.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 0f321e4..20eb873 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11396,6 +11396,7 @@ S:  Supported
 F: drivers/usb/misc/mux.c
 F: include/linux/usb/mux.h
 F: drivers/usb/misc/intel-mux-gpio.c
+F: drivers/usb/misc/intel-mux-drcfg.c
 
 USB PRINTER DRIVER (usblp)
 M: Pete Zaitcev 
diff --git a/drivers/usb/misc/Kconfig b/drivers/usb/misc/Kconfig
index 33e6386..befd910 100644
--- a/drivers/usb/misc/Kconfig
+++ b/drivers/usb/misc/Kconfig
@@ -281,3 +281,10 @@ config INTEL_MUX_GPIO
help
  Say Y here to enable support for Intel dual role port mux
  controlled by GPIOs.
+
+config INTEL_MUX_DRCFG
+   tristate "Intel dual role port mux controlled by register"
+   select USB_MUX
+   help
+ Say Y here to enable support for Intel dual role port mux
+ controlled by the Dual Role Configuration Registers.
diff --git a/drivers/usb/misc/Makefile b/drivers/usb/misc/Makefile
index da4fb4e..c4d19a0 100644
--- a/drivers/usb/misc/Makefile
+++ b/drivers/usb/misc/Makefile
@@ -32,3 +32,4 @@ obj-$(CONFIG_USB_LINK_LAYER_TEST) += lvstest.o
 
 obj-$(CONFIG_USB_MUX)  += mux.o
 obj-$(CONFIG_INTEL_MUX_GPIO)   += intel-mux-gpio.o
+obj-$(CONFIG_INTEL_MUX_DRCFG)  += intel-mux-drcfg.o
diff --git a/drivers/usb/misc/intel-mux-drcfg.c 
b/drivers/usb/misc/intel-mux-drcfg.c
new file mode 100644
index 000..29081c5
--- /dev/null
+++ b/drivers/usb/misc/intel-mux-drcfg.c
@@ -0,0 +1,174 @@
+/**
+ * intel-mux-drcfg.c - Driver for Intel USB mux via register
+ *
+ * Copyright (C) 2016 Intel Corporation
+ * Author: Heikki Krogerus 
+ * Author: Lu Baolu 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define INTEL_MUX_CFG0 0x00
+#define INTEL_MUX_CFG1 0x04
+#define CFG0_SW_IDPIN  BIT(20)
+#define CFG0_SW_IDPIN_EN   BIT(21)
+#define CFG0_SW_VBUS_VALID BIT(24)
+#define CFG1_SW_MODE   BIT(29)
+#define CFG1_POLL_TIMEOUT  1000
+
+struct intel_usb_mux {
+   struct usb_mux_dev umdev;
+   void __iomem *regs;
+   u32 cfg0_ctx;
+};
+
+static inline int intel_mux_drcfg_switch(struct usb_mux_dev *umdev, bool host)
+{
+   struct intel_usb_mux *mux;
+   unsigned long timeout;
+   u32 data;
+
+   mux = container_of(umdev, struct intel_usb_mux, umdev);
+
+   /* Check and set mux to SW controlled mode */
+   data = readl(mux->regs + INTEL_MUX_CFG0);
+   if (!(data & CFG0_SW_IDPIN_EN)) {
+   data |= CFG0_SW_IDPIN_EN;
+   writel(data, mux->regs + INTEL_MUX_CFG0);
+   }
+
+   /*
+* Configure CFG0 to switch the mux and VBUS_VALID bit is
+* required for device mode.
+*/
+   data = readl(mux->regs + INTEL_MUX_CFG0);
+   if (host)
+   data &= ~(CFG0_SW_IDPIN | CFG0_SW_VBUS_VALID);
+   else
+   data |= (CFG0_SW_IDPIN | CFG0_SW_VBUS_VALID);
+   writel(data, mux->regs + INTEL_MUX_CFG0);
+
+   /*
+* Polling CFG1 for safety, most case it takes about 600ms
+* to finish mode switching, set TIMEOUT long enough.
+*/
+   timeout = jiffies + msecs_to_jiffies(CFG1_POLL_TIMEOUT);
+
+   /* Polling on CFG1 register to confirm mode switch. */
+   while (!time_after(jiffies, timeout)) {
+   data = readl(mux->regs + INTEL_MUX_CFG1);
+   if (!(host ^ (data & CFG1_SW_MODE)))
+   return 0;
+   /* interval for polling is set to about 5ms */
+   usleep_range(5000, 5100);
+   }
+
+   return -ETIMEDOUT;
+}
+
+static int intel_mux_drcfg_cable_set(struct usb_mux_dev *umdev)
+{
+   dev_dbg(umdev->dev, "drcfg mux switch to HOST\n");
+
+   return intel_mux_drcfg_switch(umdev, true);
+}
+
+static int intel_mux_drcfg_cable_unset(struct usb_mux_dev *umdev)
+{
+   dev_dbg(umdev->dev, "drcfg mux switch to DEVICE\n");
+
+   return intel_mux_drcfg_switch(umdev, false);
+}
+

[PATCH 3/7] usb: misc: add common code for Intel dual role port mux

2016-03-02 Thread Lu Baolu
Several Intel PCHs and SOCs have an internal mux that is used to
share one USB port between device controller and host controller.

A usb port mux could be abstracted as the following elements:
1) mux state: HOST or PERIPHERAL;
2) an extcon cable which triggers the change of mux state between
   HOST and PERIPHERAL;
3) The required action to do the real port switch.

This patch adds common code to handle the USB port mux. With this
common code, each individual mux driver, which is always platform
dependent, can focus on the actual mux switch operation.

Signed-off-by: Lu Baolu 
Reviewed-by: Heikki Krogerus 
Reviewed-by: Felipe Balbi 
---
 MAINTAINERS   |   7 ++
 drivers/usb/misc/Kconfig  |   4 ++
 drivers/usb/misc/Makefile |   2 +
 drivers/usb/misc/mux.c| 172 ++
 include/linux/usb/mux.h   |  71 +++
 5 files changed, 256 insertions(+)
 create mode 100644 drivers/usb/misc/mux.c
 create mode 100644 include/linux/usb/mux.h

diff --git a/MAINTAINERS b/MAINTAINERS
index d894ee2..45f1e1e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11389,6 +11389,13 @@ T: git 
git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
 S: Maintained
 F: drivers/usb/phy/
 
+USB PORT MUX DRIVER
+M: Lu Baolu 
+L: linux-...@vger.kernel.org
+S: Supported
+F: drivers/usb/misc/mux.c
+F: include/linux/usb/mux.h
+
 USB PRINTER DRIVER (usblp)
 M: Pete Zaitcev 
 L: linux-...@vger.kernel.org
diff --git a/drivers/usb/misc/Kconfig b/drivers/usb/misc/Kconfig
index f7a7fc2..6496d17 100644
--- a/drivers/usb/misc/Kconfig
+++ b/drivers/usb/misc/Kconfig
@@ -3,6 +3,10 @@
 #
 comment "USB Miscellaneous drivers"
 
+config USB_MUX
+   select EXTCON
+   def_bool n
+
 config USB_EMI62
tristate "EMI 6|2m USB Audio interface support"
---help---
diff --git a/drivers/usb/misc/Makefile b/drivers/usb/misc/Makefile
index 45fd4ac..fd79dd5 100644
--- a/drivers/usb/misc/Makefile
+++ b/drivers/usb/misc/Makefile
@@ -29,3 +29,5 @@ obj-$(CONFIG_USB_CHAOSKEY)+= chaoskey.o
 
 obj-$(CONFIG_USB_SISUSBVGA)+= sisusbvga/
 obj-$(CONFIG_USB_LINK_LAYER_TEST)  += lvstest.o
+
+obj-$(CONFIG_USB_MUX)  += mux.o
diff --git a/drivers/usb/misc/mux.c b/drivers/usb/misc/mux.c
new file mode 100644
index 000..e353fff
--- /dev/null
+++ b/drivers/usb/misc/mux.c
@@ -0,0 +1,172 @@
+/**
+ * mux.c - USB Port Mux support
+ *
+ * Copyright (C) 2016 Intel Corporation
+ *
+ * Author: Lu Baolu 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include 
+#include 
+#include 
+#include 
+
+static int usb_mux_change_state(struct usb_mux *mux, int state)
+{
+   int ret;
+   struct usb_mux_dev *umdev = mux->umdev;
+
+   dev_WARN_ONCE(umdev->dev, !mutex_is_locked(&mux->mux_mutex),
+   "mutex is unlocked\n");
+
+   mux->mux_state = state;
+
+   if (mux->mux_state)
+   ret = umdev->cable_set_cb(umdev);
+   else
+   ret = umdev->cable_unset_cb(umdev);
+
+   return ret;
+}
+
+static int usb_mux_notifier(struct notifier_block *nb,
+   unsigned long event, void *ptr)
+{
+   struct usb_mux *mux;
+   int state;
+   int ret = NOTIFY_DONE;
+
+   mux = container_of(nb, struct usb_mux, nb);
+
+   state = extcon_get_cable_state(mux->obj.edev,
+   mux->umdev->cable_name);
+
+   if (mux->mux_state == -1 || mux->mux_state != state) {
+   mutex_lock(&mux->mux_mutex);
+   ret = usb_mux_change_state(mux, state);
+   mutex_unlock(&mux->mux_mutex);
+   }
+
+   return ret;
+}
+
+static ssize_t mux_debug_read(struct file *file, char __user *user_buf,
+   size_t len, loff_t *offset)
+{
+   struct usb_mux *mux = file->private_data;
+   char output_buf[16];
+
+   memset(output_buf, 0, sizeof(output_buf));
+   if (mux->mux_state)
+   strcpy(output_buf, "host\n");
+   else
+   strcpy(output_buf, "peripheral\n");
+
+   return simple_read_from_buffer(user_buf, len, offset,
+   output_buf, strlen(output_buf));
+}
+
+static ssize_t mux_debug_write(struct file *file, const char __user *user_buf,
+   size_t count, loff_t *offset)
+{
+   struct usb_mux *mux = file->private_data;
+   char input_buf[16];
+   int size, state;
+
+   size = min(count, sizeof(input_buf) - 1);
+   memset(input_buf, 0, sizeof(input_buf));
+   if (strncpy_from_user(input_buf, user_buf, size) < 0)
+   return -EFAULT;
+
+   if (!strncmp(input_buf, "host", 4))
+   state = 1;
+   else if 

[PATCH 5/7] usb: misc: add driver for Intel drcfg controlled port mux

2016-03-02 Thread Lu Baolu
Several Intel PCHs and SOCs have an internal mux that is used to
share one USB port between device controller and host controller.
The mux is handled through the Dual Role Configuration Register.

Signed-off-by: Heikki Krogerus 
Signed-off-by: Lu Baolu 
Signed-off-by: Wu Hao 
Reviewed-by: Felipe Balbi 
---
 MAINTAINERS|   1 +
 drivers/usb/misc/Kconfig   |   7 ++
 drivers/usb/misc/Makefile  |   1 +
 drivers/usb/misc/intel-mux-drcfg.c | 174 +
 4 files changed, 183 insertions(+)
 create mode 100644 drivers/usb/misc/intel-mux-drcfg.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 0f321e4..20eb873 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11396,6 +11396,7 @@ S:  Supported
 F: drivers/usb/misc/mux.c
 F: include/linux/usb/mux.h
 F: drivers/usb/misc/intel-mux-gpio.c
+F: drivers/usb/misc/intel-mux-drcfg.c
 
 USB PRINTER DRIVER (usblp)
 M: Pete Zaitcev 
diff --git a/drivers/usb/misc/Kconfig b/drivers/usb/misc/Kconfig
index 33e6386..befd910 100644
--- a/drivers/usb/misc/Kconfig
+++ b/drivers/usb/misc/Kconfig
@@ -281,3 +281,10 @@ config INTEL_MUX_GPIO
help
  Say Y here to enable support for Intel dual role port mux
  controlled by GPIOs.
+
+config INTEL_MUX_DRCFG
+   tristate "Intel dual role port mux controlled by register"
+   select USB_MUX
+   help
+ Say Y here to enable support for Intel dual role port mux
+ controlled by the Dual Role Configuration Registers.
diff --git a/drivers/usb/misc/Makefile b/drivers/usb/misc/Makefile
index da4fb4e..c4d19a0 100644
--- a/drivers/usb/misc/Makefile
+++ b/drivers/usb/misc/Makefile
@@ -32,3 +32,4 @@ obj-$(CONFIG_USB_LINK_LAYER_TEST) += lvstest.o
 
 obj-$(CONFIG_USB_MUX)  += mux.o
 obj-$(CONFIG_INTEL_MUX_GPIO)   += intel-mux-gpio.o
+obj-$(CONFIG_INTEL_MUX_DRCFG)  += intel-mux-drcfg.o
diff --git a/drivers/usb/misc/intel-mux-drcfg.c 
b/drivers/usb/misc/intel-mux-drcfg.c
new file mode 100644
index 000..29081c5
--- /dev/null
+++ b/drivers/usb/misc/intel-mux-drcfg.c
@@ -0,0 +1,174 @@
+/**
+ * intel-mux-drcfg.c - Driver for Intel USB mux via register
+ *
+ * Copyright (C) 2016 Intel Corporation
+ * Author: Heikki Krogerus 
+ * Author: Lu Baolu 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define INTEL_MUX_CFG0 0x00
+#define INTEL_MUX_CFG1 0x04
+#define CFG0_SW_IDPIN  BIT(20)
+#define CFG0_SW_IDPIN_EN   BIT(21)
+#define CFG0_SW_VBUS_VALID BIT(24)
+#define CFG1_SW_MODE   BIT(29)
+#define CFG1_POLL_TIMEOUT  1000
+
+struct intel_usb_mux {
+   struct usb_mux_dev umdev;
+   void __iomem *regs;
+   u32 cfg0_ctx;
+};
+
+static inline int intel_mux_drcfg_switch(struct usb_mux_dev *umdev, bool host)
+{
+   struct intel_usb_mux *mux;
+   unsigned long timeout;
+   u32 data;
+
+   mux = container_of(umdev, struct intel_usb_mux, umdev);
+
+   /* Check and set mux to SW controlled mode */
+   data = readl(mux->regs + INTEL_MUX_CFG0);
+   if (!(data & CFG0_SW_IDPIN_EN)) {
+   data |= CFG0_SW_IDPIN_EN;
+   writel(data, mux->regs + INTEL_MUX_CFG0);
+   }
+
+   /*
+* Configure CFG0 to switch the mux and VBUS_VALID bit is
+* required for device mode.
+*/
+   data = readl(mux->regs + INTEL_MUX_CFG0);
+   if (host)
+   data &= ~(CFG0_SW_IDPIN | CFG0_SW_VBUS_VALID);
+   else
+   data |= (CFG0_SW_IDPIN | CFG0_SW_VBUS_VALID);
+   writel(data, mux->regs + INTEL_MUX_CFG0);
+
+   /*
+* Polling CFG1 for safety, most case it takes about 600ms
+* to finish mode switching, set TIMEOUT long enough.
+*/
+   timeout = jiffies + msecs_to_jiffies(CFG1_POLL_TIMEOUT);
+
+   /* Polling on CFG1 register to confirm mode switch. */
+   while (!time_after(jiffies, timeout)) {
+   data = readl(mux->regs + INTEL_MUX_CFG1);
+   if (!(host ^ (data & CFG1_SW_MODE)))
+   return 0;
+   /* interval for polling is set to about 5ms */
+   usleep_range(5000, 5100);
+   }
+
+   return -ETIMEDOUT;
+}
+
+static int intel_mux_drcfg_cable_set(struct usb_mux_dev *umdev)
+{
+   dev_dbg(umdev->dev, "drcfg mux switch to HOST\n");
+
+   return intel_mux_drcfg_switch(umdev, true);
+}
+
+static int intel_mux_drcfg_cable_unset(struct usb_mux_dev *umdev)
+{
+   dev_dbg(umdev->dev, "drcfg mux switch to DEVICE\n");
+
+   return intel_mux_drcfg_switch(umdev, false);
+}
+
+static int intel_mux_drcfg_probe(struct platform_device *pdev)
+{
+   struct intel_usb_mux *mux;
+   struct usb_mux_dev *umdev;
+   struct device *dev = &pdev->dev;
+   

[PATCH 3/7] usb: misc: add common code for Intel dual role port mux

2016-03-02 Thread Lu Baolu
Several Intel PCHs and SOCs have an internal mux that is used to
share one USB port between device controller and host controller.

A usb port mux could be abstracted as the following elements:
1) mux state: HOST or PERIPHERAL;
2) an extcon cable which triggers the change of mux state between
   HOST and PERIPHERAL;
3) The required action to do the real port switch.

This patch adds the common code to handle the USB port mux. With this
common code, an individual mux driver, which is always platform
dependent, can focus on the actual mux switch operation.

Signed-off-by: Lu Baolu 
Reviewed-by: Heikki Krogerus 
Reviewed-by: Felipe Balbi 
---
 MAINTAINERS   |   7 ++
 drivers/usb/misc/Kconfig  |   4 ++
 drivers/usb/misc/Makefile |   2 +
 drivers/usb/misc/mux.c| 172 ++
 include/linux/usb/mux.h   |  71 +++
 5 files changed, 256 insertions(+)
 create mode 100644 drivers/usb/misc/mux.c
 create mode 100644 include/linux/usb/mux.h

diff --git a/MAINTAINERS b/MAINTAINERS
index d894ee2..45f1e1e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11389,6 +11389,13 @@ T: git 
git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
 S: Maintained
 F: drivers/usb/phy/
 
+USB PORT MUX DRIVER
+M: Lu Baolu 
+L: linux-...@vger.kernel.org
+S: Supported
+F: drivers/usb/misc/mux.c
+F: include/linux/usb/mux.h
+
 USB PRINTER DRIVER (usblp)
 M: Pete Zaitcev 
 L: linux-...@vger.kernel.org
diff --git a/drivers/usb/misc/Kconfig b/drivers/usb/misc/Kconfig
index f7a7fc2..6496d17 100644
--- a/drivers/usb/misc/Kconfig
+++ b/drivers/usb/misc/Kconfig
@@ -3,6 +3,10 @@
 #
 comment "USB Miscellaneous drivers"
 
+config USB_MUX
+   select EXTCON
+   def_bool n
+
 config USB_EMI62
tristate "EMI 6|2m USB Audio interface support"
---help---
diff --git a/drivers/usb/misc/Makefile b/drivers/usb/misc/Makefile
index 45fd4ac..fd79dd5 100644
--- a/drivers/usb/misc/Makefile
+++ b/drivers/usb/misc/Makefile
@@ -29,3 +29,5 @@ obj-$(CONFIG_USB_CHAOSKEY)+= chaoskey.o
 
 obj-$(CONFIG_USB_SISUSBVGA)+= sisusbvga/
 obj-$(CONFIG_USB_LINK_LAYER_TEST)  += lvstest.o
+
+obj-$(CONFIG_USB_MUX)  += mux.o
diff --git a/drivers/usb/misc/mux.c b/drivers/usb/misc/mux.c
new file mode 100644
index 000..e353fff
--- /dev/null
+++ b/drivers/usb/misc/mux.c
@@ -0,0 +1,172 @@
+/**
+ * mux.c - USB Port Mux support
+ *
+ * Copyright (C) 2016 Intel Corporation
+ *
+ * Author: Lu Baolu 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include 
+#include 
+#include 
+#include 
+
+static int usb_mux_change_state(struct usb_mux *mux, int state)
+{
+   int ret;
+   struct usb_mux_dev *umdev = mux->umdev;
+
+   dev_WARN_ONCE(umdev->dev, !mutex_is_locked(&mux->mux_mutex),
+   "mutex is unlocked\n");
+
+   mux->mux_state = state;
+
+   if (mux->mux_state)
+   ret = umdev->cable_set_cb(umdev);
+   else
+   ret = umdev->cable_unset_cb(umdev);
+
+   return ret;
+}
+
+static int usb_mux_notifier(struct notifier_block *nb,
+   unsigned long event, void *ptr)
+{
+   struct usb_mux *mux;
+   int state;
+   int ret = NOTIFY_DONE;
+
+   mux = container_of(nb, struct usb_mux, nb);
+
+   state = extcon_get_cable_state(mux->obj.edev,
+   mux->umdev->cable_name);
+
+   if (mux->mux_state == -1 || mux->mux_state != state) {
+   mutex_lock(&mux->mux_mutex);
+   ret = usb_mux_change_state(mux, state);
+   mutex_unlock(&mux->mux_mutex);
+   }
+
+   return ret;
+}
+
+static ssize_t mux_debug_read(struct file *file, char __user *user_buf,
+   size_t len, loff_t *offset)
+{
+   struct usb_mux *mux = file->private_data;
+   char output_buf[16];
+
+   memset(output_buf, 0, sizeof(output_buf));
+   if (mux->mux_state)
+   strcpy(output_buf, "host\n");
+   else
+   strcpy(output_buf, "peripheral\n");
+
+   return simple_read_from_buffer(user_buf, len, offset,
+   output_buf, strlen(output_buf));
+}
+
+static ssize_t mux_debug_write(struct file *file, const char __user *user_buf,
+   size_t count, loff_t *offset)
+{
+   struct usb_mux *mux = file->private_data;
+   char input_buf[16];
+   int size, state;
+
+   size = min(count, sizeof(input_buf) - 1);
+   memset(input_buf, 0, sizeof(input_buf));
+   if (strncpy_from_user(input_buf, user_buf, size) < 0)
+   return -EFAULT;
+
+   if (!strncmp(input_buf, "host", 4))
+   state = 1;
+   else if (!strncmp(input_buf, "peripheral", 10))
+   state = 0;
+   else
+   state = -1;
+
+   if (state != -1) {
+   

[PATCH 0/7] usb: add support for Intel dual role port mux

2016-03-02 Thread Lu Baolu
Intel SoC chips feature USB dual role support. The host role is
provided by the Intel xHCI IP, and the gadget role is provided by an
IP from DesignWare. Tablet platform designs always share a single
port for both host and gadget controllers. There is a mux to
switch the port to the right controller according to the cable
type. The OS needs to provide the callback to control the mux when
a plug-in event occurs. The method to control the mux is platform
dependent. At least three types of implementation can be found
across current devices: 1) GPIO pins; 2) a unit which can be
controlled by memory mapped registers; 3) ACPI ASL code.

This patch series adds support for the Intel dual role port mux.
It includes:
(1) A helper layer on top of extcon for individual mux drivers.
It listens to the USB-HOST extcon cable and calls the switch
callback when the cable state changes.
(2) Drivers for GPIO controlled port mux which could be found on
Baytrail devices. A mfd driver is used to split the GPIOs into
USB gpio extcon device and a USB mux device. Driver for USB
gpio extcon device is already in upstream Linux. This patch
series includes a driver for GPIO USB mux.
(3) Drivers for USB port mux controlled through memory mapped
registers and the logic to create the mux device. This type
of dual role port mux could be found in Cherry Trail and
Broxton devices.

Lu Baolu (7):
  extcon: usb-gpio: add device binding for platform device
  extcon: usb-gpio: add support for ACPI gpio interface
  usb: misc: add common code for Intel dual role port mux
  usb: misc: add driver for Intel gpio controlled port mux
  usb: misc: add driver for Intel drcfg controlled port mux
  usb: pci-quirks: add Intel USB drcfg mux device
  mfd: intel_vuport: Add Intel virtual USB port MFD Driver

 MAINTAINERS|  10 +++
 drivers/extcon/extcon-usb-gpio.c   |  10 ++-
 drivers/mfd/Kconfig|   7 ++
 drivers/mfd/Makefile   |   1 +
 drivers/mfd/intel-vuport.c |  79 +
 drivers/usb/host/pci-quirks.c  |  47 +-
 drivers/usb/host/xhci-ext-caps.h   |   2 +
 drivers/usb/misc/Kconfig   |  20 +
 drivers/usb/misc/Makefile  |   4 +
 drivers/usb/misc/intel-mux-drcfg.c | 174 +
 drivers/usb/misc/intel-mux-gpio.c  | 126 +++
 drivers/usb/misc/mux.c | 172 
 include/linux/usb/mux.h|  71 +++
 13 files changed, 720 insertions(+), 3 deletions(-)
 create mode 100644 drivers/mfd/intel-vuport.c
 create mode 100644 drivers/usb/misc/intel-mux-drcfg.c
 create mode 100644 drivers/usb/misc/intel-mux-gpio.c
 create mode 100644 drivers/usb/misc/mux.c
 create mode 100644 include/linux/usb/mux.h

-- 
2.1.4



[PATCH 0/7] usb: add support for Intel dual role port mux

2016-03-02 Thread Lu Baolu
Intel SoC chips feature USB dual role support. The host role is
provided by the Intel xHCI IP, and the gadget role is provided by an
IP from DesignWare. Tablet platform designs always share a single
port for both host and gadget controllers. There is a mux to
switch the port to the right controller according to the cable
type. The OS needs to provide the callback to control the mux when
a plug-in event occurs. The method to control the mux is platform
dependent. At least three types of implementation can be found
across current devices: 1) GPIO pins; 2) a unit which can be
controlled by memory mapped registers; 3) ACPI ASL code.

This patch series adds support for the Intel dual role port mux.
It includes:
(1) A helper layer on top of extcon for individual mux drivers.
It listens to the USB-HOST extcon cable and calls the switch
callback when the cable state changes.
(2) Drivers for GPIO controlled port mux which could be found on
Baytrail devices. A mfd driver is used to split the GPIOs into
USB gpio extcon device and a USB mux device. Driver for USB
gpio extcon device is already in upstream Linux. This patch
series includes a driver for GPIO USB mux.
(3) Drivers for USB port mux controlled through memory mapped
registers and the logic to create the mux device. This type
of dual role port mux could be found in Cherry Trail and
Broxton devices.

Lu Baolu (7):
  extcon: usb-gpio: add device binding for platform device
  extcon: usb-gpio: add support for ACPI gpio interface
  usb: misc: add common code for Intel dual role port mux
  usb: misc: add driver for Intel gpio controlled port mux
  usb: misc: add driver for Intel drcfg controlled port mux
  usb: pci-quirks: add Intel USB drcfg mux device
  mfd: intel_vuport: Add Intel virtual USB port MFD Driver

 MAINTAINERS|  10 +++
 drivers/extcon/extcon-usb-gpio.c   |  10 ++-
 drivers/mfd/Kconfig|   7 ++
 drivers/mfd/Makefile   |   1 +
 drivers/mfd/intel-vuport.c |  79 +
 drivers/usb/host/pci-quirks.c  |  47 +-
 drivers/usb/host/xhci-ext-caps.h   |   2 +
 drivers/usb/misc/Kconfig   |  20 +
 drivers/usb/misc/Makefile  |   4 +
 drivers/usb/misc/intel-mux-drcfg.c | 174 +
 drivers/usb/misc/intel-mux-gpio.c  | 126 +++
 drivers/usb/misc/mux.c | 172 
 include/linux/usb/mux.h|  71 +++
 13 files changed, 720 insertions(+), 3 deletions(-)
 create mode 100644 drivers/mfd/intel-vuport.c
 create mode 100644 drivers/usb/misc/intel-mux-drcfg.c
 create mode 100644 drivers/usb/misc/intel-mux-gpio.c
 create mode 100644 drivers/usb/misc/mux.c
 create mode 100644 include/linux/usb/mux.h

-- 
2.1.4



[PATCH 7/7] mfd: intel_vuport: Add Intel virtual USB port MFD Driver

2016-03-02 Thread Lu Baolu
Some Intel platforms have a USB port mux controlled by GPIOs.
There's a single ACPI platform device that provides both a USB ID
extcon device and a USB port mux device. This MFD driver will
split the two devices for their respective drivers.

Signed-off-by: Lu Baolu 
Suggested-by: David Cohen 
Reviewed-by: Felipe Balbi 
---
 MAINTAINERS|  1 +
 drivers/mfd/Kconfig|  7 
 drivers/mfd/Makefile   |  1 +
 drivers/mfd/intel-vuport.c | 79 ++
 4 files changed, 88 insertions(+)
 create mode 100644 drivers/mfd/intel-vuport.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 20eb873..6e0bb12 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11397,6 +11397,7 @@ F:  drivers/usb/misc/mux.c
 F: include/linux/usb/mux.h
 F: drivers/usb/misc/intel-mux-gpio.c
 F: drivers/usb/misc/intel-mux-drcfg.c
+F: drivers/mfd/intel-vuport.c
 
 USB PRINTER DRIVER (usblp)
 M: Pete Zaitcev 
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 9ca66de..0913494 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -1534,5 +1534,12 @@ config MFD_VEXPRESS_SYSREG
  System Registers are the platform configuration block
  on the ARM Ltd. Versatile Express board.
 
+config MFD_INTEL_VUPORT
+   tristate "Intel virtual USB port controller"
+   select MFD_CORE
+   help
+ Say Y here to enable support for Intel dual role port mux
+ controlled by 3 GPIOs.
+
 endmenu
 endif
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 0f230a6..0ccd107 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -198,3 +198,4 @@ intel-soc-pmic-objs := intel_soc_pmic_core.o 
intel_soc_pmic_crc.o
 intel-soc-pmic-$(CONFIG_INTEL_PMC_IPC) += intel_soc_pmic_bxtwc.o
 obj-$(CONFIG_INTEL_SOC_PMIC)   += intel-soc-pmic.o
 obj-$(CONFIG_MFD_MT6397)   += mt6397-core.o
+obj-$(CONFIG_MFD_INTEL_VUPORT) += intel-vuport.o
diff --git a/drivers/mfd/intel-vuport.c b/drivers/mfd/intel-vuport.c
new file mode 100644
index 000..63391dc
--- /dev/null
+++ b/drivers/mfd/intel-vuport.c
@@ -0,0 +1,79 @@
+/*
+ * MFD driver for Intel virtual USB port
+ *
+ * Copyright(c) 2016 Intel Corporation.
+ * Author: Lu Baolu 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+/* ACPI GPIO Mappings */
+static const struct acpi_gpio_params id_gpio = { 0, 0, false };
+static const struct acpi_gpio_params vbus_gpio = { 1, 0, false };
+static const struct acpi_gpio_params mux_gpio = { 2, 0, false };
+static const struct acpi_gpio_mapping acpi_usb_gpios[] = {
+   { "id-gpios", &id_gpio, 1 },
+   { "vbus_en-gpios", &vbus_gpio, 1 },
+   { "usb_mux-gpios", &mux_gpio, 1 },
+   { },
+};
+
+static const struct mfd_cell intel_vuport_mfd_cells[] = {
+   {
+   .name = "extcon-usb-gpio",
+   },
+   {
+   .name = "intel-mux-gpio",
+   },
+};
+
+static int vuport_probe(struct platform_device *pdev)
+{
+   struct device *dev = &pdev->dev;
+   int ret;
+
+   ret = acpi_dev_add_driver_gpios(ACPI_COMPANION(dev), acpi_usb_gpios);
+   if (ret)
+   return ret;
+
+   return mfd_add_devices(&pdev->dev, 0, intel_vuport_mfd_cells,
+   ARRAY_SIZE(intel_vuport_mfd_cells), NULL, 0,
+   NULL);
+}
+
+static int vuport_remove(struct platform_device *pdev)
+{
+   mfd_remove_devices(&pdev->dev);
+   acpi_dev_remove_driver_gpios(ACPI_COMPANION(&pdev->dev));
+
+   return 0;
+}
+
+static struct acpi_device_id vuport_acpi_match[] = {
+   { "INT3496" },
+   { }
+};
+MODULE_DEVICE_TABLE(acpi, vuport_acpi_match);
+
+static struct platform_driver vuport_driver = {
+   .driver = {
+   .name = "intel-vuport",
+   .owner = THIS_MODULE,
+   .acpi_match_table = ACPI_PTR(vuport_acpi_match),
+   },
+   .probe = vuport_probe,
+   .remove = vuport_remove,
+};
+
+module_platform_driver(vuport_driver);
+
+MODULE_AUTHOR("Lu Baolu ");
+MODULE_DESCRIPTION("Intel virtual USB port");
+MODULE_LICENSE("GPL v2");
-- 
2.1.4



[PATCH 6/7] usb: pci-quirks: add Intel USB drcfg mux device

2016-03-02 Thread Lu Baolu
In some Intel platforms, a single usb port is shared between USB host
and device controllers. The shared port is under control of a switch
which is defined in the Intel vendor defined extended capability for
xHCI.

This patch adds the support to detect and create the platform device
for the port mux switch.

Signed-off-by: Lu Baolu 
Reviewed-by: Felipe Balbi 
---
 drivers/usb/host/pci-quirks.c| 47 ++--
 drivers/usb/host/xhci-ext-caps.h |  2 ++
 2 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c
index 35af362..6a737cf 100644
--- a/drivers/usb/host/pci-quirks.c
+++ b/drivers/usb/host/pci-quirks.c
@@ -16,10 +16,11 @@
 #include 
 #include 
 #include 
+#include 
+
 #include "pci-quirks.h"
 #include "xhci-ext-caps.h"
 
-
 #define UHCI_USBLEGSUP 0xc0/* legacy support */
 #define UHCI_USBCMD0   /* command register */
 #define UHCI_USBINTR   4   /* interrupt register */
@@ -78,6 +79,9 @@
 #define USB_INTEL_USB3_PSSEN   0xD8
 #define USB_INTEL_USB3PRM  0xDC
 
+#define DEVICE_ID_INTEL_CHERRYVIEW_XHCI0x22b5
+#define DEVICE_ID_INTEL_BROXTON_M_XHCI 0x0aa8
+
 /*
  * amd_chipset_gen values represent AMD different chipset generations
  */
@@ -956,6 +960,41 @@ void usb_disable_xhci_ports(struct pci_dev *xhci_pdev)
 }
 EXPORT_SYMBOL_GPL(usb_disable_xhci_ports);
 
+static void create_intel_usb_mux_device(struct pci_dev *xhci_pdev,
+   void __iomem *base)
+{
+   struct platform_device *plat_dev;
+   struct property_set pset;
+   int ret;
+
+   struct property_entry pentry[] = {
+   PROPERTY_ENTRY_U64("reg-start",
+   pci_resource_start(xhci_pdev, 0) + 0x80d8),
+   PROPERTY_ENTRY_U64("reg-size", 8),
+   { },
+   };
+
+   if (!xhci_find_next_ext_cap(base, 0, XHCI_EXT_CAPS_INTEL_USB_MUX))
+   return;
+
+   plat_dev = platform_device_alloc("intel-mux-drcfg",
+   PLATFORM_DEVID_AUTO);
+   if (!plat_dev)
+   return;
+
+   plat_dev->dev.parent = &xhci_pdev->dev;
+   pset.properties = pentry;
+   platform_device_add_properties(plat_dev, &pset);
+
+   ret = platform_device_add(plat_dev);
+   if (ret) {
+   dev_warn(&xhci_pdev->dev,
+   "failed to create mux device with error %d",
+   ret);
+   platform_device_put(plat_dev);
+   }
+}
+
 /**
  * PCI Quirks for xHCI.
  *
@@ -1022,8 +1061,12 @@ static void quirk_usb_handoff_xhci(struct pci_dev *pdev)
writel(val, base + ext_cap_offset + XHCI_LEGACY_CONTROL_OFFSET);
 
 hc_init:
-   if (pdev->vendor == PCI_VENDOR_ID_INTEL)
+   if (pdev->vendor == PCI_VENDOR_ID_INTEL) {
usb_enable_intel_xhci_ports(pdev);
+   if (pdev->device == DEVICE_ID_INTEL_CHERRYVIEW_XHCI ||
+   pdev->device == DEVICE_ID_INTEL_BROXTON_M_XHCI)
+   create_intel_usb_mux_device(pdev, base);
+   }
 
op_reg_base = base + XHCI_HC_LENGTH(readl(base));
 
diff --git a/drivers/usb/host/xhci-ext-caps.h b/drivers/usb/host/xhci-ext-caps.h
index e0244fb..e368ccb 100644
--- a/drivers/usb/host/xhci-ext-caps.h
+++ b/drivers/usb/host/xhci-ext-caps.h
@@ -51,6 +51,8 @@
 #define XHCI_EXT_CAPS_ROUTE5
 /* IDs 6-9 reserved */
 #define XHCI_EXT_CAPS_DEBUG10
+/* Vendor defined 192-255 */
+#define XHCI_EXT_CAPS_INTEL_USB_MUX192
 /* USB Legacy Support Capability - section 7.1.1 */
 #define XHCI_HC_BIOS_OWNED (1 << 16)
 #define XHCI_HC_OS_OWNED   (1 << 24)
-- 
2.1.4



  1   2   3   4   5   6   7   8   9   10   >