date:20160327

[PATCH 1/2] mm/page_ref: use page_ref helper instead of direct modification of _count

2016-03-27 Thread js1304

From: Joonsoo Kim 

page_reference manipulation functions are introduced to track down
reference count change of the page. Use it instead of direct modification
of _count.

Signed-off-by: Joonsoo Kim 
---
 drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 2 +-
 drivers/net/ethernet/qlogic/qede/qede_main.c   | 2 +-
 mm/filemap.c   | 2 +-
 net/wireless/util.c| 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c 
b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index fa05e34..8acd7c0 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -23,7 +23,7 @@ static void nicvf_get_page(struct nicvf *nic)
if (!nic->rb_pageref || !nic->rb_page)
return;
 
-   atomic_add(nic->rb_pageref, &nic->rb_page->_count);
+   page_ref_add(nic->rb_page, nic->rb_pageref);
nic->rb_pageref = 0;
 }
 
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c 
b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 518af32..394c97ff 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -791,7 +791,7 @@ static inline int qede_realloc_rx_buffer(struct qede_dev 
*edev,
 * network stack to take the ownership of the page
 * which can be recycled multiple times by the driver.
 */
-   atomic_inc(&curr_cons->data->_count);
+   page_ref_inc(curr_cons->data);
qede_reuse_page(edev, rxq, curr_cons);
}
 
diff --git a/mm/filemap.c b/mm/filemap.c
index a8c69c8..0ebd326 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -213,7 +213,7 @@ void __delete_from_page_cache(struct page *page, void 
*shadow)
 * some other bad page check should catch it later.
 */
page_mapcount_reset(page);
-   atomic_sub(mapcount, &page->_count);
+   page_ref_sub(page, mapcount);
}
}
 
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 9f440a9..e22432a 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -651,7 +651,7 @@ __frame_add_frag(struct sk_buff *skb, struct page *page,
struct skb_shared_info *sh = skb_shinfo(skb);
int page_offset;
 
-   atomic_inc(&page->_count);
+   page_ref_inc(page);
page_offset = ptr - page_address(page);
skb_add_rx_frag(skb, sh->nr_frags, page, page_offset, len, size);
 }
-- 
1.9.1

[PATCH 1/2] mm/page_ref: use page_ref helper instead of direct modification of _count

2016-03-27 Thread js1304

From: Joonsoo Kim 

page_reference manipulation functions are introduced to track down
reference count change of the page. Use it instead of direct modification
of _count.

Signed-off-by: Joonsoo Kim 
---
 drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 2 +-
 drivers/net/ethernet/qlogic/qede/qede_main.c   | 2 +-
 mm/filemap.c   | 2 +-
 net/wireless/util.c| 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c 
b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index fa05e34..8acd7c0 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -23,7 +23,7 @@ static void nicvf_get_page(struct nicvf *nic)
if (!nic->rb_pageref || !nic->rb_page)
return;
 
-   atomic_add(nic->rb_pageref, &nic->rb_page->_count);
+   page_ref_add(nic->rb_page, nic->rb_pageref);
nic->rb_pageref = 0;
 }
 
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c 
b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 518af32..394c97ff 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -791,7 +791,7 @@ static inline int qede_realloc_rx_buffer(struct qede_dev 
*edev,
 * network stack to take the ownership of the page
 * which can be recycled multiple times by the driver.
 */
-   atomic_inc(&curr_cons->data->_count);
+   page_ref_inc(curr_cons->data);
qede_reuse_page(edev, rxq, curr_cons);
}
 
diff --git a/mm/filemap.c b/mm/filemap.c
index a8c69c8..0ebd326 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -213,7 +213,7 @@ void __delete_from_page_cache(struct page *page, void 
*shadow)
 * some other bad page check should catch it later.
 */
page_mapcount_reset(page);
-   atomic_sub(mapcount, &page->_count);
+   page_ref_sub(page, mapcount);
}
}
 
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 9f440a9..e22432a 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -651,7 +651,7 @@ __frame_add_frag(struct sk_buff *skb, struct page *page,
struct skb_shared_info *sh = skb_shinfo(skb);
int page_offset;
 
-   atomic_inc(&page->_count);
+   page_ref_inc(page);
page_offset = ptr - page_address(page);
skb_add_rx_frag(skb, sh->nr_frags, page, page_offset, len, size);
 }
-- 
1.9.1

Re: [PATCH v2] mmc: Provide tracepoints for request processing

2016-03-27 Thread Baolin Wang

On 25 March 2016 at 22:10, Jens Axboe  wrote:
> On 03/25/2016 02:19 AM, Baolin Wang wrote:
>>
>> diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
>> index fe207e5..d372a2d 100644
>> --- a/drivers/mmc/card/block.c
>> +++ b/drivers/mmc/card/block.c
>> @@ -46,6 +46,9 @@
>>
>>   #include "queue.h"
>>
>> +#define CREATE_TRACE_POINTS
>> +#include 
>> +
>>   MODULE_ALIAS("mmc:block");
>>   #ifdef MODULE_PARAM_PREFIX
>>   #undef MODULE_PARAM_PREFIX
>> @@ -1709,6 +1712,7 @@ static u8 mmc_blk_prep_packed_list(struct mmc_queue
>> *mq, struct request *req)
>> if (phys_segments > max_phys_segs)
>> break;
>>
>> +   trace_mmc_block_packed_req(next);
>> list_add_tail(&next->queuelist, &mqrq->packed->list);
>> cur = next;
>> reqs++;
>
>
> This looks like the only valid trace point in the block part.

OK.

>
>> @@ -1870,6 +1874,7 @@ static int mmc_blk_end_packed_req(struct
>> mmc_queue_req *mq_rq)
>> }
>> list_del_init(&prq->queuelist);
>> blk_end_request(prq, 0, blk_rq_bytes(prq));
>> +   trace_mmc_block_req_done(prq);
>
>
> We just logged a complete event through blk_end_request() here.

I'll remove this repeated tracepoint.

>>
>> @@ -1985,6 +1990,7 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq,
>> struct request *rqc)
>> } else {
>> ret = blk_end_request(req, 0,
>> brq->data.bytes_xfered);
>> +   trace_mmc_block_req_done(req);
>> }
>
>
> Ditto

Will remove it too.

>
>> diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
>> index 6f4323c..d0388cf 100644
>> --- a/drivers/mmc/card/queue.c
>> +++ b/drivers/mmc/card/queue.c
>> @@ -16,6 +16,7 @@
>>   #include 
>>   #include 
>>   #include 
>> +#include 
>>
>>   #include 
>>   #include 
>> @@ -64,6 +65,9 @@ static int mmc_queue_thread(void *d)
>> mq->mqrq_cur->req = req;
>> spin_unlock_irq(q->queue_lock);
>>
>> +   if (req)
>> +   trace_mmc_queue_fetch(req);
>
>
> Issue event was just logged here, if req != NULL. So also redundant.

Makes sense.

>
> Basically only your core mmc events would potentially have merit, the rest
> are all dupes. So nak.

I'll remove the redundant tracepoints. Thanks for your comments.

>
> --
> Jens Axboe
>



-- 
Baolin.wang
Best Regards

Re: [PATCH v2] mmc: Provide tracepoints for request processing

2016-03-27 Thread Baolin Wang

On 25 March 2016 at 22:10, Jens Axboe  wrote:
> On 03/25/2016 02:19 AM, Baolin Wang wrote:
>>
>> diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
>> index fe207e5..d372a2d 100644
>> --- a/drivers/mmc/card/block.c
>> +++ b/drivers/mmc/card/block.c
>> @@ -46,6 +46,9 @@
>>
>>   #include "queue.h"
>>
>> +#define CREATE_TRACE_POINTS
>> +#include 
>> +
>>   MODULE_ALIAS("mmc:block");
>>   #ifdef MODULE_PARAM_PREFIX
>>   #undef MODULE_PARAM_PREFIX
>> @@ -1709,6 +1712,7 @@ static u8 mmc_blk_prep_packed_list(struct mmc_queue
>> *mq, struct request *req)
>> if (phys_segments > max_phys_segs)
>> break;
>>
>> +   trace_mmc_block_packed_req(next);
>> list_add_tail(&next->queuelist, &mqrq->packed->list);
>> cur = next;
>> reqs++;
>
>
> This looks like the only valid trace point in the block part.

OK.

>
>> @@ -1870,6 +1874,7 @@ static int mmc_blk_end_packed_req(struct
>> mmc_queue_req *mq_rq)
>> }
>> list_del_init(&prq->queuelist);
>> blk_end_request(prq, 0, blk_rq_bytes(prq));
>> +   trace_mmc_block_req_done(prq);
>
>
> We just logged a complete event through blk_end_request() here.

I'll remove this repeated tracepoint.

>>
>> @@ -1985,6 +1990,7 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq,
>> struct request *rqc)
>> } else {
>> ret = blk_end_request(req, 0,
>> brq->data.bytes_xfered);
>> +   trace_mmc_block_req_done(req);
>> }
>
>
> Ditto

Will remove it too.

>
>> diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
>> index 6f4323c..d0388cf 100644
>> --- a/drivers/mmc/card/queue.c
>> +++ b/drivers/mmc/card/queue.c
>> @@ -16,6 +16,7 @@
>>   #include 
>>   #include 
>>   #include 
>> +#include 
>>
>>   #include 
>>   #include 
>> @@ -64,6 +65,9 @@ static int mmc_queue_thread(void *d)
>> mq->mqrq_cur->req = req;
>> spin_unlock_irq(q->queue_lock);
>>
>> +   if (req)
>> +   trace_mmc_queue_fetch(req);
>
>
> Issue event was just logged here, if req != NULL. So also redundant.

Makes sense.

>
> Basically only your core mmc events would potentially have merit, the rest
> are all dupes. So nak.

I'll remove the redundant tracepoints. Thanks for your comments.

>
> --
> Jens Axboe
>



-- 
Baolin.wang
Best Regards

Re: [PATCH 10/31] Add sparc-specific parity functions

2016-03-27 Thread Zeng Zhaoxiu


在 2016年03月28日 10:43, David Miller 写道:

From: "zhaoxiu.zeng" 
Date: Sun, 27 Mar 2016 14:43:10 +0800


+
+/*
+ * parityN: returns the parity of a N-bit word,
+ * i.e. the number of 1-bits in x modulo 2.
+ */
+
+#define __arch_parity4(w)  (__arch_hweight8((w) & 0xf) & 1)
+#define __arch_parity8(w)  (__arch_hweight8(w) & 1)
+#define __arch_parity16(w) (__arch_hweight16(w) & 1)
+#define __arch_parity32(w) (__arch_hweight32(w) & 1)
+#define __arch_parity64(w) ((unsigned int)__arch_hweight64(w) & 1)

This looks like asm-generic/ material to me.


This is generic for architectures that have a popcount instruction,
but it costs more than the asm-generic/ version on the others.

Re: [PATCH 10/31] Add sparc-specific parity functions

2016-03-27 Thread Zeng Zhaoxiu


在 2016年03月28日 10:43, David Miller 写道:

From: "zhaoxiu.zeng" 
Date: Sun, 27 Mar 2016 14:43:10 +0800


+
+/*
+ * parityN: returns the parity of a N-bit word,
+ * i.e. the number of 1-bits in x modulo 2.
+ */
+
+#define __arch_parity4(w)  (__arch_hweight8((w) & 0xf) & 1)
+#define __arch_parity8(w)  (__arch_hweight8(w) & 1)
+#define __arch_parity16(w) (__arch_hweight16(w) & 1)
+#define __arch_parity32(w) (__arch_hweight32(w) & 1)
+#define __arch_parity64(w) ((unsigned int)__arch_hweight64(w) & 1)

This looks like asm-generic/ material to me.


This is generic for architectures that have a popcount instruction,
but it costs more than the asm-generic/ version on the others.

Re: [PATCH v1 0/4] Implement SoC bus driver for Vybrid

2016-03-27 Thread maitysanchayan

Hello,

Ping.

- Sanchayan.

On 16-03-11 14:29:27, Sanchayan Maity wrote:
> Hello,
> 
> This patchset implements SoC bus support for Freescale Vybrid platform,
> implementing the following
> https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-devices-soc
> 
> This is a reworked version of an older patchset series posted in June 2015
> which was at v5 then [1]. Since the NVMEM framework was then getting
> introduced, we decided that first a NVMEM driver for OCOTP peripheral
> being in place would be better.
> 
> Compared to the older revisions, this driver now relies on NVMEM
> consumer API using the NVMEM based vf610_ocotp driver which has
> already been in mainline for a while now. Also now a new syscon
> abstraction "syscon_regmap_read_from_offset" is implemented and
> exported from syscon allowing accessing a register from a syscon
> reference like this
> 
> ocotp-cfg1 = <&ocotp 0x20>;
> 
> avoiding code repetition in the driver.
> 
> One point on which we were not sure here is whether we really should
> introduce a new Kconfig symbol as being introduced here. While we
> could just enable it when SOC_VF610 is selected, this however would
> introduce circular dependencies.
> 
> This patch series is based on top of shawn's for-next branch and
> tested on Colibri Vybrid VF50 and VF61 modules.
> 
> Feedback is most welcome.
> 
> [1] Older v5:
> http://lkml.iu.edu/hypermail/linux/kernel/1506.0/03787.html
> Even earlier versions:
> Version 4 of the patchset can be found here
> https://lkml.org/lkml/2015/5/26/199
> Version 3 of the patchset can be found here
> http://www.spinics.net/lists/arm-kernel/msg420847.html
> Version 2 of the patchset can be found here
> http://www.spinics.net/lists/devicetree/msg80654.html
> Version 1 of the patchset can be found here
> http://www.spinics.net/lists/devicetree/msg80257.html
> The RFC version can be found here
> https://lkml.org/lkml/2015/5/11/13
> 
> Regards,
> Sanchayan.
> 
> Sanchayan Maity (4):
>   mfd: syscon: Introduce syscon_regmap_read_from_offset
>   ARM: dts: vfxxx: Add device tree node for OCOTP
>   ARM: dts: vfxxx: Add OCROM and phandle entries for Vybrid SoC bus driver
>   soc: Add SoC bus driver for Freescale Vybrid Platform
> 
>  arch/arm/boot/dts/vfxxx.dtsi |  28 +++-
>  drivers/mfd/syscon.c |  30 
>  drivers/soc/Kconfig  |   1 +
>  drivers/soc/fsl/Kconfig  |  10 +++
>  drivers/soc/fsl/Makefile |   1 +
>  drivers/soc/fsl/soc-vf610.c  | 160 
> +++
>  include/linux/mfd/syscon.h   |  10 +++
>  7 files changed, 239 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/soc/fsl/Kconfig
>  create mode 100644 drivers/soc/fsl/soc-vf610.c
> 
> -- 
> 2.7.2
>

Re: [PATCH v1 0/4] Implement SoC bus driver for Vybrid

2016-03-27 Thread maitysanchayan

Hello,

Ping.

- Sanchayan.

On 16-03-11 14:29:27, Sanchayan Maity wrote:
> Hello,
> 
> This patchset implements SoC bus support for Freescale Vybrid platform,
> implementing the following
> https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-devices-soc
> 
> This is a reworked version of an older patchset series posted in June 2015
> which was at v5 then [1]. Since the NVMEM framework was then getting
> introduced, we decided that first a NVMEM driver for OCOTP peripheral
> being in place would be better.
> 
> Compared to the older revisions, this driver now relies on NVMEM
> consumer API using the NVMEM based vf610_ocotp driver which has
> already been in mainline for a while now. Also now a new syscon
> abstraction "syscon_regmap_read_from_offset" is implemented and
> exported from syscon allowing accessing a register from a syscon
> reference like this
> 
> ocotp-cfg1 = <&ocotp 0x20>;
> 
> avoiding code repetition in the driver.
> 
> One point on which we were not sure here is whether we really should
> introduce a new Kconfig symbol as being introduced here. While we
> could just enable it when SOC_VF610 is selected, this however would
> introduce circular dependencies.
> 
> This patch series is based on top of shawn's for-next branch and
> tested on Colibri Vybrid VF50 and VF61 modules.
> 
> Feedback is most welcome.
> 
> [1] Older v5:
> http://lkml.iu.edu/hypermail/linux/kernel/1506.0/03787.html
> Even earlier versions:
> Version 4 of the patchset can be found here
> https://lkml.org/lkml/2015/5/26/199
> Version 3 of the patchset can be found here
> http://www.spinics.net/lists/arm-kernel/msg420847.html
> Version 2 of the patchset can be found here
> http://www.spinics.net/lists/devicetree/msg80654.html
> Version 1 of the patchset can be found here
> http://www.spinics.net/lists/devicetree/msg80257.html
> The RFC version can be found here
> https://lkml.org/lkml/2015/5/11/13
> 
> Regards,
> Sanchayan.
> 
> Sanchayan Maity (4):
>   mfd: syscon: Introduce syscon_regmap_read_from_offset
>   ARM: dts: vfxxx: Add device tree node for OCOTP
>   ARM: dts: vfxxx: Add OCROM and phandle entries for Vybrid SoC bus driver
>   soc: Add SoC bus driver for Freescale Vybrid Platform
> 
>  arch/arm/boot/dts/vfxxx.dtsi |  28 +++-
>  drivers/mfd/syscon.c |  30 
>  drivers/soc/Kconfig  |   1 +
>  drivers/soc/fsl/Kconfig  |  10 +++
>  drivers/soc/fsl/Makefile |   1 +
>  drivers/soc/fsl/soc-vf610.c  | 160 
> +++
>  include/linux/mfd/syscon.h   |  10 +++
>  7 files changed, 239 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/soc/fsl/Kconfig
>  create mode 100644 drivers/soc/fsl/soc-vf610.c
> 
> -- 
> 2.7.2
>

Re: [PATCH] regulator: s2mps11: Fix invalid minimal selector for buck9 supplying SD card

2016-03-27 Thread Krzysztof Kozlowski

On 28.03.2016 14:35, Anand Moon wrote:
> Should there be a fix in the u-boot for HK for this issue ?

It depends on whether the U-Boot S2MPS11 driver has this bug or not. I did
not observe any issues with U-Boot in this regard.

> mmc card detection logic is pretty old in HK u-boot.

This is not related to MMC card detection logic.

Best regards,
Krzysztof

Re: [PATCH] regulator: s2mps11: Fix invalid minimal selector for buck9 supplying SD card

2016-03-27 Thread Krzysztof Kozlowski

On 28.03.2016 14:35, Anand Moon wrote:
> Should there be a fix in the u-boot for HK for this issue ?

It depends on whether the U-Boot S2MPS11 driver has this bug or not. I did
not observe any issues with U-Boot in this regard.

> mmc card detection logic is pretty old in HK u-boot.

This is not related to MMC card detection logic.

Best regards,
Krzysztof

Re: [PATCH] regulator: s2mps11: Fix invalid minimal selector for buck9 supplying SD card

2016-03-27 Thread Anand Moon

Hi Krzysztof

On 28 March 2016 at 09:02, Krzysztof Kozlowski  wrote:
> On 28.03.2016 10:59, Javier Martinez Canillas wrote:
>> Hello Krzysztof,
>>
>> On 03/27/2016 08:54 PM, Krzysztof Kozlowski wrote:
>>> The buck9 regulator of S2MPS11 PMIC lacked minimal selector for linear
>>> mapping. The mapping starts from 0x40 (3 V).
>>>
>>
>> This patch is a real fix since the SD error goes away and the regulator
>> driver probes correctly now.
>>
>>> This buck9 provides power to other regulators, including LDO13 and LDO19
>>> which supply the MMC2 (SD card).
>>>
>>
>> I think it's worth mentioning that this is the case for the Exynos5422 Odroid
>> XU{3,4} boards. I mean, the regulators can be used for anything but are those
>> boards that use LDO19 as the SD card supply.
>
> Yes, indeed. I forgot to add that details.
>
>>
>> In fact, I wonder if the subject line shouldn't be changed to something like:
>>
>> "regulator: s2mps11: Fix invalid min selector and voltages num for buck9"
>>
>> since the real problem is that the .linear_min_sel and .n_voltages are wrong.
>> The fact that the SD was failing on Odroids is just a consequence of that
>> (although I agree that this information should be part of the commit 
>> message).
>
> Okay, seems more accurate.
>
>>
>>> Bootloader initializes the regulator with value of 0xff (5 V) which is
>>> outside of supported voltage range. When (during boot) constraints to
>>> buck9 were applied, the driver wrote value counting from 0x00, not 0x40.
>>> Effectively driver set lower voltage than required leading to SD card
>>> detection errors on Odroid XU3/XU4:
>>>  mmc1: card never left busy state
>>>  mmc1: error -110 whilst initialising SD card
>>>
>>> Fixes: cb74685ecb39 ("regulator: s2mps11: Add samsung s2mps11 regulator 
>>> driver")
>>> Cc: 
>>> Signed-off-by: Krzysztof Kozlowski 
>>>
>>> ---
>>>
>>> The issue can be reproduced on next-20160324 with
>>> bae4fdc88d7f7dda1 (regulator: core: Ensure we are at least in bounds
>>> for our constraints).
>>> ---
>>>  drivers/regulator/s2mps11.c | 19 ++-
>>>  include/linux/mfd/samsung/s2mps11.h |  9 +
>>>  2 files changed, 27 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c
>>> index d24e2c783dc5..caeefc38ac47 100644
>>> --- a/drivers/regulator/s2mps11.c
>>> +++ b/drivers/regulator/s2mps11.c
>>> @@ -324,6 +324,23 @@ static struct regulator_ops s2mps11_buck_ops = {
>>>  .enable_mask= S2MPS11_ENABLE_MASK   \
>>>  }
>>>
>>> +#define regulator_desc_s2mps11_buck9 {  \
>>> +.name   = "BUCK9",  \
>>> +.id = S2MPS11_BUCK9,\
>>> +.ops= &s2mps11_buck_ops,\
>>> +.type   = REGULATOR_VOLTAGE,\
>>> +.owner  = THIS_MODULE,  \
>>> +.min_uV = MIN_3000_MV,  \
>>> +.uV_step= STEP_25_MV,   \
>>> +.linear_min_sel = S2MPS11_BUCK9_MIN_VSEL,   \
>>
>> I don't have a datasheet for this PMIC but I wonder if buck9 is the only
>> buck regulator whose minimal register value is != 0. If that's not the
>> case, it would be good to fix the descriptions for all other regulators.
>
> Some of them are broken, some not. :) Buck 1-4 and 6 also should have
> minimal selector. Also number of selectors and masks seems to be
> invalid. I already have a plan to fix this up but it is not an urgent
> task because the driver works so far.
>
>>
>>> +.n_voltages = S2MPS11_BUCK9_N_VOLTAGES, \
>>> +.ramp_delay = S2MPS11_RAMP_DELAY,   \
>>> +.vsel_reg   = S2MPS11_REG_B9CTRL2,  \
>>> +.vsel_mask  = S2MPS11_BUCK_VSEL_MASK,   \
>>> +.enable_reg = S2MPS11_REG_B9CTRL1,  \
>>> +.enable_mask= S2MPS11_ENABLE_MASK   \
>>> +}
>>> +
>>>  static const struct regulator_desc s2mps11_regulators[] = {
>>>  regulator_desc_s2mps11_ldo(1, STEP_25_MV),
>>>  regulator_desc_s2mps11_ldo(2, STEP_50_MV),
>>> @@ -371,7 +388,7 @@ static const struct regulator_desc s2mps11_regulators[] 
>>> = {
>>>  regulator_desc_s2mps11_buck6_10(6, MIN_600_MV, STEP_6_25_MV),
>>>  regulator_desc_s2mps11_buck6_10(7, MIN_600_MV, STEP_6_25_MV),
>>>  regulator_desc_s2mps11_buck6_10(8, MIN_600_MV, STEP_6_25_MV),
>>> -regulator_desc_s2mps11_buck6_10(9, MIN_3000_MV, STEP_25_MV),
>>
>> Maybe the regulator_desc_s2mps11_buck6_10() define should be renamed?
>> Since it's no longer true that can be used for buck6-10, so the name
>> is misleading now.
>
> Sure.
>
>>
>> Patch looks good to me though and as I said it fixes the issue so:
>>
>> Reviewed-by: Javier Martinez Canillas

Re: [PATCH] regulator: s2mps11: Fix invalid minimal selector for buck9 supplying SD card

2016-03-27 Thread Anand Moon

Hi Krzysztof

On 28 March 2016 at 09:02, Krzysztof Kozlowski  wrote:
> On 28.03.2016 10:59, Javier Martinez Canillas wrote:
>> Hello Krzysztof,
>>
>> On 03/27/2016 08:54 PM, Krzysztof Kozlowski wrote:
>>> The buck9 regulator of S2MPS11 PMIC lacked minimal selector for linear
>>> mapping. The mapping starts from 0x40 (3 V).
>>>
>>
>> This patch is a real fix since the SD error goes away and the regulator
>> driver probes correctly now.
>>
>>> This buck9 provides power to other regulators, including LDO13 and LDO19
>>> which supply the MMC2 (SD card).
>>>
>>
>> I think it's worth mentioning that this is the case for the Exynos5422 Odroid
>> XU{3,4} boards. I mean, the regulators can be used for anything but are those
>> boards that use LDO19 as the SD card supply.
>
> Yes, indeed. I forgot to add that details.
>
>>
>> In fact, I wonder if the subject line shouldn't be changed to something like:
>>
>> "regulator: s2mps11: Fix invalid min selector and voltages num for buck9"
>>
>> since the real problem is that the .linear_min_sel and .n_voltages are wrong.
>> The fact that the SD was failing on Odroids is just a consequence of that
>> (although I agree that this information should be part of the commit 
>> message).
>
> Okay, seems more accurate.
>
>>
>>> Bootloader initializes the regulator with value of 0xff (5 V) which is
>>> outside of supported voltage range. When (during boot) constraints to
>>> buck9 were applied, the driver wrote value counting from 0x00, not 0x40.
>>> Effectively driver set lower voltage than required leading to SD card
>>> detection errors on Odroid XU3/XU4:
>>>  mmc1: card never left busy state
>>>  mmc1: error -110 whilst initialising SD card
>>>
>>> Fixes: cb74685ecb39 ("regulator: s2mps11: Add samsung s2mps11 regulator 
>>> driver")
>>> Cc: 
>>> Signed-off-by: Krzysztof Kozlowski 
>>>
>>> ---
>>>
>>> The issue can be reproduced on next-20160324 with
>>> bae4fdc88d7f7dda1 (regulator: core: Ensure we are at least in bounds
>>> for our constraints).
>>> ---
>>>  drivers/regulator/s2mps11.c | 19 ++-
>>>  include/linux/mfd/samsung/s2mps11.h |  9 +
>>>  2 files changed, 27 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c
>>> index d24e2c783dc5..caeefc38ac47 100644
>>> --- a/drivers/regulator/s2mps11.c
>>> +++ b/drivers/regulator/s2mps11.c
>>> @@ -324,6 +324,23 @@ static struct regulator_ops s2mps11_buck_ops = {
>>>  .enable_mask= S2MPS11_ENABLE_MASK   \
>>>  }
>>>
>>> +#define regulator_desc_s2mps11_buck9 {  \
>>> +.name   = "BUCK9",  \
>>> +.id = S2MPS11_BUCK9,\
>>> +.ops= &s2mps11_buck_ops,\
>>> +.type   = REGULATOR_VOLTAGE,\
>>> +.owner  = THIS_MODULE,  \
>>> +.min_uV = MIN_3000_MV,  \
>>> +.uV_step= STEP_25_MV,   \
>>> +.linear_min_sel = S2MPS11_BUCK9_MIN_VSEL,   \
>>
>> I don't have a datasheet for this PMIC but I wonder if buck9 is the only
>> buck regulator whose minimal register value is != 0. If that's not the
>> case, it would be good to fix the descriptions for all other regulators.
>
> Some of them are broken, some not. :) Buck 1-4 and 6 also should have
> minimal selector. Also number of selectors and masks seems to be
> invalid. I already have a plan to fix this up but it is not an urgent
> task because the driver works so far.
>
>>
>>> +.n_voltages = S2MPS11_BUCK9_N_VOLTAGES, \
>>> +.ramp_delay = S2MPS11_RAMP_DELAY,   \
>>> +.vsel_reg   = S2MPS11_REG_B9CTRL2,  \
>>> +.vsel_mask  = S2MPS11_BUCK_VSEL_MASK,   \
>>> +.enable_reg = S2MPS11_REG_B9CTRL1,  \
>>> +.enable_mask= S2MPS11_ENABLE_MASK   \
>>> +}
>>> +
>>>  static const struct regulator_desc s2mps11_regulators[] = {
>>>  regulator_desc_s2mps11_ldo(1, STEP_25_MV),
>>>  regulator_desc_s2mps11_ldo(2, STEP_50_MV),
>>> @@ -371,7 +388,7 @@ static const struct regulator_desc s2mps11_regulators[] 
>>> = {
>>>  regulator_desc_s2mps11_buck6_10(6, MIN_600_MV, STEP_6_25_MV),
>>>  regulator_desc_s2mps11_buck6_10(7, MIN_600_MV, STEP_6_25_MV),
>>>  regulator_desc_s2mps11_buck6_10(8, MIN_600_MV, STEP_6_25_MV),
>>> -regulator_desc_s2mps11_buck6_10(9, MIN_3000_MV, STEP_25_MV),
>>
>> Maybe the regulator_desc_s2mps11_buck6_10() define should be renamed?
>> Since it's no longer true that can be used for buck6-10, so the name
>> is misleading now.
>
> Sure.
>
>>
>> Patch looks good to me though and as I said it fixes the issue so:
>>
>> Reviewed-by: Javier Martinez Canillas 
>> Tested-by: Javier Martinez Canillas 
>
> Thanks!
>
> I'll update patch as you suggested and

Re: [PATCH v6 5/7][Resend] cpufreq: Move governor symbols to cpufreq.h

2016-03-27 Thread Viresh Kumar

On 22-03-16, 02:51, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki 
> 
> Move definitions of symbols related to transition latency and
> sampling rate to include/linux/cpufreq.h so they can be used by
> (future) goverernors located outside of drivers/cpufreq/.

s/goverernors/governors

> 
> No functional changes.
> 
> Signed-off-by: Rafael J. Wysocki 
> ---
> 
> This patch was new in v4, no changes since then.
> 
> ---
>  drivers/cpufreq/cpufreq_governor.h |   14 --
>  include/linux/cpufreq.h|   14 ++
>  2 files changed, 14 insertions(+), 14 deletions(-)

Acked-by: Viresh Kumar 

-- 
viresh

Re: [PATCH v6 5/7][Resend] cpufreq: Move governor symbols to cpufreq.h

2016-03-27 Thread Viresh Kumar

On 22-03-16, 02:51, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki 
> 
> Move definitions of symbols related to transition latency and
> sampling rate to include/linux/cpufreq.h so they can be used by
> (future) goverernors located outside of drivers/cpufreq/.

s/goverernors/governors

> 
> No functional changes.
> 
> Signed-off-by: Rafael J. Wysocki 
> ---
> 
> This patch was new in v4, no changes since then.
> 
> ---
>  drivers/cpufreq/cpufreq_governor.h |   14 --
>  include/linux/cpufreq.h|   14 ++
>  2 files changed, 14 insertions(+), 14 deletions(-)

Acked-by: Viresh Kumar 

-- 
viresh

Re: Warnings for invalid VDD (sdhci-s3c)

2016-03-27 Thread Krzysztof Kozlowski

On 27.03.2016 16:41, Anand Moon wrote:
> 
> On my Odroid U3 with debug flags enabled I am observing the deadlock below.

There is a sleep in atomic context and possible deadlock, but:
1. Are you sure it does not happen without the patch?
2. Are you sure it is not the same as already known issue on sdhci-s3c?
For example reported here:
http://www.spinics.net/lists/linux-samsung-soc/msg42398.html

What is reproducibility rate?

Best regards,
Krzysztof

> -
> [  202.519524] BUG: sleeping function called from invalid context at
> kernel/locking/mutex.c:617
> [  202.522364] in_atomic(): 1, irqs_disabled(): 128, pid: 100, name: mmcqd/0
> [  202.529129] 1 lock held by mmcqd/0/100:
> [  202.529150]  #0:  (&(&host->lock)->rlock#2){-.-...}, at:
> [] sdhci_do_set_ios+0x1c/0x484
> [  202.529271] irq event stamp: 703530
> [  202.529291] hardirqs last  enabled at (703529): []
> _raw_spin_unlock_irqrestore+0x6c/0x74
> [  202.529343] hardirqs last disabled at (703530): []
> _raw_spin_lock_irqsave+0x1c/0x84
> [  202.529384] softirqs last  enabled at (703456): []
> __do_softirq+0x244/0x2c0
> [  202.529438] softirqs last disabled at (703445): []
> irq_exit+0xec/0x128
> [  202.529472] Preemption disabled at:[<  (null)>]   (null)
> [  202.534415]
> [  202.534449] CPU: 0 PID: 100 Comm: mmcqd/0 Not tainted 4.6.0-rc1-u3s #38
> [  202.534473] Hardware name: SAMSUNG EXYNOS (Flattened Device Tree)
> [  202.534544] [] (unwind_backtrace) from []
> (show_stack+0x10/0x14)
> [  202.534594] [] (show_stack) from []
> (dump_stack+0x98/0xc4)
> [  202.534639] [] (dump_stack) from []
> (mutex_lock_nested+0x2c/0x4dc)
> [  202.534685] [] (mutex_lock_nested) from []
> (clk_prepare_lock+0x50/0xf8)
> [  202.534726] [] (clk_prepare_lock) from []
> (clk_round_rate+0x1c/0x58)
> [  202.534773] [] (clk_round_rate) from []
> (sdhci_s3c_set_clock+0x18c/0x1b0)
> [  202.534819] [] (sdhci_s3c_set_clock) from []
> (sdhci_cmu_set_clock+0x24/0x17c)
> [  202.534860] [] (sdhci_cmu_set_clock) from []
> (sdhci_do_set_ios+0x78/0x484)
> [  202.534904] [] (sdhci_do_set_ios) from []
> (sdhci_runtime_resume_host+0x60/0x114)
> [  202.534957] [] (sdhci_runtime_resume_host) from
> [] (__rpm_callback+0x2c/0x60)
> [  202.535000] [] (__rpm_callback) from []
> (rpm_callback+0x54/0x80)
> [  202.535041] [] (rpm_callback) from []
> (rpm_resume+0x364/0x558)
> [  202.535081] [] (rpm_resume) from []
> (__pm_runtime_resume+0x60/0x8c)
> [  202.535125] [] (__pm_runtime_resume) from []
> (__mmc_claim_host+0x1b4/0x1f8)
> [  202.535176] [] (__mmc_claim_host) from []
> (mmc_sd_runtime_resume+0x20/0xac)
> [  202.535220] [] (mmc_sd_runtime_resume) from []
> (__rpm_callback+0x2c/0x60)
> [  202.535259] [] (__rpm_callback) from []
> (rpm_callback+0x54/0x80)
> [  202.535299] [] (rpm_callback) from []
> (rpm_resume+0x364/0x558)
> [  202.535340] [] (rpm_resume) from []
> (__pm_runtime_resume+0x60/0x8c)
> [  202.535379] [] (__pm_runtime_resume) from []
> (mmc_get_card+0x14/0x24)
> [  202.535420] [] (mmc_get_card) from []
> (mmc_blk_issue_rq+0x258/0x4f0)
> [  202.535461] [] (mmc_blk_issue_rq) from []
> (mmc_queue_thread+0xd0/0x1d8)
> [  202.535513] [] (mmc_queue_thread) from []
> (kthread+0xf4/0x10c)
> [  202.535561] [] (kthread) from []
> (ret_from_fork+0x14/0x24)
> [  202.535624]
> [  202.535893] ==
> [  202.542059] [ INFO: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected ]
> [  202.548742] 4.6.0-rc1-u3s #38 Not tainted
> [  202.552732] --
> [  202.558902] mmcqd/0/100 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire:
> [  202.565317]  (prepare_lock){+.+...}, at: []
> clk_prepare_lock+0x50/0xf8
> [  202.572695]
> [  202.572695] and this task is already holding:
> [  202.578510]  (&(&host->lock)->rlock#2){-.-...}, at: []
> sdhci_do_set_ios+0x1c/0x484
> [  202.586930] which would create a new lock dependency:
> [  202.591964]  (&(&host->lock)->rlock#2){-.-...} -> (prepare_lock){+.+...}
> [  202.598650]
> [  202.598650] but this new dependency connects a HARDIRQ-irq-safe lock:
> [  202.606546]  (&(&host->lock)->rlock#2){-.-...}
> [  202.606546] ... which became HARDIRQ-irq-safe at:
> [  202.614359]   [] _raw_spin_lock+0x3c/0x74
> [  202.619219]   [] sdhci_irq+0x1c/0x814
> [  202.623732]   [] handle_irq_event_percpu+0x9c/0x150
> [  202.629461]   [] handle_irq_event+0x38/0x5c
> [  202.634495]   [] handle_fasteoi_irq+0xd0/0x1a8
> [  202.639790]   [] generic_handle_irq+0x24/0x34
> [  202.644998]   [] __handle_domain_irq+0x7c/0xec
> [  202.650293]   [] gic_handle_irq+0x54/0x94
> [  202.655154]   [] __irq_svc+0x58/0x98
> [  202.659581]   [] arch_cpu_idle+0x24/0x3c
> [  202.664354]   [] arch_cpu_idle+0x24/0x3c
> [  202.669128]   [] cpu_startup_entry+0x1c8/0x24c
> [  202.674423]   [] start_kernel+0x39c/0x3a8
> [  202.679284]   [<4000807c>] 0x4000807c
> [  202.682933]
> [  202.682933] to a HARDIRQ-irq-unsafe lock:
> [  202.688399]

Re: Warnings for invalid VDD (sdhci-s3c)

2016-03-27 Thread Krzysztof Kozlowski

On 27.03.2016 16:41, Anand Moon wrote:
> 
> On my Odroid U3 with debug flags enabled I am observing the deadlock below.

There is a sleep in atomic context and a possible deadlock, but:
1. Are you sure it does not happen without the patch?
2. Are you sure it is not the same as the already known issue on sdhci-s3c?
For example reported here:
http://www.spinics.net/lists/linux-samsung-soc/msg42398.html

What is the reproducibility rate?

Best regards,
Krzysztof

> -
> [  202.519524] BUG: sleeping function called from invalid context at
> kernel/locking/mutex.c:617
> [  202.522364] in_atomic(): 1, irqs_disabled(): 128, pid: 100, name: mmcqd/0
> [  202.529129] 1 lock held by mmcqd/0/100:
> [  202.529150]  #0:  (&(>lock)->rlock#2){-.-...}, at:
> [] sdhci_do_set_ios+0x1c/0x484
> [  202.529271] irq event stamp: 703530
> [  202.529291] hardirqs last  enabled at (703529): []
> _raw_spin_unlock_irqrestore+0x6c/0x74
> [  202.529343] hardirqs last disabled at (703530): []
> _raw_spin_lock_irqsave+0x1c/0x84
> [  202.529384] softirqs last  enabled at (703456): []
> __do_softirq+0x244/0x2c0
> [  202.529438] softirqs last disabled at (703445): []
> irq_exit+0xec/0x128
> [  202.529472] Preemption disabled at:[<  (null)>]   (null)
> [  202.534415]
> [  202.534449] CPU: 0 PID: 100 Comm: mmcqd/0 Not tainted 4.6.0-rc1-u3s #38
> [  202.534473] Hardware name: SAMSUNG EXYNOS (Flattened Device Tree)
> [  202.534544] [] (unwind_backtrace) from []
> (show_stack+0x10/0x14)
> [  202.534594] [] (show_stack) from []
> (dump_stack+0x98/0xc4)
> [  202.534639] [] (dump_stack) from []
> (mutex_lock_nested+0x2c/0x4dc)
> [  202.534685] [] (mutex_lock_nested) from []
> (clk_prepare_lock+0x50/0xf8)
> [  202.534726] [] (clk_prepare_lock) from []
> (clk_round_rate+0x1c/0x58)
> [  202.534773] [] (clk_round_rate) from []
> (sdhci_s3c_set_clock+0x18c/0x1b0)
> [  202.534819] [] (sdhci_s3c_set_clock) from []
> (sdhci_cmu_set_clock+0x24/0x17c)
> [  202.534860] [] (sdhci_cmu_set_clock) from []
> (sdhci_do_set_ios+0x78/0x484)
> [  202.534904] [] (sdhci_do_set_ios) from []
> (sdhci_runtime_resume_host+0x60/0x114)
> [  202.534957] [] (sdhci_runtime_resume_host) from
> [] (__rpm_callback+0x2c/0x60)
> [  202.535000] [] (__rpm_callback) from []
> (rpm_callback+0x54/0x80)
> [  202.535041] [] (rpm_callback) from []
> (rpm_resume+0x364/0x558)
> [  202.535081] [] (rpm_resume) from []
> (__pm_runtime_resume+0x60/0x8c)
> [  202.535125] [] (__pm_runtime_resume) from []
> (__mmc_claim_host+0x1b4/0x1f8)
> [  202.535176] [] (__mmc_claim_host) from []
> (mmc_sd_runtime_resume+0x20/0xac)
> [  202.535220] [] (mmc_sd_runtime_resume) from []
> (__rpm_callback+0x2c/0x60)
> [  202.535259] [] (__rpm_callback) from []
> (rpm_callback+0x54/0x80)
> [  202.535299] [] (rpm_callback) from []
> (rpm_resume+0x364/0x558)
> [  202.535340] [] (rpm_resume) from []
> (__pm_runtime_resume+0x60/0x8c)
> [  202.535379] [] (__pm_runtime_resume) from []
> (mmc_get_card+0x14/0x24)
> [  202.535420] [] (mmc_get_card) from []
> (mmc_blk_issue_rq+0x258/0x4f0)
> [  202.535461] [] (mmc_blk_issue_rq) from []
> (mmc_queue_thread+0xd0/0x1d8)
> [  202.535513] [] (mmc_queue_thread) from []
> (kthread+0xf4/0x10c)
> [  202.535561] [] (kthread) from []
> (ret_from_fork+0x14/0x24)
> [  202.535624]
> [  202.535893] ==
> [  202.542059] [ INFO: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected ]
> [  202.548742] 4.6.0-rc1-u3s #38 Not tainted
> [  202.552732] --
> [  202.558902] mmcqd/0/100 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire:
> [  202.565317]  (prepare_lock){+.+...}, at: []
> clk_prepare_lock+0x50/0xf8
> [  202.572695]
> [  202.572695] and this task is already holding:
> [  202.578510]  (&(>lock)->rlock#2){-.-...}, at: []
> sdhci_do_set_ios+0x1c/0x484
> [  202.586930] which would create a new lock dependency:
> [  202.591964]  (&(>lock)->rlock#2){-.-...} -> (prepare_lock){+.+...}
> [  202.598650]
> [  202.598650] but this new dependency connects a HARDIRQ-irq-safe lock:
> [  202.606546]  (&(>lock)->rlock#2){-.-...}
> [  202.606546] ... which became HARDIRQ-irq-safe at:
> [  202.614359]   [] _raw_spin_lock+0x3c/0x74
> [  202.619219]   [] sdhci_irq+0x1c/0x814
> [  202.623732]   [] handle_irq_event_percpu+0x9c/0x150
> [  202.629461]   [] handle_irq_event+0x38/0x5c
> [  202.634495]   [] handle_fasteoi_irq+0xd0/0x1a8
> [  202.639790]   [] generic_handle_irq+0x24/0x34
> [  202.644998]   [] __handle_domain_irq+0x7c/0xec
> [  202.650293]   [] gic_handle_irq+0x54/0x94
> [  202.655154]   [] __irq_svc+0x58/0x98
> [  202.659581]   [] arch_cpu_idle+0x24/0x3c
> [  202.664354]   [] arch_cpu_idle+0x24/0x3c
> [  202.669128]   [] cpu_startup_entry+0x1c8/0x24c
> [  202.674423]   [] start_kernel+0x39c/0x3a8
> [  202.679284]   [<4000807c>] 0x4000807c
> [  202.682933]
> [  202.682933] to a HARDIRQ-irq-unsafe lock:
> [  202.688399]

Re: [PATCH 0/2] scsi: remove orphaned modular code from non-modular drivers

2016-03-27 Thread James Bottomley

On Sun, 2016-03-27 at 13:00 -0400, Paul Gortmaker wrote:
> In the ongoing audit/cleanup of non-modular code needlessly using 
> modular infrastructure, the SCSI subsystem fortunately only contains 
> two instances that I detected.  Both are for legacy drivers that 
> predate the git epoch, so clearly there is no demand for converting 
> these drivers to be tristate.
> 
> For anyone new to the underlying goal of this cleanup, we are trying 
> to not use module support for code that isn't buildable as a module
> since:
> 
>  (1) it is easy to accidentally write unused module_exit and remove
> code
>  (2) it can be misleading when reading the source, thinking it can be
>  modular when the Makefile and/or Kconfig prohibit it
>  (3) it requires the include of the module.h header file which in
> turn
>  includes nearly everything else, thus adding to CPP overhead.
>  (4) it gets copied/replicated into other code and spreads like
> weeds.

I don't really buy any of these as being credible issues for the
ancient drivers, so there doesn't appear to be a real benefit to this
conversion; however, besides the danger of touching old stuff, there
are some downsides:

> -MODULE_DESCRIPTION("Sun3x ESP SCSI driver");
> -MODULE_AUTHOR("Thomas Bogendoerfer (tsbog...@alpha.franken.de)");
> -MODULE_LICENSE("GPL");
> -MODULE_VERSION(DRV_VERSION);

These tags are usefully greppable for drivers, regardless of whether
they generate actual kernel side information.

> We explicitly disallow a driver unbind, since that doesn't have a
> sensible use case anyway, and it allows us to drop the ".remove"
> code for non-modular drivers.

That's bogus.  I use bind and unbind a lot for testing built-in drivers,
but the usual use case for users is resetting the devices.

> Build tested for mips (jazz) and m68k (sun3x) on 4.6-rc1 to ensure no
> silly typos crept in.

For trivial changes, build testing is not really sufficient: a
significant fraction of them break something that isn't spotted by the
reviewers.  For the older drivers, this isn't discovered for months to
years and then someone has to go digging back through all the so-called
trivial changes to find which one it was.

James

[PATCH v5 5/6] hwmon: (fam15h_power) Add documentation for TDP and accumulated power algorithm

2016-03-27 Thread Huang Rui

This patch adds the description to explain the TDP reporting mechanism
and accumulated power algorithm.

Signed-off-by: Huang Rui 
Cc: Borislav Petkov 
---
 Documentation/hwmon/fam15h_power | 57 +++-
 drivers/hwmon/fam15h_power.c |  2 +-
 2 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/Documentation/hwmon/fam15h_power b/Documentation/hwmon/fam15h_power
index e2b1b69..2c4fbee 100644
--- a/Documentation/hwmon/fam15h_power
+++ b/Documentation/hwmon/fam15h_power
@@ -10,14 +10,22 @@ Supported chips:
   Datasheets:
   BIOS and Kernel Developer's Guide (BKDG) For AMD Family 15h Processors
   BIOS and Kernel Developer's Guide (BKDG) For AMD Family 16h Processors
+  AMD64 Architecture Programmer's Manual Volume 2: System Programming
 
 Author: Andreas Herrmann 
 
 Description
 ---
 
+1) Processor TDP (Thermal design power)
+
+Given a fixed frequency and voltage, the power consumption of a
+processor varies based on the workload being executed. Derated power
+is the power consumed when running a specific application. Thermal
+design power (TDP) is an example of derated power.
+
 This driver permits reading of registers providing power information
-of AMD Family 15h and 16h processors.
+of AMD Family 15h and 16h processors via TDP algorithm.
 
 For AMD Family 15h and 16h processors the following power values can
 be calculated using different processor northbridge function
@@ -37,3 +45,50 @@ This driver provides ProcessorPwrWatts and CurrPwrWatts:
 On multi-node processors the calculated value is for the entire
 package and not for a single node. Thus the driver creates sysfs
 attributes only for internal node0 of a multi-node processor.
+
+2) Accumulated Power Mechanism
+
+This driver also introduces an algorithm that should be used to
+calculate the average power consumed by a processor during a
+measurement interval Tm. The feature of accumulated power mechanism is
+indicated by CPUID Fn8000_0007_EDX[12].
+
+* Tsample: compute unit power accumulator sample period
+* Tref: the PTSC counter period
+* PTSC: performance timestamp counter
+* N: the ratio of compute unit power accumulator sample period to the
+  PTSC period
+* Jmax: max compute unit accumulated power which is indicated by
+  MaxCpuSwPwrAcc MSR C001007b
+* Jx/Jy: compute unit accumulated power which is indicated by
+  CpuSwPwrAcc MSR C001007a
+* Tx/Ty: the value of performance timestamp counter which is indicated
+  by CU_PTSC MSR C0010280
+* PwrCPUave: CPU average power
+
+i. Determine the ratio of Tsample to Tref by executing CPUID Fn8000_0007.
+   N = value of CPUID Fn8000_0007_ECX[CpuPwrSampleTimeRatio[15:0]].
+
+ii. Read the full range of the cumulative energy value from the new
+MSR MaxCpuSwPwrAcc.
+   Jmax = value returned.
+iii. At time x, SW reads CpuSwPwrAcc MSR and samples the PTSC.
+   Jx = value read from CpuSwPwrAcc and Tx = value read from
+PTSC.
+
+iv. At time y, SW reads CpuSwPwrAcc MSR and samples the PTSC.
+   Jy = value read from CpuSwPwrAcc and Ty = value read from
+PTSC.
+
+v. Calculate the average power consumption for a compute unit over
+time period (y-x). Unit of result is uWatt.
+   if (Jy < Jx) // Rollover has occurred
+   Jdelta = (Jy + Jmax) - Jx
+   else
+   Jdelta = Jy - Jx
+   PwrCPUave = N * Jdelta * 1000 / (Ty - Tx)
+
+This driver provides PwrCPUave and interval(default is 10 millisecond
+and maximum is 1 second):
+* power1_average (PwrCPUave)
+* power1_average_interval (Interval)
diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index 003564b..c1cad26 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -1,7 +1,7 @@
 /*
  * fam15h_power.c - AMD Family 15h processor power monitoring
  *
- * Copyright (c) 2011 Advanced Micro Devices, Inc.
+ * Copyright (c) 2011-2016 Advanced Micro Devices, Inc.
  * Author: Andreas Herrmann 
  *
  *
-- 
1.9.1

Re: [PATCH 0/2] scsi: remove orphaned modular code from non-modular drivers

2016-03-27 Thread James Bottomley

On Sun, 2016-03-27 at 13:00 -0400, Paul Gortmaker wrote:
> In the ongoing audit/cleanup of non-modular code needlessly using 
> modular infrastructure, the SCSI subsystem fortunately only contains 
> two instances that I detected.  Both are for legacy drivers that 
> predate the git epoch, so clearly there is no demand for converting 
> these drivers to be tristate.
> 
> For anyone new to the underlying goal of this cleanup, we are trying 
> to not use module support for code that isn't buildable as a module
> since:
> 
>  (1) it is easy to accidentally write unused module_exit and remove
> code
>  (2) it can be misleading when reading the source, thinking it can be
>  modular when the Makefile and/or Kconfig prohibit it
>  (3) it requires the include of the module.h header file which in
> turn
>  includes nearly everything else, thus adding to CPP overhead.
>  (4) it gets copied/replicated into other code and spreads like
> weeds.

I don't really buy any of these as being credible issues for the
ancient drivers, so there doesn't appear to be a real benefit to this
conversion; however, besides the danger of touching old stuff, there
are some downsides:

> -MODULE_DESCRIPTION("Sun3x ESP SCSI driver");
> -MODULE_AUTHOR("Thomas Bogendoerfer (tsbog...@alpha.franken.de)");
> -MODULE_LICENSE("GPL");
> -MODULE_VERSION(DRV_VERSION);

These tags are usefully greppable for drivers, regardless of whether
they generate actual kernel side information.

> We explicitly disallow a driver unbind, since that doesn't have a
> sensible use case anyway, and it allows us to drop the ".remove"
> code for non-modular drivers.

That's bogus.  I use bind and unbind a lot for testing built-in drivers,
but the usual use case for users is resetting the devices.

> Build tested for mips (jazz) and m68k (sun3x) on 4.6-rc1 to ensure no
> silly typos crept in.

For trivial changes, build testing is not really sufficient: a
significant fraction of them break something that isn't spotted by the
reviewers.  For the older drivers, this isn't discovered for months to
years and then someone has to go digging back through all the so-called
trivial changes to find which one it was.

James

[PATCH v5 5/6] hwmon: (fam15h_power) Add documentation for TDP and accumulated power algorithm

2016-03-27 Thread Huang Rui

This patch adds the description to explain the TDP reporting mechanism
and accumulated power algorithm.

Signed-off-by: Huang Rui 
Cc: Borislav Petkov 
---
 Documentation/hwmon/fam15h_power | 57 +++-
 drivers/hwmon/fam15h_power.c |  2 +-
 2 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/Documentation/hwmon/fam15h_power b/Documentation/hwmon/fam15h_power
index e2b1b69..2c4fbee 100644
--- a/Documentation/hwmon/fam15h_power
+++ b/Documentation/hwmon/fam15h_power
@@ -10,14 +10,22 @@ Supported chips:
   Datasheets:
   BIOS and Kernel Developer's Guide (BKDG) For AMD Family 15h Processors
   BIOS and Kernel Developer's Guide (BKDG) For AMD Family 16h Processors
+  AMD64 Architecture Programmer's Manual Volume 2: System Programming
 
 Author: Andreas Herrmann 
 
 Description
 ---
 
+1) Processor TDP (Thermal design power)
+
+Given a fixed frequency and voltage, the power consumption of a
+processor varies based on the workload being executed. Derated power
+is the power consumed when running a specific application. Thermal
+design power (TDP) is an example of derated power.
+
 This driver permits reading of registers providing power information
-of AMD Family 15h and 16h processors.
+of AMD Family 15h and 16h processors via TDP algorithm.
 
 For AMD Family 15h and 16h processors the following power values can
 be calculated using different processor northbridge function
@@ -37,3 +45,50 @@ This driver provides ProcessorPwrWatts and CurrPwrWatts:
 On multi-node processors the calculated value is for the entire
 package and not for a single node. Thus the driver creates sysfs
 attributes only for internal node0 of a multi-node processor.
+
+2) Accumulated Power Mechanism
+
+This driver also introduces an algorithm that should be used to
+calculate the average power consumed by a processor during a
+measurement interval Tm. The feature of accumulated power mechanism is
+indicated by CPUID Fn8000_0007_EDX[12].
+
+* Tsample: compute unit power accumulator sample period
+* Tref: the PTSC counter period
+* PTSC: performance timestamp counter
+* N: the ratio of compute unit power accumulator sample period to the
+  PTSC period
+* Jmax: max compute unit accumulated power which is indicated by
+  MaxCpuSwPwrAcc MSR C001007b
+* Jx/Jy: compute unit accumulated power which is indicated by
+  CpuSwPwrAcc MSR C001007a
+* Tx/Ty: the value of performance timestamp counter which is indicated
+  by CU_PTSC MSR C0010280
+* PwrCPUave: CPU average power
+
+i. Determine the ratio of Tsample to Tref by executing CPUID Fn8000_0007.
+   N = value of CPUID Fn8000_0007_ECX[CpuPwrSampleTimeRatio[15:0]].
+
+ii. Read the full range of the cumulative energy value from the new
+MSR MaxCpuSwPwrAcc.
+   Jmax = value returned.
+iii. At time x, SW reads CpuSwPwrAcc MSR and samples the PTSC.
+   Jx = value read from CpuSwPwrAcc and Tx = value read from
+PTSC.
+
+iv. At time y, SW reads CpuSwPwrAcc MSR and samples the PTSC.
+   Jy = value read from CpuSwPwrAcc and Ty = value read from
+PTSC.
+
+v. Calculate the average power consumption for a compute unit over
+time period (y-x). Unit of result is uWatt.
+   if (Jy < Jx) // Rollover has occurred
+   Jdelta = (Jy + Jmax) - Jx
+   else
+   Jdelta = Jy - Jx
+   PwrCPUave = N * Jdelta * 1000 / (Ty - Tx)
+
+This driver provides PwrCPUave and interval(default is 10 millisecond
+and maximum is 1 second):
+* power1_average (PwrCPUave)
+* power1_average_interval (Interval)
diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index 003564b..c1cad26 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -1,7 +1,7 @@
 /*
  * fam15h_power.c - AMD Family 15h processor power monitoring
  *
- * Copyright (c) 2011 Advanced Micro Devices, Inc.
+ * Copyright (c) 2011-2016 Advanced Micro Devices, Inc.
  * Author: Andreas Herrmann 
  *
  *
-- 
1.9.1

[PATCH v5 6/6] hwmon: (fam15h_power) Add platform check function

2016-03-27 Thread Huang Rui

This patch adds a platform check function to make code more readable.

Signed-off-by: Huang Rui 
---
 drivers/hwmon/fam15h_power.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index c1cad26..622c646 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -78,6 +78,11 @@ struct fam15h_power_data {
unsigned long power_period;
 };
 
+static bool is_carrizo_or_later(void)
+{
+   return boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model >= 0x60;
+}
+
 static ssize_t show_power(struct device *dev,
  struct device_attribute *attr, char *buf)
 {
@@ -94,7 +99,7 @@ static ssize_t show_power(struct device *dev,
 * On Carrizo and later platforms, TdpRunAvgAccCap bit field
 * is extended to 4:31 from 4:25.
 */
-   if (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model >= 0x60) {
+   if (is_carrizo_or_later()) {
running_avg_capture = val >> 4;
running_avg_capture = sign_extend32(running_avg_capture, 27);
} else {
@@ -111,7 +116,7 @@ static ssize_t show_power(struct device *dev,
 * On Carrizo and later platforms, ApmTdpLimit bit field
 * is extended to 16:31 from 16:28.
 */
-   if (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model >= 0x60)
+   if (is_carrizo_or_later())
tdp_limit = val >> 16;
else
tdp_limit = (val >> 16) & 0x1fff;
-- 
1.9.1

[PATCH v5 4/6] hwmon: (fam15h_power) Introduce a cpu accumulated power reporting algorithm

2016-03-27 Thread Huang Rui

This patch introduces an algorithm that computes the average power by
reading a delta value of “core power accumulator” register during
measurement interval, and then dividing delta value by the length of
the time interval.

The user can read the power1_average entry to measure the processor power
consumption and write the power1_average_interval entry to set the interval.

A simple example:

ray@hr-ub:~/tip$ sensors
fam15h_power-pci-00c4
Adapter: PCI adapter
power1:   19.58 mW (avg =   2.55 mW, interval =   0.01 s)
   (crit =  15.00 W)

...

The result is the current average processor power consumption over a
10 millisecond interval. The unit of the result is uWatt.

Suggested-by: Guenter Roeck 
Signed-off-by: Huang Rui 
Cc: Borislav Petkov 
---
 drivers/hwmon/fam15h_power.c | 119 +++
 1 file changed, 119 insertions(+)

diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index de6f52b..003564b 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -27,6 +27,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 
@@ -48,6 +50,9 @@ MODULE_LICENSE("GPL");
 #define FAM15H_NUM_GROUPS  2
 #define MAX_CUS 8
 
+/* set maximum interval as 1 second */
+#define MAX_INTERVAL   1000
+
 #define MSR_F15H_CU_PWR_ACCUMULATOR 0xc001007a
 #define MSR_F15H_CU_MAX_PWR_ACCUMULATOR 0xc001007b
 #define MSR_F15H_PTSC  0xc0010280
@@ -68,6 +73,9 @@ struct fam15h_power_data {
u64 cu_acc_power[MAX_CUS];
/* performance timestamp counter */
u64 cpu_sw_pwr_ptsc[MAX_CUS];
+   /* online/offline status of current compute unit */
+   int cu_on[MAX_CUS];
+   unsigned long power_period;
 };
 
 static ssize_t show_power(struct device *dev,
@@ -149,6 +157,8 @@ static void do_read_registers_on_cu(void *_data)
 
rdmsrl_safe(MSR_F15H_CU_PWR_ACCUMULATOR, &data->cu_acc_power[cu]);
rdmsrl_safe(MSR_F15H_PTSC, &data->cpu_sw_pwr_ptsc[cu]);
+
+   data->cu_on[cu] = 1;
 }
 
 /*
@@ -165,6 +175,8 @@ static int read_registers(struct fam15h_power_data *data)
if (!ret)
return -ENOMEM;
 
+   memset(data->cu_on, 0, sizeof(int) * MAX_CUS);
+
get_online_cpus();
this_cpu = get_cpu();
 
@@ -192,18 +204,117 @@ static int read_registers(struct fam15h_power_data *data)
return 0;
 }
 
+static ssize_t acc_show_power(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+   struct fam15h_power_data *data = dev_get_drvdata(dev);
+   u64 prev_cu_acc_power[MAX_CUS], prev_ptsc[MAX_CUS],
+   jdelta[MAX_CUS];
+   u64 tdelta, avg_acc;
+   int cu, cu_num, ret;
+   signed long leftover;
+
+   /*
+* With the new x86 topology modelling, x86_max_cores is the
+* compute unit number.
+*/
+   cu_num = boot_cpu_data.x86_max_cores;
+
+   ret = read_registers(data);
+   if (ret)
+   return 0;
+
+   for (cu = 0; cu < cu_num; cu++) {
+   prev_cu_acc_power[cu] = data->cu_acc_power[cu];
+   prev_ptsc[cu] = data->cpu_sw_pwr_ptsc[cu];
+   }
+
+   leftover = schedule_timeout_interruptible(msecs_to_jiffies(data->power_period));
+   if (leftover)
+   return 0;
+
+   ret = read_registers(data);
+   if (ret)
+   return 0;
+
+   for (cu = 0, avg_acc = 0; cu < cu_num; cu++) {
+   /* check if current compute unit is online */
+   if (data->cu_on[cu] == 0)
+   continue;
+
+   if (data->cu_acc_power[cu] < prev_cu_acc_power[cu]) {
+   jdelta[cu] = data->max_cu_acc_power + data->cu_acc_power[cu];
+   jdelta[cu] -= prev_cu_acc_power[cu];
+   } else {
+   jdelta[cu] = data->cu_acc_power[cu] - prev_cu_acc_power[cu];
+   }
+   tdelta = data->cpu_sw_pwr_ptsc[cu] - prev_ptsc[cu];
+   jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000;
+   do_div(jdelta[cu], tdelta);
+
+   /* the unit is microWatt */
+   avg_acc += jdelta[cu];
+   }
+
+   return sprintf(buf, "%llu\n", (unsigned long long)avg_acc);
+}
+static DEVICE_ATTR(power1_average, S_IRUGO, acc_show_power, NULL);
+
+static ssize_t acc_show_power_period(struct device *dev,
+struct device_attribute *attr,
+char *buf)
+{
+   struct fam15h_power_data *data = dev_get_drvdata(dev);
+
+   return sprintf(buf, "%lu\n", data->power_period);
+}
+
+static ssize_t acc_set_power_period(struct device *dev,
+   struct device_attribute *attr,
+   const char *buf, size_t

[PATCH v5 6/6] hwmon: (fam15h_power) Add platform check function

2016-03-27 Thread Huang Rui

This patch adds a platform check function to make code more readable.

Signed-off-by: Huang Rui 
---
 drivers/hwmon/fam15h_power.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index c1cad26..622c646 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -78,6 +78,11 @@ struct fam15h_power_data {
unsigned long power_period;
 };
 
+static bool is_carrizo_or_later(void)
+{
+   return boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model >= 0x60;
+}
+
 static ssize_t show_power(struct device *dev,
  struct device_attribute *attr, char *buf)
 {
@@ -94,7 +99,7 @@ static ssize_t show_power(struct device *dev,
 * On Carrizo and later platforms, TdpRunAvgAccCap bit field
 * is extended to 4:31 from 4:25.
 */
-   if (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model >= 0x60) {
+   if (is_carrizo_or_later()) {
running_avg_capture = val >> 4;
running_avg_capture = sign_extend32(running_avg_capture, 27);
} else {
@@ -111,7 +116,7 @@ static ssize_t show_power(struct device *dev,
 * On Carrizo and later platforms, ApmTdpLimit bit field
 * is extended to 16:31 from 16:28.
 */
-   if (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model >= 0x60)
+   if (is_carrizo_or_later())
tdp_limit = val >> 16;
else
tdp_limit = (val >> 16) & 0x1fff;
-- 
1.9.1

[PATCH v5 4/6] hwmon: (fam15h_power) Introduce a cpu accumulated power reporting algorithm

2016-03-27 Thread Huang Rui

This patch introduces an algorithm that computes the average power by
reading a delta value of “core power accumulator” register during
measurement interval, and then dividing delta value by the length of
the time interval.

The user can read the power1_average entry to measure the processor power
consumption and write the power1_average_interval entry to set the interval.

A simple example:

ray@hr-ub:~/tip$ sensors
fam15h_power-pci-00c4
Adapter: PCI adapter
power1:   19.58 mW (avg =   2.55 mW, interval =   0.01 s)
   (crit =  15.00 W)

...

The result is the current average processor power consumption over a
10 millisecond interval. The unit of the result is uWatt.

Suggested-by: Guenter Roeck 
Signed-off-by: Huang Rui 
Cc: Borislav Petkov 
---
 drivers/hwmon/fam15h_power.c | 119 +++
 1 file changed, 119 insertions(+)

diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index de6f52b..003564b 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -27,6 +27,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 
@@ -48,6 +50,9 @@ MODULE_LICENSE("GPL");
 #define FAM15H_NUM_GROUPS  2
 #define MAX_CUS 8
 
+/* set maximum interval as 1 second */
+#define MAX_INTERVAL   1000
+
 #define MSR_F15H_CU_PWR_ACCUMULATOR 0xc001007a
 #define MSR_F15H_CU_MAX_PWR_ACCUMULATOR 0xc001007b
 #define MSR_F15H_PTSC  0xc0010280
@@ -68,6 +73,9 @@ struct fam15h_power_data {
u64 cu_acc_power[MAX_CUS];
/* performance timestamp counter */
u64 cpu_sw_pwr_ptsc[MAX_CUS];
+   /* online/offline status of current compute unit */
+   int cu_on[MAX_CUS];
+   unsigned long power_period;
 };
 
 static ssize_t show_power(struct device *dev,
@@ -149,6 +157,8 @@ static void do_read_registers_on_cu(void *_data)
 
rdmsrl_safe(MSR_F15H_CU_PWR_ACCUMULATOR, &data->cu_acc_power[cu]);
rdmsrl_safe(MSR_F15H_PTSC, &data->cpu_sw_pwr_ptsc[cu]);
+
+   data->cu_on[cu] = 1;
 }
 
 /*
@@ -165,6 +175,8 @@ static int read_registers(struct fam15h_power_data *data)
if (!ret)
return -ENOMEM;
 
+   memset(data->cu_on, 0, sizeof(int) * MAX_CUS);
+
get_online_cpus();
this_cpu = get_cpu();
 
@@ -192,18 +204,117 @@ static int read_registers(struct fam15h_power_data *data)
return 0;
 }
 
+static ssize_t acc_show_power(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+   struct fam15h_power_data *data = dev_get_drvdata(dev);
+   u64 prev_cu_acc_power[MAX_CUS], prev_ptsc[MAX_CUS],
+   jdelta[MAX_CUS];
+   u64 tdelta, avg_acc;
+   int cu, cu_num, ret;
+   signed long leftover;
+
+   /*
+* With the new x86 topology modelling, x86_max_cores is the
+* compute unit number.
+*/
+   cu_num = boot_cpu_data.x86_max_cores;
+
+   ret = read_registers(data);
+   if (ret)
+   return 0;
+
+   for (cu = 0; cu < cu_num; cu++) {
+   prev_cu_acc_power[cu] = data->cu_acc_power[cu];
+   prev_ptsc[cu] = data->cpu_sw_pwr_ptsc[cu];
+   }
+
+   leftover = 
schedule_timeout_interruptible(msecs_to_jiffies(data->power_period));
+   if (leftover)
+   return 0;
+
+   ret = read_registers(data);
+   if (ret)
+   return 0;
+
+   for (cu = 0, avg_acc = 0; cu < cu_num; cu++) {
+   /* check if current compute unit is online */
+   if (data->cu_on[cu] == 0)
+   continue;
+
+   if (data->cu_acc_power[cu] < prev_cu_acc_power[cu]) {
+   jdelta[cu] = data->max_cu_acc_power + 
data->cu_acc_power[cu];
+   jdelta[cu] -= prev_cu_acc_power[cu];
+   } else {
+   jdelta[cu] = data->cu_acc_power[cu] - 
prev_cu_acc_power[cu];
+   }
+   tdelta = data->cpu_sw_pwr_ptsc[cu] - prev_ptsc[cu];
+   jdelta[cu] *= data->cpu_pwr_sample_ratio * 1000;
+   do_div(jdelta[cu], tdelta);
+
+   /* the unit is microWatt */
+   avg_acc += jdelta[cu];
+   }
+
+   return sprintf(buf, "%llu\n", (unsigned long long)avg_acc);
+}
+static DEVICE_ATTR(power1_average, S_IRUGO, acc_show_power, NULL);
+
+static ssize_t acc_show_power_period(struct device *dev,
+struct device_attribute *attr,
+char *buf)
+{
+   struct fam15h_power_data *data = dev_get_drvdata(dev);
+
+   return sprintf(buf, "%lu\n", data->power_period);
+}
+
+static ssize_t acc_set_power_period(struct device *dev,
+   struct device_attribute *attr,
+   const char *buf, size_t count)
+{
+   struct fam15h_power_data *data =

Re: [PATCH v6 1/7][Resend] cpufreq: sched: Helpers to add and remove update_util hooks

2016-03-27 Thread Viresh Kumar

On 22-03-16, 02:46, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki 
> 
> Replace the single helper for adding and removing cpufreq utilization
> update hooks, cpufreq_set_update_util_data(), with a pair of helpers,
> cpufreq_add_update_util_hook() and cpufreq_remove_update_util_hook(),
> and modify the users of cpufreq_set_update_util_data() accordingly.
> 
> With the new helpers, the code using them doesn't need to worry
> about the internals of struct update_util_data and in particular
> it doesn't need to worry about populating the func field in it
> properly upfront.
> 
> Signed-off-by: Rafael J. Wysocki 
> ---

Acked-by: Viresh Kumar 

-- 
viresh

[PATCH v5 0/6] hwmon: (fam15h_power) Introduce an accumulated power reporting algorithm

2016-03-27 Thread Huang Rui

Hi Guenter,

This series of patches introduces an accumulated power reporting
algorithm. It will calculate the average power consumption for the
processor. The cpu feature flag is CPUID.8000_0007H:EDX[12].

This algorithm is used to compare the processor power
consumption between MWAITX delay and TSC delay on AMD Carrizo
platforms.

Reference:
https://lkml.kernel.org/r/1438744732-1459-1-git-send-email-ray.hu...@amd.com

Commit f96756 at tip ("x86/asm: Add MONITORX/MWAITX instruction support")
Commit b466bd at tip ("x86/asm/delay: Introduce an MWAITX-based delay with a 
configurable timer")

V1: 
https://lkml.kernel.org/r/1440662866-28716-1-git-send-email-ray.hu...@amd.com
V2: 
https://lkml.kernel.org/r/1445308109-17970-1-git-send-email-ray.hu...@amd.com
V3: https://lkml.kernel.org/r/1446199024-1896-1-git-send-email-ray.hu...@amd.com
V4: https://lkml.kernel.org/r/1457662670-3354-1-git-send-email-ray.hu...@amd.com

On V5, I used the new x86 topology with compute unit id (cpu_core_id)
and compute unit numbers (x86_max_cores) for AMD processors. The
change and documentation will go to master soon; see the link below.
This approach is better than using smp_num_siblings, which is an
undefined variable outside of CONFIG_SMP.

Refer:
https://git.kernel.org/cgit/linux/kernel/git/bp/bp.git/commit/?h=tip-urgent=f6ce1851fa2eec2c332255fc25a544658dbfbfe4

Changes from v1 -> v2:
- Move fam15h_power_groups and fam15h_power_group into fam15h_power_data to
  avoid overwrite on multi-CPU system.
- Rename FAM15H_MIN_NUM_ATTRS macro and fix return error code.
- Remove unnecessary warning print.
- Adds do_read_registers_on_cu to do all the read to all MSRs and run it on one
  of the online cores on each compute unit with smp_call_function_many().
- Use power1_average and power1_average_interval standard entries
  instead of power1_acc
- Fix the CPU-hotplug case.

Changes from v2 -> v3:
- As Guenter's suggestion, remove typecast, use &data->groups[0].
- Remove all "fam15_power_*" prefix at data.
- Remove unnecessary ( ).
- Fix the issue that is reported by build test robot, and add
  CPU_SUP_AMD as the dependence of fam15h_power
- Remove the WARN_ON at do_read_registers_on_cu, because it must be
  behind CPUID check. The MSR must be available since
  CPUID.8000_0007H:EDX[12] is set 
- Add get_online_cpus()/put_online_cpus() functions.
- Refine comments and the method which generates the cpumask for cu.
- Add the interval scope to make the value suitable for user
  experience
- Remove the useless mutex.

Changes from v3 -> v4:
- Rebase the patches to latest groeck/hwmon-next.
- Use smp_num_siblings instead of cores_per_cu accessor.
- Refine the cpumask method which is inspired by perf solution.
- Fix some typos and errors.

Changes from v4 -> v5:
- Rebase patches to v4.6-rc1.
- Use new x86 topology with compute id (cpu_core_id) and compute unit
  number (x86_max_cores) instead of smp_num_siblings.

A simple example:

ray@hr-ub:~/tip$ sensors
fam15h_power-pci-00c4
Adapter: PCI adapter
power1:   19.58 mW (avg =   2.55 mW, interval =   0.01 s)
   (crit =  15.00 W)

...

These patches are rebased on v4.6-rc1.

Thanks,
Rui

Huang Rui (6):
  hwmon: (fam15h_power) Add CPU_SUP_AMD as the dependence
  hwmon: (fam15h_power) Add compute unit accumulated power
  hwmon: (fam15h_power) Add ptsc counter value for accumulated power
  hwmon: (fam15h_power) Introduce a cpu accumulated power reporting
algorithm
  hwmon: (fam15h_power) Add documentation for TDP and accumulated power
algorithm
  hwmon: (fam15h_power) Add platform check function

 Documentation/hwmon/fam15h_power |  57 ++-
 drivers/hwmon/Kconfig|   2 +-
 drivers/hwmon/fam15h_power.c | 199 ++-
 3 files changed, 252 insertions(+), 6 deletions(-)

-- 
1.9.1

Re: [PATCH v6 1/7][Resend] cpufreq: sched: Helpers to add and remove update_util hooks

2016-03-27 Thread Viresh Kumar

On 22-03-16, 02:46, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki 
> 
> Replace the single helper for adding and removing cpufreq utilization
> update hooks, cpufreq_set_update_util_data(), with a pair of helpers,
> cpufreq_add_update_util_hook() and cpufreq_remove_update_util_hook(),
> and modify the users of cpufreq_set_update_util_data() accordingly.
> 
> With the new helpers, the code using them doesn't need to worry
> about the internals of struct update_util_data and in particular
> it doesn't need to worry about populating the func field in it
> properly upfront.
> 
> Signed-off-by: Rafael J. Wysocki 
> ---

Acked-by: Viresh Kumar 

-- 
viresh

[PATCH v5 0/6] hwmon: (fam15h_power) Introduce an accumulated power reporting algorithm

2016-03-27 Thread Huang Rui

Hi Guenter,

This series of patches introduces an accumulated power reporting
algorithm. It will calculate the average power consumption for the
processor. The cpu feature flag is CPUID.8000_0007H:EDX[12].

This algorithm is used to compare the processor power
consumption between MWAITX delay and TSC delay on AMD Carrizo
platforms.

Reference:
https://lkml.kernel.org/r/1438744732-1459-1-git-send-email-ray.hu...@amd.com

Commit f96756 at tip ("x86/asm: Add MONITORX/MWAITX instruction support")
Commit b466bd at tip ("x86/asm/delay: Introduce an MWAITX-based delay with a 
configurable timer")

V1: 
https://lkml.kernel.org/r/1440662866-28716-1-git-send-email-ray.hu...@amd.com
V2: 
https://lkml.kernel.org/r/1445308109-17970-1-git-send-email-ray.hu...@amd.com
V3: https://lkml.kernel.org/r/1446199024-1896-1-git-send-email-ray.hu...@amd.com
V4: https://lkml.kernel.org/r/1457662670-3354-1-git-send-email-ray.hu...@amd.com

On V5, I used the new x86 topology with compute unit id (cpu_core_id)
and compute unit numbers (x86_max_cores) for AMD processors. The
change and documentation will go to master soon; see the link below.
This approach is better than using smp_num_siblings, which is an
undefined variable outside of CONFIG_SMP.

Refer:
https://git.kernel.org/cgit/linux/kernel/git/bp/bp.git/commit/?h=tip-urgent=f6ce1851fa2eec2c332255fc25a544658dbfbfe4

Changes from v1 -> v2:
- Move fam15h_power_groups and fam15h_power_group into fam15h_power_data to
  avoid overwrite on multi-CPU system.
- Rename FAM15H_MIN_NUM_ATTRS macro and fix return error code.
- Remove unnecessary warning print.
- Adds do_read_registers_on_cu to do all the read to all MSRs and run it on one
  of the online cores on each compute unit with smp_call_function_many().
- Use power1_average and power1_average_interval standard entries
  instead of power1_acc
- Fix the CPU-hotplug case.

Changes from v2 -> v3:
- As Guenter's suggestion, remove typecast, use &data->groups[0].
- Remove all "fam15_power_*" prefix at data.
- Remove unnecessary ( ).
- Fix the issue that is reported by build test robot, and add
  CPU_SUP_AMD as the dependence of fam15h_power
- Remove the WARN_ON at do_read_registers_on_cu, because it must be
  behind CPUID check. The MSR must be available since
  CPUID.8000_0007H:EDX[12] is set 
- Add get_online_cpus()/put_online_cpus() functions.
- Refine comments and the method which generates the cpumask for cu.
- Add the interval scope to make the value suitable for user
  experience
- Remove the useless mutex.

Changes from v3 -> v4:
- Rebase the patches to latest groeck/hwmon-next.
- Use smp_num_siblings instead of cores_per_cu accessor.
- Refine the cpumask method which is inspired by perf solution.
- Fix some typos and errors.

Changes from v4 -> v5:
- Rebase patches to v4.6-rc1.
- Use new x86 topology with compute id (cpu_core_id) and compute unit
  number (x86_max_cores) instead of smp_num_siblings.

A simple example:

ray@hr-ub:~/tip$ sensors
fam15h_power-pci-00c4
Adapter: PCI adapter
power1:   19.58 mW (avg =   2.55 mW, interval =   0.01 s)
   (crit =  15.00 W)

...

These patches are rebased on v4.6-rc1.

Thanks,
Rui

Huang Rui (6):
  hwmon: (fam15h_power) Add CPU_SUP_AMD as the dependence
  hwmon: (fam15h_power) Add compute unit accumulated power
  hwmon: (fam15h_power) Add ptsc counter value for accumulated power
  hwmon: (fam15h_power) Introduce a cpu accumulated power reporting
algorithm
  hwmon: (fam15h_power) Add documentation for TDP and accumulated power
algorithm
  hwmon: (fam15h_power) Add platform check function

 Documentation/hwmon/fam15h_power |  57 ++-
 drivers/hwmon/Kconfig|   2 +-
 drivers/hwmon/fam15h_power.c | 199 ++-
 3 files changed, 252 insertions(+), 6 deletions(-)

-- 
1.9.1

[PATCH v5 1/6] hwmon: (fam15h_power) Add CPU_SUP_AMD as the dependence

2016-03-27 Thread Huang Rui

This patch adds CONFIG_CPU_SUP_AMD as a dependency of the fam15h_power
driver, because a following patch will use the interface from
x86/kernel/cpu/amd.c.

Otherwise, the below error might be encountered:

All errors (new ones prefixed by >>):

   drivers/built-in.o: In function `fam15h_power_probe':
>> fam15h_power.c:(.text+0x26e3a3): undefined reference to
>> `amd_get_cores_per_cu'
   fam15h_power.c:(.text+0x26e41e): undefined reference to
`amd_get_cores_per_cu'

Reported-by: build test robot 
Signed-off-by: Huang Rui 
---
 drivers/hwmon/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 5c2d13a..4be3792 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -288,7 +288,7 @@ config SENSORS_K10TEMP
 
 config SENSORS_FAM15H_POWER
tristate "AMD Family 15h processor power"
-   depends on X86 && PCI
+   depends on X86 && PCI && CPU_SUP_AMD
help
  If you say yes here you get support for processor power
  information of your AMD family 15h CPU.
-- 
1.9.1

[PATCH v5 3/6] hwmon: (fam15h_power) Add ptsc counter value for accumulated power

2016-03-27 Thread Huang Rui

PTSC is the performance timestamp counter value in a cpu core and the
cores in one compute unit have the fixed frequency. So it picks up the
performance timestamp counter value of the first core per compute unit
to measure the interval for average power per compute unit.

Signed-off-by: Huang Rui 
Cc: Borislav Petkov 
---
 drivers/hwmon/fam15h_power.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index ccbc944..de6f52b 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -50,6 +50,7 @@ MODULE_LICENSE("GPL");
 
 #define MSR_F15H_CU_PWR_ACCUMULATOR0xc001007a
 #define MSR_F15H_CU_MAX_PWR_ACCUMULATOR0xc001007b
+#define MSR_F15H_PTSC  0xc0010280
 
 #define PCI_DEVICE_ID_AMD_15H_M70H_NB_F4 0x15b4
 
@@ -65,6 +66,8 @@ struct fam15h_power_data {
u64 max_cu_acc_power;
/* accumulated power of the compute units */
u64 cu_acc_power[MAX_CUS];
+   /* performance timestamp counter */
+   u64 cpu_sw_pwr_ptsc[MAX_CUS];
 };
 
 static ssize_t show_power(struct device *dev,
@@ -145,6 +148,7 @@ static void do_read_registers_on_cu(void *_data)
cu = cpu_data(cpu).cpu_core_id;
 
rdmsrl_safe(MSR_F15H_CU_PWR_ACCUMULATOR, &data->cu_acc_power[cu]);
+   rdmsrl_safe(MSR_F15H_PTSC, &data->cpu_sw_pwr_ptsc[cu]);
 }
 
 /*
-- 
1.9.1

[PATCH v5 1/6] hwmon: (fam15h_power) Add CPU_SUP_AMD as the dependence

2016-03-27 Thread Huang Rui

This patch adds CONFIG_CPU_SUP_AMD as a dependency of the fam15h_power
driver, because a following patch will use the interface from
x86/kernel/cpu/amd.c.

Otherwise, the below error might be encountered:

All errors (new ones prefixed by >>):

   drivers/built-in.o: In function `fam15h_power_probe':
>> fam15h_power.c:(.text+0x26e3a3): undefined reference to
>> `amd_get_cores_per_cu'
   fam15h_power.c:(.text+0x26e41e): undefined reference to
`amd_get_cores_per_cu'

Reported-by: build test robot 
Signed-off-by: Huang Rui 
---
 drivers/hwmon/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 5c2d13a..4be3792 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -288,7 +288,7 @@ config SENSORS_K10TEMP
 
 config SENSORS_FAM15H_POWER
tristate "AMD Family 15h processor power"
-   depends on X86 && PCI
+   depends on X86 && PCI && CPU_SUP_AMD
help
  If you say yes here you get support for processor power
  information of your AMD family 15h CPU.
-- 
1.9.1

[PATCH v5 3/6] hwmon: (fam15h_power) Add ptsc counter value for accumulated power

2016-03-27 Thread Huang Rui

PTSC is the performance timestamp counter value in a cpu core and the
cores in one compute unit have the fixed frequency. So it picks up the
performance timestamp counter value of the first core per compute unit
to measure the interval for average power per compute unit.

Signed-off-by: Huang Rui 
Cc: Borislav Petkov 
---
 drivers/hwmon/fam15h_power.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index ccbc944..de6f52b 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -50,6 +50,7 @@ MODULE_LICENSE("GPL");
 
 #define MSR_F15H_CU_PWR_ACCUMULATOR0xc001007a
 #define MSR_F15H_CU_MAX_PWR_ACCUMULATOR0xc001007b
+#define MSR_F15H_PTSC  0xc0010280
 
 #define PCI_DEVICE_ID_AMD_15H_M70H_NB_F4 0x15b4
 
@@ -65,6 +66,8 @@ struct fam15h_power_data {
u64 max_cu_acc_power;
/* accumulated power of the compute units */
u64 cu_acc_power[MAX_CUS];
+   /* performance timestamp counter */
+   u64 cpu_sw_pwr_ptsc[MAX_CUS];
 };
 
 static ssize_t show_power(struct device *dev,
@@ -145,6 +148,7 @@ static void do_read_registers_on_cu(void *_data)
cu = cpu_data(cpu).cpu_core_id;
 
rdmsrl_safe(MSR_F15H_CU_PWR_ACCUMULATOR, &data->cu_acc_power[cu]);
+   rdmsrl_safe(MSR_F15H_PTSC, &data->cpu_sw_pwr_ptsc[cu]);
 }
 
 /*
-- 
1.9.1

[PATCH v5 2/6] hwmon: (fam15h_power) Add compute unit accumulated power

2016-03-27 Thread Huang Rui

This patch adds a member in fam15h_power_data which specifies the
compute unit accumulated power. It adds do_read_registers_on_cu to
read all the MSRs and runs it on one of the online cores of each
compute unit with smp_call_function_many(). This reduces the number
of IPIs.

Suggested-by: Borislav Petkov 
Signed-off-by: Huang Rui 
---
 drivers/hwmon/fam15h_power.c | 65 +++-
 1 file changed, 64 insertions(+), 1 deletion(-)

diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index 4f695d8..ccbc944 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -25,6 +25,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 
@@ -44,7 +46,9 @@ MODULE_LICENSE("GPL");
 
 #define FAM15H_MIN_NUM_ATTRS   2
 #define FAM15H_NUM_GROUPS  2
+#define MAX_CUS8
 
+#define MSR_F15H_CU_PWR_ACCUMULATOR0xc001007a
 #define MSR_F15H_CU_MAX_PWR_ACCUMULATOR0xc001007b
 
 #define PCI_DEVICE_ID_AMD_15H_M70H_NB_F4 0x15b4
@@ -59,6 +63,8 @@ struct fam15h_power_data {
struct attribute_group group;
/* maximum accumulated power of a compute unit */
u64 max_cu_acc_power;
+   /* accumulated power of the compute units */
+   u64 cu_acc_power[MAX_CUS];
 };
 
 static ssize_t show_power(struct device *dev,
@@ -125,6 +131,63 @@ static ssize_t show_power_crit(struct device *dev,
 }
 static DEVICE_ATTR(power1_crit, S_IRUGO, show_power_crit, NULL);
 
+static void do_read_registers_on_cu(void *_data)
+{
+   struct fam15h_power_data *data = _data;
+   int cpu, cu;
+
+   cpu = smp_processor_id();
+
+   /*
+* With the new x86 topology modelling, cpu core id actually
+* is compute unit id.
+*/
+   cu = cpu_data(cpu).cpu_core_id;
+
+   rdmsrl_safe(MSR_F15H_CU_PWR_ACCUMULATOR, &data->cu_acc_power[cu]);
+}
+
+/*
+ * This function is only able to be called when CPUID
+ * Fn8000_0007:EDX[12] is set.
+ */
+static int read_registers(struct fam15h_power_data *data)
+{
+   int this_cpu, ret, cpu;
+   int target;
+   cpumask_var_t mask;
+
+   ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
+   if (!ret)
+   return -ENOMEM;
+
+   get_online_cpus();
+   this_cpu = get_cpu();
+
+   /*
+* Choose the first online core of each compute unit, and then
+* read their MSR value of power and ptsc in a single IPI,
+* because the MSR value of CPU core represent the compute
+* unit's.
+*/
+   for_each_online_cpu(cpu) {
+   target = cpumask_first(topology_sibling_cpumask(cpu));
+   if (!cpumask_test_cpu(target, mask))
+   cpumask_set_cpu(target, mask);
+   }
+
+   if (cpumask_test_cpu(this_cpu, mask))
+   do_read_registers_on_cu(data);
+
+   smp_call_function_many(mask, do_read_registers_on_cu, data, true);
+   put_cpu();
+   put_online_cpus();
+
+   free_cpumask_var(mask);
+
+   return 0;
+}
+
 static int fam15h_power_init_attrs(struct pci_dev *pdev,
   struct fam15h_power_data *data)
 {
@@ -263,7 +326,7 @@ static int fam15h_power_init_data(struct pci_dev *f4,
 
data->max_cu_acc_power = tmp;
 
-   return 0;
+   return read_registers(data);
 }
 
 static int fam15h_power_probe(struct pci_dev *pdev,
-- 
1.9.1

[PATCH v5 2/6] hwmon: (fam15h_power) Add compute unit accumulated power

2016-03-27 Thread Huang Rui

This patch adds a member in fam15h_power_data which specifies the
compute unit accumulated power. It adds do_read_registers_on_cu to
read all the MSRs and runs it on one of the online cores of each
compute unit with smp_call_function_many(). This reduces the number
of IPIs.

Suggested-by: Borislav Petkov 
Signed-off-by: Huang Rui 
---
 drivers/hwmon/fam15h_power.c | 65 +++-
 1 file changed, 64 insertions(+), 1 deletion(-)

diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index 4f695d8..ccbc944 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -25,6 +25,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 
@@ -44,7 +46,9 @@ MODULE_LICENSE("GPL");
 
 #define FAM15H_MIN_NUM_ATTRS   2
 #define FAM15H_NUM_GROUPS  2
+#define MAX_CUS8
 
+#define MSR_F15H_CU_PWR_ACCUMULATOR0xc001007a
 #define MSR_F15H_CU_MAX_PWR_ACCUMULATOR0xc001007b
 
 #define PCI_DEVICE_ID_AMD_15H_M70H_NB_F4 0x15b4
@@ -59,6 +63,8 @@ struct fam15h_power_data {
struct attribute_group group;
/* maximum accumulated power of a compute unit */
u64 max_cu_acc_power;
+   /* accumulated power of the compute units */
+   u64 cu_acc_power[MAX_CUS];
 };
 
 static ssize_t show_power(struct device *dev,
@@ -125,6 +131,63 @@ static ssize_t show_power_crit(struct device *dev,
 }
 static DEVICE_ATTR(power1_crit, S_IRUGO, show_power_crit, NULL);
 
+static void do_read_registers_on_cu(void *_data)
+{
+   struct fam15h_power_data *data = _data;
+   int cpu, cu;
+
+   cpu = smp_processor_id();
+
+   /*
+* With the new x86 topology modelling, cpu core id actually
+* is compute unit id.
+*/
+   cu = cpu_data(cpu).cpu_core_id;
+
+   rdmsrl_safe(MSR_F15H_CU_PWR_ACCUMULATOR, &data->cu_acc_power[cu]);
+}
+
+/*
+ * This function is only able to be called when CPUID
+ * Fn8000_0007:EDX[12] is set.
+ */
+static int read_registers(struct fam15h_power_data *data)
+{
+   int this_cpu, ret, cpu;
+   int target;
+   cpumask_var_t mask;
+
+   ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
+   if (!ret)
+   return -ENOMEM;
+
+   get_online_cpus();
+   this_cpu = get_cpu();
+
+   /*
+* Choose the first online core of each compute unit, and then
+* read their MSR value of power and ptsc in a single IPI,
+* because the MSR value of CPU core represent the compute
+* unit's.
+*/
+   for_each_online_cpu(cpu) {
+   target = cpumask_first(topology_sibling_cpumask(cpu));
+   if (!cpumask_test_cpu(target, mask))
+   cpumask_set_cpu(target, mask);
+   }
+
+   if (cpumask_test_cpu(this_cpu, mask))
+   do_read_registers_on_cu(data);
+
+   smp_call_function_many(mask, do_read_registers_on_cu, data, true);
+   put_cpu();
+   put_online_cpus();
+
+   free_cpumask_var(mask);
+
+   return 0;
+}
+
 static int fam15h_power_init_attrs(struct pci_dev *pdev,
   struct fam15h_power_data *data)
 {
@@ -263,7 +326,7 @@ static int fam15h_power_init_data(struct pci_dev *f4,
 
data->max_cu_acc_power = tmp;
 
-   return 0;
+   return read_registers(data);
 }
 
 static int fam15h_power_probe(struct pci_dev *pdev,
-- 
1.9.1

RE: [PATCH v2 3/6] Documentation: DT: vdma: update binding doc for AXI DMA

2016-03-27 Thread Appana Durga Kedareswara Rao

Hi Soren,

> -Original Message-
> From: Sören Brinkmann [mailto:soren.brinkm...@xilinx.com]
> Sent: Monday, March 28, 2016 12:56 AM
> To: Appana Durga Kedareswara Rao
> Cc: robh...@kernel.org; pawel.m...@arm.com; mark.rutl...@arm.com;
> ijc+devicet...@hellion.org.uk; ga...@codeaurora.org; Michal Simek;
> vinod.k...@intel.com; dan.j.willi...@intel.com; Anurag Kumar Vulisha; Appana
> Durga Kedareswara Rao; moritz.fisc...@ettus.com;
> laurent.pinch...@ideasonboard.com; l...@debethencourt.com; Srikanth
> Vemula; Anirudha Sarangi; devicet...@vger.kernel.org; linux-arm-
> ker...@lists.infradead.org; linux-kernel@vger.kernel.org;
> dmaeng...@vger.kernel.org
> Subject: Re: [PATCH v2 3/6] Documentation: DT: vdma: update binding doc for
> AXI DMA
> 
> On Sun, 2016-03-27 at 23:36:05 +0530, Kedareswara rao Appana wrote:
> > This patch updates the device-tree binding doc for adding support for
> > AXI DMA.
> >
> > Signed-off-by: Kedareswara rao Appana 
> > ---
> > Changes for v2:
> > ---> Modified commit message as suggested by Vinod.
> > ---> Moved the patch to forward in the series as suggested by vinod.
> >
> >  .../devicetree/bindings/dma/xilinx/xilinx_vdma.txt | 22
> > +-
> >  1 file changed, 21 insertions(+), 1 deletion(-)
> >
> > diff --git
> > a/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt
> > b/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt
> > index a86737c..5841421 100644
> > --- a/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt
> > +++ b/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt
> > @@ -3,8 +3,13 @@ It can be configured to have one channel or two
> > channels. If configured  as two channels, one is to transmit to the
> > video device and another is  to receive from the video device.
> >
> > +Xilinx AXI DMA engine, it does transfers between memory and AXI4
> > +stream target devices. It can be configured to have one channel or two
> channels.
> > +If configured as two channels, one is to transmit to the device and
> > +another is to receive from the device.
> > +
> >  Required properties:
> > -- compatible: Should be "xlnx,axi-vdma-1.00.a"
> > +- compatible: Should be "xlnx,axi-vdma-1.00.a" or "xlnx,axi-dma-1.00.a"
> >  - #dma-cells: Should be <1>, see "dmas" property below
> >  - reg: Should contain VDMA registers location and length.
> >  - xlnx,num-fstores: Should be the number of framebuffers as configured in
> h/w.
> > @@ -59,6 +64,21 @@ axi_vdma_0: axivdma@4003 {
> > } ;
> >  } ;
> >
> > +axi_dma_0: axidma@4040 {
> 
> The node names should follow the generic names, hence 'dma-controller@...'.

Ok will fix in next version...

> 
> > +   compatible = "xlnx,axi-dma-1.00.a";
> > +   #dma-cells = <1>;
> > +   reg = < 0x4040 0x1 >;
> > +   dma-channel@4040 {
> > +   compatible = "xlnx,axi-dma-mm2s-channel";
> > +   interrupts = < 0 59 4 >;
> > +   xlnx,datawidth = <0x40>;
> > +   } ;
> > +   dma-channel@40400030 {
> > +   compatible = "xlnx,axi-dma-s2mm-channel";
> > +   interrupts = < 0 58 4 >;
> > +   xlnx,datawidth = <0x40>;
> 
> Nit: The spacing around the '<' '>' is inconsistent. I'd just remove all 
> redundant
> spaces.

Ok will fix in next version...

Regards,
Kedar.

> 
>   Sören

RE: [PATCH v2 3/6] Documentation: DT: vdma: update binding doc for AXI DMA

2016-03-27 Thread Appana Durga Kedareswara Rao

Hi Soren,

> -Original Message-
> From: Sören Brinkmann [mailto:soren.brinkm...@xilinx.com]
> Sent: Monday, March 28, 2016 12:56 AM
> To: Appana Durga Kedareswara Rao
> Cc: robh...@kernel.org; pawel.m...@arm.com; mark.rutl...@arm.com;
> ijc+devicet...@hellion.org.uk; ga...@codeaurora.org; Michal Simek;
> vinod.k...@intel.com; dan.j.willi...@intel.com; Anurag Kumar Vulisha; Appana
> Durga Kedareswara Rao; moritz.fisc...@ettus.com;
> laurent.pinch...@ideasonboard.com; l...@debethencourt.com; Srikanth
> Vemula; Anirudha Sarangi; devicet...@vger.kernel.org; linux-arm-
> ker...@lists.infradead.org; linux-kernel@vger.kernel.org;
> dmaeng...@vger.kernel.org
> Subject: Re: [PATCH v2 3/6] Documentation: DT: vdma: update binding doc for
> AXI DMA
> 
> On Sun, 2016-03-27 at 23:36:05 +0530, Kedareswara rao Appana wrote:
> > This patch updates the device-tree binding doc for adding support for
> > AXI DMA.
> >
> > Signed-off-by: Kedareswara rao Appana 
> > ---
> > Changes for v2:
> > ---> Modified commit message as suggested by Vinod.
> > ---> Moved the patch to forward in the series as suggested by vinod.
> >
> >  .../devicetree/bindings/dma/xilinx/xilinx_vdma.txt | 22
> > +-
> >  1 file changed, 21 insertions(+), 1 deletion(-)
> >
> > diff --git
> > a/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt
> > b/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt
> > index a86737c..5841421 100644
> > --- a/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt
> > +++ b/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt
> > @@ -3,8 +3,13 @@ It can be configured to have one channel or two
> > channels. If configured  as two channels, one is to transmit to the
> > video device and another is  to receive from the video device.
> >
> > +Xilinx AXI DMA engine, it does transfers between memory and AXI4
> > +stream target devices. It can be configured to have one channel or two
> channels.
> > +If configured as two channels, one is to transmit to the device and
> > +another is to receive from the device.
> > +
> >  Required properties:
> > -- compatible: Should be "xlnx,axi-vdma-1.00.a"
> > +- compatible: Should be "xlnx,axi-vdma-1.00.a" or "xlnx,axi-dma-1.00.a"
> >  - #dma-cells: Should be <1>, see "dmas" property below
> >  - reg: Should contain VDMA registers location and length.
> >  - xlnx,num-fstores: Should be the number of framebuffers as configured in
> h/w.
> > @@ -59,6 +64,21 @@ axi_vdma_0: axivdma@4003 {
> > } ;
> >  } ;
> >
> > +axi_dma_0: axidma@4040 {
> 
> The node names should follow the generic names, hence 'dma-controller@...'.

Ok will fix in next version...

> 
> > +   compatible = "xlnx,axi-dma-1.00.a";
> > +   #dma-cells = <1>;
> > +   reg = < 0x4040 0x1 >;
> > +   dma-channel@4040 {
> > +   compatible = "xlnx,axi-dma-mm2s-channel";
> > +   interrupts = < 0 59 4 >;
> > +   xlnx,datawidth = <0x40>;
> > +   } ;
> > +   dma-channel@40400030 {
> > +   compatible = "xlnx,axi-dma-s2mm-channel";
> > +   interrupts = < 0 58 4 >;
> > +   xlnx,datawidth = <0x40>;
> 
> Nit: The spacing around the '<' '>' is inconsistent. I'd just remove all 
> redundant
> spaces.

Ok will fix in next version...

Regards,
Kedar.

> 
>   Sören

[PATCH 05/11] mm/slab: clean-up kmem_cache_node setup

2016-03-27 Thread js1304

From: Joonsoo Kim 

There is mostly the same code for setting up kmem_cache_node in both
cpuup_prepare() and alloc_kmem_cache_node(). Factor it out and
clean it up.

Signed-off-by: Joonsoo Kim 
---
 mm/slab.c | 167 +-
 1 file changed, 67 insertions(+), 100 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index 569d7db..b96f381 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -898,6 +898,62 @@ static int init_cache_node_node(int node)
return 0;
 }
 
+static int setup_kmem_cache_node(struct kmem_cache *cachep,
+   int node, gfp_t gfp, bool force_change)
+{
+   int ret = -ENOMEM;
+   struct kmem_cache_node *n;
+   struct array_cache *old_shared = NULL;
+   struct array_cache *new_shared = NULL;
+   struct alien_cache **new_alien = NULL;
+   LIST_HEAD(list);
+
+   if (use_alien_caches) {
+   new_alien = alloc_alien_cache(node, cachep->limit, gfp);
+   if (!new_alien)
+   goto fail;
+   }
+
+   if (cachep->shared) {
+   new_shared = alloc_arraycache(node,
+   cachep->shared * cachep->batchcount, 0xbaadf00d, gfp);
+   if (!new_shared)
+   goto fail;
+   }
+
+   ret = init_cache_node(cachep, node, gfp);
+   if (ret)
+   goto fail;
+
+   n = get_node(cachep, node);
+   spin_lock_irq(&n->list_lock);
+   if (n->shared) {
+   free_block(cachep, n->shared->entry,
+   n->shared->avail, node, &list);
+   }
+
+   if (!n->shared || force_change) {
+   old_shared = n->shared;
+   n->shared = new_shared;
+   new_shared = NULL;
+   }
+
+   if (!n->alien) {
+   n->alien = new_alien;
+   new_alien = NULL;
+   }
+
+   spin_unlock_irq(&n->list_lock);
+   slabs_destroy(cachep, &list);
+
+fail:
+   kfree(old_shared);
+   kfree(new_shared);
+   free_alien_cache(new_alien);
+
+   return ret;
+}
+
 static void cpuup_canceled(long cpu)
 {
struct kmem_cache *cachep;
@@ -969,7 +1025,6 @@ free_slab:
 static int cpuup_prepare(long cpu)
 {
struct kmem_cache *cachep;
-   struct kmem_cache_node *n = NULL;
int node = cpu_to_mem(cpu);
int err;
 
@@ -988,44 +1043,9 @@ static int cpuup_prepare(long cpu)
 * array caches
 */
list_for_each_entry(cachep, _caches, list) {
-   struct array_cache *shared = NULL;
-   struct alien_cache **alien = NULL;
-
-   if (cachep->shared) {
-   shared = alloc_arraycache(node,
-   cachep->shared * cachep->batchcount,
-   0xbaadf00d, GFP_KERNEL);
-   if (!shared)
-   goto bad;
-   }
-   if (use_alien_caches) {
-   alien = alloc_alien_cache(node, cachep->limit, 
GFP_KERNEL);
-   if (!alien) {
-   kfree(shared);
-   goto bad;
-   }
-   }
-   n = get_node(cachep, node);
-   BUG_ON(!n);
-
-   spin_lock_irq(>list_lock);
-   if (!n->shared) {
-   /*
-* We are serialised from CPU_DEAD or
-* CPU_UP_CANCELLED by the cpucontrol lock
-*/
-   n->shared = shared;
-   shared = NULL;
-   }
-#ifdef CONFIG_NUMA
-   if (!n->alien) {
-   n->alien = alien;
-   alien = NULL;
-   }
-#endif
-   spin_unlock_irq(>list_lock);
-   kfree(shared);
-   free_alien_cache(alien);
+   err = setup_kmem_cache_node(cachep, node, GFP_KERNEL, false);
+   if (err)
+   goto bad;
}
 
return 0;
@@ -3652,72 +3672,19 @@ EXPORT_SYMBOL(kfree);
 /*
  * This initializes kmem_cache_node or resizes various caches for all nodes.
  */
-static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
+static int setup_kmem_cache_node_node(struct kmem_cache *cachep, gfp_t gfp)
 {
+   int ret;
int node;
struct kmem_cache_node *n;
-   struct array_cache *new_shared;
-   struct alien_cache **new_alien = NULL;
 
for_each_online_node(node) {
-
-   if (use_alien_caches) {
-   new_alien = alloc_alien_cache(node, cachep->limit, gfp);
-   if (!new_alien)
-   goto fail;
-   }
-
-   new_shared = NULL;
-   if (cachep->shared) {
-   new_shared =

[PATCH] arm: dts: mt2701: Add clock controller device nodes

2016-03-27 Thread James Liao

Add clock controller nodes for MT2701, including topckgen, infracfg,
pericfg, apmixedsys, mmsys, imgsys, vdecsys, hifsys, ethsys and
bdpsys. This patch also adds two oscillators that provide clocks for
MT2701.

Signed-off-by: James Liao 
---
This patch is based on v4.6-rc1 and MT2701 clock patches [1]. This
patch adds all clock provider nodes which are supported in [1].

[1] 
http://lists.infradead.org/pipermail/linux-mediatek/2016-February/004030.html

 arch/arm/boot/dts/mt2701.dtsi | 78 +++
 1 file changed, 78 insertions(+)

diff --git a/arch/arm/boot/dts/mt2701.dtsi b/arch/arm/boot/dts/mt2701.dtsi
index 8343768..c8fee33 100644
--- a/arch/arm/boot/dts/mt2701.dtsi
+++ b/arch/arm/boot/dts/mt2701.dtsi
@@ -12,6 +12,7 @@
  * GNU General Public License for more details.
  */
 
+#include 
 #include 
 #include 
 #include "skeleton64.dtsi"
@@ -76,6 +77,20 @@
#clock-cells = <0>;
};
 
+   clk26m: oscillator@0 {
+   compatible = "fixed-clock";
+   #clock-cells = <0>;
+   clock-frequency = <2600>;
+   clock-output-names = "clk26m";
+   };
+
+   rtc32k: oscillator@1 {
+   compatible = "fixed-clock";
+   #clock-cells = <0>;
+   clock-frequency = <32000>;
+   clock-output-names = "rtc32k";
+   };
+
timer {
compatible = "arm,armv7-timer";
interrupt-parent = <>;
@@ -85,6 +100,26 @@
 ;
};
 
+   topckgen: syscon@1000 {
+   compatible = "mediatek,mt2701-topckgen", "syscon";
+   reg = <0 0x1000 0 0x1000>;
+   #clock-cells = <1>;
+   };
+
+   infracfg: syscon@10001000 {
+   compatible = "mediatek,mt2701-infracfg", "syscon";
+   reg = <0 0x10001000 0 0x1000>;
+   #clock-cells = <1>;
+   #reset-cells = <1>;
+   };
+
+   pericfg: syscon@10003000 {
+   compatible = "mediatek,mt2701-pericfg", "syscon";
+   reg = <0 0x10003000 0 0x1000>;
+   #clock-cells = <1>;
+   #reset-cells = <1>;
+   };
+
watchdog: watchdog@10007000 {
compatible = "mediatek,mt2701-wdt",
 "mediatek,mt6589-wdt";
@@ -109,6 +144,13 @@
reg = <0 0x10200100 0 0x1c>;
};
 
+   apmixedsys: syscon@10209000 {
+   compatible = "mediatek,mt2701-apmixedsys", "syscon";
+   reg = <0 0x10209000 0 0x1000>;
+   mediatek,hdmi-ibias = <0xa>;
+   #clock-cells = <1>;
+   };
+
gic: interrupt-controller@10211000 {
compatible = "arm,cortex-a7-gic";
interrupt-controller;
@@ -155,4 +197,40 @@
clocks = <_clk>;
status = "disabled";
};
+
+   mmsys: syscon@1400 {
+   compatible = "mediatek,mt2701-mmsys", "syscon";
+   reg = <0 0x1400 0 0x1000>;
+   #clock-cells = <1>;
+   };
+
+   imgsys: syscon@1500 {
+   compatible = "mediatek,mt2701-imgsys", "syscon";
+   reg = <0 0x1500 0 0x1000>;
+   #clock-cells = <1>;
+   };
+
+   vdecsys: syscon@1600 {
+   compatible = "mediatek,mt2701-vdecsys", "syscon";
+   reg = <0 0x1600 0 0x1000>;
+   #clock-cells = <1>;
+   };
+
+   hifsys: syscon@1a00 {
+   compatible = "mediatek,mt2701-hifsys", "syscon";
+   reg = <0 0x1a00 0 0x1000>;
+   #clock-cells = <1>;
+   };
+
+   ethsys: syscon@1b00 {
+   compatible = "mediatek,mt2701-ethsys", "syscon";
+   reg = <0 0x1b00 0 0x1000>;
+   #clock-cells = <1>;
+   };
+
+   bdpsys: syscon@1c00 {
+   compatible = "mediatek,mt2701-bdpsys", "syscon";
+   reg = <0 0x1c00 0 0x1000>;
+   #clock-cells = <1>;
+   };
 };
-- 
1.9.1

[PATCH 05/11] mm/slab: clean-up kmem_cache_node setup

2016-03-27 Thread js1304

From: Joonsoo Kim 

There is mostly the same code for setting up kmem_cache_node in both
cpuup_prepare() and alloc_kmem_cache_node(). Factor it out and
clean it up.

Signed-off-by: Joonsoo Kim 
---
 mm/slab.c | 167 +-
 1 file changed, 67 insertions(+), 100 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index 569d7db..b96f381 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -898,6 +898,62 @@ static int init_cache_node_node(int node)
return 0;
 }
 
+static int setup_kmem_cache_node(struct kmem_cache *cachep,
+   int node, gfp_t gfp, bool force_change)
+{
+   int ret = -ENOMEM;
+   struct kmem_cache_node *n;
+   struct array_cache *old_shared = NULL;
+   struct array_cache *new_shared = NULL;
+   struct alien_cache **new_alien = NULL;
+   LIST_HEAD(list);
+
+   if (use_alien_caches) {
+   new_alien = alloc_alien_cache(node, cachep->limit, gfp);
+   if (!new_alien)
+   goto fail;
+   }
+
+   if (cachep->shared) {
+   new_shared = alloc_arraycache(node,
+   cachep->shared * cachep->batchcount, 0xbaadf00d, gfp);
+   if (!new_shared)
+   goto fail;
+   }
+
+   ret = init_cache_node(cachep, node, gfp);
+   if (ret)
+   goto fail;
+
+   n = get_node(cachep, node);
+   spin_lock_irq(>list_lock);
+   if (n->shared) {
+   free_block(cachep, n->shared->entry,
+   n->shared->avail, node, );
+   }
+
+   if (!n->shared || force_change) {
+   old_shared = n->shared;
+   n->shared = new_shared;
+   new_shared = NULL;
+   }
+
+   if (!n->alien) {
+   n->alien = new_alien;
+   new_alien = NULL;
+   }
+
+   spin_unlock_irq(>list_lock);
+   slabs_destroy(cachep, );
+
+fail:
+   kfree(old_shared);
+   kfree(new_shared);
+   free_alien_cache(new_alien);
+
+   return ret;
+}
+
 static void cpuup_canceled(long cpu)
 {
struct kmem_cache *cachep;
@@ -969,7 +1025,6 @@ free_slab:
 static int cpuup_prepare(long cpu)
 {
struct kmem_cache *cachep;
-   struct kmem_cache_node *n = NULL;
int node = cpu_to_mem(cpu);
int err;
 
@@ -988,44 +1043,9 @@ static int cpuup_prepare(long cpu)
 * array caches
 */
list_for_each_entry(cachep, _caches, list) {
-   struct array_cache *shared = NULL;
-   struct alien_cache **alien = NULL;
-
-   if (cachep->shared) {
-   shared = alloc_arraycache(node,
-   cachep->shared * cachep->batchcount,
-   0xbaadf00d, GFP_KERNEL);
-   if (!shared)
-   goto bad;
-   }
-   if (use_alien_caches) {
-   alien = alloc_alien_cache(node, cachep->limit, 
GFP_KERNEL);
-   if (!alien) {
-   kfree(shared);
-   goto bad;
-   }
-   }
-   n = get_node(cachep, node);
-   BUG_ON(!n);
-
-   spin_lock_irq(>list_lock);
-   if (!n->shared) {
-   /*
-* We are serialised from CPU_DEAD or
-* CPU_UP_CANCELLED by the cpucontrol lock
-*/
-   n->shared = shared;
-   shared = NULL;
-   }
-#ifdef CONFIG_NUMA
-   if (!n->alien) {
-   n->alien = alien;
-   alien = NULL;
-   }
-#endif
-   spin_unlock_irq(>list_lock);
-   kfree(shared);
-   free_alien_cache(alien);
+   err = setup_kmem_cache_node(cachep, node, GFP_KERNEL, false);
+   if (err)
+   goto bad;
}
 
return 0;
@@ -3652,72 +3672,19 @@ EXPORT_SYMBOL(kfree);
 /*
  * This initializes kmem_cache_node or resizes various caches for all nodes.
  */
-static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
+static int setup_kmem_cache_node_node(struct kmem_cache *cachep, gfp_t gfp)
 {
+   int ret;
int node;
struct kmem_cache_node *n;
-   struct array_cache *new_shared;
-   struct alien_cache **new_alien = NULL;
 
for_each_online_node(node) {
-
-   if (use_alien_caches) {
-   new_alien = alloc_alien_cache(node, cachep->limit, gfp);
-   if (!new_alien)
-   goto fail;
-   }
-
-   new_shared = NULL;
-   if (cachep->shared) {
-   new_shared = alloc_arraycache(node,
-

[PATCH] arm: dts: mt2701: Add clock controller device nodes

2016-03-27 Thread James Liao

Add clock controller nodes for MT2701, including topckgen, infracfg,
pericfg, apmixedsys, mmsys, imgsys, vdecsys, hifsys, ethsys and
bdpsys. This patch also adds two oscillators that provide clocks for
MT2701.

Signed-off-by: James Liao 
---
This patch is based on v4.6-rc1 and MT2701 clock patches [1]. This
patch adds all clock provider nodes which are supported in [1].

[1] 
http://lists.infradead.org/pipermail/linux-mediatek/2016-February/004030.html

 arch/arm/boot/dts/mt2701.dtsi | 78 +++
 1 file changed, 78 insertions(+)

diff --git a/arch/arm/boot/dts/mt2701.dtsi b/arch/arm/boot/dts/mt2701.dtsi
index 8343768..c8fee33 100644
--- a/arch/arm/boot/dts/mt2701.dtsi
+++ b/arch/arm/boot/dts/mt2701.dtsi
@@ -12,6 +12,7 @@
  * GNU General Public License for more details.
  */
 
+#include 
 #include 
 #include 
 #include "skeleton64.dtsi"
@@ -76,6 +77,20 @@
#clock-cells = <0>;
};
 
+   clk26m: oscillator@0 {
+   compatible = "fixed-clock";
+   #clock-cells = <0>;
+   clock-frequency = <2600>;
+   clock-output-names = "clk26m";
+   };
+
+   rtc32k: oscillator@1 {
+   compatible = "fixed-clock";
+   #clock-cells = <0>;
+   clock-frequency = <32000>;
+   clock-output-names = "rtc32k";
+   };
+
timer {
compatible = "arm,armv7-timer";
interrupt-parent = <>;
@@ -85,6 +100,26 @@
 ;
};
 
+   topckgen: syscon@1000 {
+   compatible = "mediatek,mt2701-topckgen", "syscon";
+   reg = <0 0x1000 0 0x1000>;
+   #clock-cells = <1>;
+   };
+
+   infracfg: syscon@10001000 {
+   compatible = "mediatek,mt2701-infracfg", "syscon";
+   reg = <0 0x10001000 0 0x1000>;
+   #clock-cells = <1>;
+   #reset-cells = <1>;
+   };
+
+   pericfg: syscon@10003000 {
+   compatible = "mediatek,mt2701-pericfg", "syscon";
+   reg = <0 0x10003000 0 0x1000>;
+   #clock-cells = <1>;
+   #reset-cells = <1>;
+   };
+
watchdog: watchdog@10007000 {
compatible = "mediatek,mt2701-wdt",
 "mediatek,mt6589-wdt";
@@ -109,6 +144,13 @@
reg = <0 0x10200100 0 0x1c>;
};
 
+   apmixedsys: syscon@10209000 {
+   compatible = "mediatek,mt2701-apmixedsys", "syscon";
+   reg = <0 0x10209000 0 0x1000>;
+   mediatek,hdmi-ibias = <0xa>;
+   #clock-cells = <1>;
+   };
+
gic: interrupt-controller@10211000 {
compatible = "arm,cortex-a7-gic";
interrupt-controller;
@@ -155,4 +197,40 @@
clocks = <_clk>;
status = "disabled";
};
+
+   mmsys: syscon@1400 {
+   compatible = "mediatek,mt2701-mmsys", "syscon";
+   reg = <0 0x1400 0 0x1000>;
+   #clock-cells = <1>;
+   };
+
+   imgsys: syscon@1500 {
+   compatible = "mediatek,mt2701-imgsys", "syscon";
+   reg = <0 0x1500 0 0x1000>;
+   #clock-cells = <1>;
+   };
+
+   vdecsys: syscon@1600 {
+   compatible = "mediatek,mt2701-vdecsys", "syscon";
+   reg = <0 0x1600 0 0x1000>;
+   #clock-cells = <1>;
+   };
+
+   hifsys: syscon@1a00 {
+   compatible = "mediatek,mt2701-hifsys", "syscon";
+   reg = <0 0x1a00 0 0x1000>;
+   #clock-cells = <1>;
+   };
+
+   ethsys: syscon@1b00 {
+   compatible = "mediatek,mt2701-ethsys", "syscon";
+   reg = <0 0x1b00 0 0x1000>;
+   #clock-cells = <1>;
+   };
+
+   bdpsys: syscon@1c00 {
+   compatible = "mediatek,mt2701-bdpsys", "syscon";
+   reg = <0 0x1c00 0 0x1000>;
+   #clock-cells = <1>;
+   };
 };
-- 
1.9.1

[PATCH 06/11] mm/slab: don't keep free slabs if free_objects exceeds free_limit

2016-03-27 Thread js1304

From: Joonsoo Kim 

Currently, the decision to free a slab is made whenever a free object is
put into the slab. The problem is that free slabs are not freed, even if
we have free slabs and more free_objects than free_limit, when the slab
being processed isn't a free slab. This can keep too much memory in the
slab subsystem. This patch tries to fix it by checking the number of free
objects after all free work is done. If there is a free slab at that
time, we can free it, so we keep as few free slabs as possible.

Signed-off-by: Joonsoo Kim 
---
 mm/slab.c | 23 ++-
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index b96f381..df11757 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3258,6 +3258,9 @@ static void free_block(struct kmem_cache *cachep, void 
**objpp,
 {
int i;
struct kmem_cache_node *n = get_node(cachep, node);
+   struct page *page;
+
+   n->free_objects += nr_objects;
 
for (i = 0; i < nr_objects; i++) {
void *objp;
@@ -3270,17 +3273,11 @@ static void free_block(struct kmem_cache *cachep, void 
**objpp,
check_spinlock_acquired_node(cachep, node);
slab_put_obj(cachep, page, objp);
STATS_DEC_ACTIVE(cachep);
-   n->free_objects++;
 
/* fixup slab chains */
-   if (page->active == 0) {
-   if (n->free_objects > n->free_limit) {
-   n->free_objects -= cachep->num;
-   list_add_tail(>lru, list);
-   } else {
-   list_add(>lru, >slabs_free);
-   }
-   } else {
+   if (page->active == 0)
+   list_add(>lru, >slabs_free);
+   else {
/* Unconditionally move a slab to the end of the
 * partial list on free - maximum time for the
 * other objects to be freed, too.
@@ -3288,6 +3285,14 @@ static void free_block(struct kmem_cache *cachep, void 
**objpp,
list_add_tail(>lru, >slabs_partial);
}
}
+
+   while (n->free_objects > n->free_limit && !list_empty(>slabs_free)) {
+   n->free_objects -= cachep->num;
+
+   page = list_last_entry(>slabs_free, struct page, lru);
+   list_del(>lru);
+   list_add(>lru, list);
+   }
 }
 
 static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
-- 
1.9.1

[PATCH 07/11] mm/slab: racy access/modify the slab color

2016-03-27 Thread js1304

From: Joonsoo Kim 

The slab color doesn't need to be updated strictly. Because taking a lock
to change the slab color can cause more lock contention, this patch
implements racy access/modification of the slab color. This is a
preparation step to implement a lockless allocation path when there are
no free objects in the kmem_cache.

Below is the result of concurrent allocation/free in slab allocation
benchmark made by Christoph a long time ago. I make the output simpler.
The number shows cycle count during alloc/free respectively so less
is better.

* Before
Kmalloc N*alloc N*free(32): Average=365/806
Kmalloc N*alloc N*free(64): Average=452/690
Kmalloc N*alloc N*free(128): Average=736/886
Kmalloc N*alloc N*free(256): Average=1167/985
Kmalloc N*alloc N*free(512): Average=2088/1125
Kmalloc N*alloc N*free(1024): Average=4115/1184
Kmalloc N*alloc N*free(2048): Average=8451/1748
Kmalloc N*alloc N*free(4096): Average=16024/2048

* After
Kmalloc N*alloc N*free(32): Average=355/750
Kmalloc N*alloc N*free(64): Average=452/812
Kmalloc N*alloc N*free(128): Average=559/1070
Kmalloc N*alloc N*free(256): Average=1176/980
Kmalloc N*alloc N*free(512): Average=1939/1189
Kmalloc N*alloc N*free(1024): Average=3521/1278
Kmalloc N*alloc N*free(2048): Average=7152/1838
Kmalloc N*alloc N*free(4096): Average=13438/2013

It shows that contention is reduced for object size >= 1024
and performance increases by roughly 15%.

Signed-off-by: Joonsoo Kim 
---
 mm/slab.c | 26 +-
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index df11757..52fc5e3 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2536,20 +2536,7 @@ static int cache_grow(struct kmem_cache *cachep,
}
local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
 
-   /* Take the node list lock to change the colour_next on this node */
check_irq_off();
-   n = get_node(cachep, nodeid);
-   spin_lock(>list_lock);
-
-   /* Get colour for the slab, and cal the next value. */
-   offset = n->colour_next;
-   n->colour_next++;
-   if (n->colour_next >= cachep->colour)
-   n->colour_next = 0;
-   spin_unlock(>list_lock);
-
-   offset *= cachep->colour_off;
-
if (gfpflags_allow_blocking(local_flags))
local_irq_enable();
 
@@ -2570,6 +2557,19 @@ static int cache_grow(struct kmem_cache *cachep,
if (!page)
goto failed;
 
+   n = get_node(cachep, nodeid);
+
+   /* Get colour for the slab, and cal the next value. */
+   n->colour_next++;
+   if (n->colour_next >= cachep->colour)
+   n->colour_next = 0;
+
+   offset = n->colour_next;
+   if (offset >= cachep->colour)
+   offset = 0;
+
+   offset *= cachep->colour_off;
+
/* Get slab management. */
freelist = alloc_slabmgmt(cachep, page, offset,
local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
-- 
1.9.1

[PATCH 06/11] mm/slab: don't keep free slabs if free_objects exceeds free_limit

2016-03-27 Thread js1304

From: Joonsoo Kim 

Currently, the decision to free a slab is made whenever a free object is
put into the slab. The problem is that free slabs are not freed, even if
we have free slabs and more free_objects than free_limit, when the slab
being processed isn't a free slab. This can keep too much memory in the
slab subsystem. This patch tries to fix it by checking the number of free
objects after all free work is done. If there is a free slab at that
time, we can free it, so we keep as few free slabs as possible.

Signed-off-by: Joonsoo Kim 
---
 mm/slab.c | 23 ++-
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index b96f381..df11757 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3258,6 +3258,9 @@ static void free_block(struct kmem_cache *cachep, void 
**objpp,
 {
int i;
struct kmem_cache_node *n = get_node(cachep, node);
+   struct page *page;
+
+   n->free_objects += nr_objects;
 
for (i = 0; i < nr_objects; i++) {
void *objp;
@@ -3270,17 +3273,11 @@ static void free_block(struct kmem_cache *cachep, void 
**objpp,
check_spinlock_acquired_node(cachep, node);
slab_put_obj(cachep, page, objp);
STATS_DEC_ACTIVE(cachep);
-   n->free_objects++;
 
/* fixup slab chains */
-   if (page->active == 0) {
-   if (n->free_objects > n->free_limit) {
-   n->free_objects -= cachep->num;
-   list_add_tail(>lru, list);
-   } else {
-   list_add(>lru, >slabs_free);
-   }
-   } else {
+   if (page->active == 0)
+   list_add(>lru, >slabs_free);
+   else {
/* Unconditionally move a slab to the end of the
 * partial list on free - maximum time for the
 * other objects to be freed, too.
@@ -3288,6 +3285,14 @@ static void free_block(struct kmem_cache *cachep, void 
**objpp,
list_add_tail(>lru, >slabs_partial);
}
}
+
+   while (n->free_objects > n->free_limit && !list_empty(>slabs_free)) {
+   n->free_objects -= cachep->num;
+
+   page = list_last_entry(>slabs_free, struct page, lru);
+   list_del(>lru);
+   list_add(>lru, list);
+   }
 }
 
 static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
-- 
1.9.1

[PATCH 07/11] mm/slab: racy access/modify the slab color

2016-03-27 Thread js1304

From: Joonsoo Kim 

The slab color doesn't need to be updated strictly. Because taking a lock
to change the slab color can cause more lock contention, this patch
implements racy access/modification of the slab color. This is a
preparation step to implement a lockless allocation path when there are
no free objects in the kmem_cache.

Below is the result of concurrent allocation/free in slab allocation
benchmark made by Christoph a long time ago. I make the output simpler.
The number shows cycle count during alloc/free respectively so less
is better.

* Before
Kmalloc N*alloc N*free(32): Average=365/806
Kmalloc N*alloc N*free(64): Average=452/690
Kmalloc N*alloc N*free(128): Average=736/886
Kmalloc N*alloc N*free(256): Average=1167/985
Kmalloc N*alloc N*free(512): Average=2088/1125
Kmalloc N*alloc N*free(1024): Average=4115/1184
Kmalloc N*alloc N*free(2048): Average=8451/1748
Kmalloc N*alloc N*free(4096): Average=16024/2048

* After
Kmalloc N*alloc N*free(32): Average=355/750
Kmalloc N*alloc N*free(64): Average=452/812
Kmalloc N*alloc N*free(128): Average=559/1070
Kmalloc N*alloc N*free(256): Average=1176/980
Kmalloc N*alloc N*free(512): Average=1939/1189
Kmalloc N*alloc N*free(1024): Average=3521/1278
Kmalloc N*alloc N*free(2048): Average=7152/1838
Kmalloc N*alloc N*free(4096): Average=13438/2013

It shows that contention is reduced for object size >= 1024
and performance increases by roughly 15%.

Signed-off-by: Joonsoo Kim 
---
 mm/slab.c | 26 +-
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index df11757..52fc5e3 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2536,20 +2536,7 @@ static int cache_grow(struct kmem_cache *cachep,
}
local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
 
-   /* Take the node list lock to change the colour_next on this node */
check_irq_off();
-   n = get_node(cachep, nodeid);
-   spin_lock(>list_lock);
-
-   /* Get colour for the slab, and cal the next value. */
-   offset = n->colour_next;
-   n->colour_next++;
-   if (n->colour_next >= cachep->colour)
-   n->colour_next = 0;
-   spin_unlock(>list_lock);
-
-   offset *= cachep->colour_off;
-
if (gfpflags_allow_blocking(local_flags))
local_irq_enable();
 
@@ -2570,6 +2557,19 @@ static int cache_grow(struct kmem_cache *cachep,
if (!page)
goto failed;
 
+   n = get_node(cachep, nodeid);
+
+   /* Get colour for the slab, and cal the next value. */
+   n->colour_next++;
+   if (n->colour_next >= cachep->colour)
+   n->colour_next = 0;
+
+   offset = n->colour_next;
+   if (offset >= cachep->colour)
+   offset = 0;
+
+   offset *= cachep->colour_off;
+
/* Get slab management. */
freelist = alloc_slabmgmt(cachep, page, offset,
local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
-- 
1.9.1

[PATCH 09/11] mm/slab: separate cache_grow() to two parts

2016-03-27 Thread js1304

From: Joonsoo Kim 

This is a preparation step to implement a lockless allocation path when
there are no free objects in the kmem_cache. What we'd like to do here is
to refill the cpu cache without holding a node lock. To accomplish this,
the refill should be done after the new slab allocation but before
attaching the slab to the management list. So, this patch separates
cache_grow() into two parts, allocation and attaching to the list,
in order to add some code in between them in the following patch.

Signed-off-by: Joonsoo Kim 
---
 mm/slab.c | 74 ---
 1 file changed, 52 insertions(+), 22 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index ce8ed65..401e60c 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -213,6 +213,11 @@ static void slabs_destroy(struct kmem_cache *cachep, 
struct list_head *list);
 static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
 static void cache_reap(struct work_struct *unused);
 
+static inline void fixup_objfreelist_debug(struct kmem_cache *cachep,
+   void **list);
+static inline void fixup_slab_list(struct kmem_cache *cachep,
+   struct kmem_cache_node *n, struct page *page,
+   void **list);
 static int slab_early_init = 1;
 
 #define INDEX_NODE kmalloc_index(sizeof(struct kmem_cache_node))
@@ -1796,7 +1801,7 @@ static size_t calculate_slab_order(struct kmem_cache 
*cachep,
 
/*
 * Needed to avoid possible looping condition
-* in cache_grow()
+* in cache_grow_begin()
 */
if (OFF_SLAB(freelist_cache))
continue;
@@ -2518,7 +2523,8 @@ static void slab_map_pages(struct kmem_cache *cache, 
struct page *page,
  * Grow (by 1) the number of slabs within a cache.  This is called by
  * kmem_cache_alloc() when there are no active objs left in a cache.
  */
-static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
+static struct page *cache_grow_begin(struct kmem_cache *cachep,
+   gfp_t flags, int nodeid)
 {
void *freelist;
size_t offset;
@@ -2584,21 +2590,40 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t 
flags, int nodeid)
 
if (gfpflags_allow_blocking(local_flags))
local_irq_disable();
-   check_irq_off();
-   spin_lock(>list_lock);
 
-   /* Make slab active. */
-   list_add_tail(>lru, &(n->slabs_free));
-   STATS_INC_GROWN(cachep);
-   n->free_objects += cachep->num;
-   spin_unlock(>list_lock);
-   return page_node;
+   return page;
+
 opps1:
kmem_freepages(cachep, page);
 failed:
if (gfpflags_allow_blocking(local_flags))
local_irq_disable();
-   return -1;
+   return NULL;
+}
+
+static void cache_grow_end(struct kmem_cache *cachep, struct page *page)
+{
+   struct kmem_cache_node *n;
+   void *list = NULL;
+
+   check_irq_off();
+
+   if (!page)
+   return;
+
+   INIT_LIST_HEAD(>lru);
+   n = get_node(cachep, page_to_nid(page));
+
+   spin_lock(>list_lock);
+   if (!page->active)
+   list_add_tail(>lru, &(n->slabs_free));
+   else
+   fixup_slab_list(cachep, n, page, );
+   STATS_INC_GROWN(cachep);
+   n->free_objects += cachep->num - page->active;
+   spin_unlock(>list_lock);
+
+   fixup_objfreelist_debug(cachep, );
 }
 
 #if DEBUG
@@ -2809,6 +2834,7 @@ static void *cache_alloc_refill(struct kmem_cache 
*cachep, gfp_t flags)
struct array_cache *ac;
int node;
void *list = NULL;
+   struct page *page;
 
check_irq_off();
node = numa_mem_id();
@@ -2836,7 +2862,6 @@ retry:
}
 
while (batchcount > 0) {
-   struct page *page;
/* Get slab alloc is to come from. */
page = get_first_slab(n, false);
if (!page)
@@ -2869,8 +2894,6 @@ alloc_done:
fixup_objfreelist_debug(cachep, );
 
if (unlikely(!ac->avail)) {
-   int x;
-
/* Check if we can use obj in pfmemalloc slab */
if (sk_memalloc_socks()) {
void *obj = cache_alloc_pfmemalloc(cachep, n, flags);
@@ -2879,14 +2902,18 @@ alloc_done:
return obj;
}
 
-   x = cache_grow(cachep, gfp_exact_node(flags), node);
+   page = cache_grow_begin(cachep, gfp_exact_node(flags), node);
+   cache_grow_end(cachep, page);
 
-   /* cache_grow can reenable interrupts, then ac could change. */
+   /*
+* cache_grow_begin() can reenable interrupts,
+* then ac could change.
+*/

[PATCH 08/11] mm/slab: make cache_grow() handle the page allocated on arbitrary node

2016-03-27 Thread js1304

From: Joonsoo Kim 

Currently, cache_grow() assumes that the allocated page's nodeid is the
same as the nodeid parameter used for the allocation request. If
we discard this assumption, we can handle the fallback_alloc() case
gracefully. So, this patch makes cache_grow() handle a page allocated
on an arbitrary node and cleans up the relevant code.

Signed-off-by: Joonsoo Kim 
---
 mm/slab.c | 60 +---
 1 file changed, 21 insertions(+), 39 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index 52fc5e3..ce8ed65 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2518,13 +2518,14 @@ static void slab_map_pages(struct kmem_cache *cache, 
struct page *page,
  * Grow (by 1) the number of slabs within a cache.  This is called by
  * kmem_cache_alloc() when there are no active objs left in a cache.
  */
-static int cache_grow(struct kmem_cache *cachep,
-   gfp_t flags, int nodeid, struct page *page)
+static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 {
void *freelist;
size_t offset;
gfp_t local_flags;
+   int page_node;
struct kmem_cache_node *n;
+   struct page *page;
 
/*
 * Be lazy and only check for valid flags here,  keeping it out of the
@@ -2552,12 +2553,12 @@ static int cache_grow(struct kmem_cache *cachep,
 * Get mem for the objs.  Attempt to allocate a physical page from
 * 'nodeid'.
 */
-   if (!page)
-   page = kmem_getpages(cachep, local_flags, nodeid);
+   page = kmem_getpages(cachep, local_flags, nodeid);
if (!page)
goto failed;
 
-   n = get_node(cachep, nodeid);
+   page_node = page_to_nid(page);
+   n = get_node(cachep, page_node);
 
/* Get colour for the slab, and cal the next value. */
n->colour_next++;
@@ -2572,7 +2573,7 @@ static int cache_grow(struct kmem_cache *cachep,
 
/* Get slab management. */
freelist = alloc_slabmgmt(cachep, page, offset,
-   local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
+   local_flags & ~GFP_CONSTRAINT_MASK, page_node);
if (OFF_SLAB(cachep) && !freelist)
goto opps1;
 
@@ -2591,13 +2592,13 @@ static int cache_grow(struct kmem_cache *cachep,
STATS_INC_GROWN(cachep);
n->free_objects += cachep->num;
spin_unlock(>list_lock);
-   return 1;
+   return page_node;
 opps1:
kmem_freepages(cachep, page);
 failed:
if (gfpflags_allow_blocking(local_flags))
local_irq_disable();
-   return 0;
+   return -1;
 }
 
 #if DEBUG
@@ -2878,14 +2879,14 @@ alloc_done:
return obj;
}
 
-   x = cache_grow(cachep, gfp_exact_node(flags), node, NULL);
+   x = cache_grow(cachep, gfp_exact_node(flags), node);
 
/* cache_grow can reenable interrupts, then ac could change. */
ac = cpu_cache_get(cachep);
node = numa_mem_id();
 
/* no objects in sight? abort */
-   if (!x && ac->avail == 0)
+   if (x < 0 && ac->avail == 0)
return NULL;
 
if (!ac->avail) /* objects refilled by interrupt? */
@@ -3014,7 +3015,6 @@ static void *alternate_node_alloc(struct kmem_cache 
*cachep, gfp_t flags)
 static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
 {
struct zonelist *zonelist;
-   gfp_t local_flags;
struct zoneref *z;
struct zone *zone;
enum zone_type high_zoneidx = gfp_zone(flags);
@@ -3025,8 +3025,6 @@ static void *fallback_alloc(struct kmem_cache *cache, 
gfp_t flags)
if (flags & __GFP_THISNODE)
return NULL;
 
-   local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
-
 retry_cpuset:
cpuset_mems_cookie = read_mems_allowed_begin();
zonelist = node_zonelist(mempolicy_slab_node(), flags);
@@ -3056,33 +3054,17 @@ retry:
 * We may trigger various forms of reclaim on the allowed
 * set and go into memory reserves if necessary.
 */
-   struct page *page;
+   nid = cache_grow(cache, flags, numa_mem_id());
+   if (nid >= 0) {
+   obj = cache_alloc_node(cache,
+   gfp_exact_node(flags), nid);
 
-   if (gfpflags_allow_blocking(local_flags))
-   local_irq_enable();
-   kmem_flagcheck(cache, flags);
-   page = kmem_getpages(cache, local_flags, numa_mem_id());
-   if (gfpflags_allow_blocking(local_flags))
-   local_irq_disable();
-   if (page) {
/*
-* Insert into the appropriate per node queues
+*

[PATCH 11/11] mm/slab: lockless decision to grow cache

2016-03-27 Thread js1304

From: Joonsoo Kim 

To check precisely whether free objects exist or not, we need to grab
a lock. But accuracy isn't that important, because the race window would
be small, and if there are too many free objects, the cache reaper would
reap them. So, this patch makes the check for free object existence
not hold a lock. This will reduce lock contention in the heavy
allocation case.

Note that until now, n->shared could be freed during processing by
writing to slabinfo, but with a trick in this patch, we can access it
freely within an interrupt-disabled period.

Below is the result of concurrent allocation/free in slab allocation
benchmark made by Christoph a long time ago. I make the output simpler.
The number shows cycle count during alloc/free respectively so less
is better.

* Before
Kmalloc N*alloc N*free(32): Average=248/966
Kmalloc N*alloc N*free(64): Average=261/949
Kmalloc N*alloc N*free(128): Average=314/1016
Kmalloc N*alloc N*free(256): Average=741/1061
Kmalloc N*alloc N*free(512): Average=1246/1152
Kmalloc N*alloc N*free(1024): Average=2437/1259
Kmalloc N*alloc N*free(2048): Average=4980/1800
Kmalloc N*alloc N*free(4096): Average=9000/2078

* After
Kmalloc N*alloc N*free(32): Average=344/792
Kmalloc N*alloc N*free(64): Average=347/882
Kmalloc N*alloc N*free(128): Average=390/959
Kmalloc N*alloc N*free(256): Average=393/1067
Kmalloc N*alloc N*free(512): Average=683/1229
Kmalloc N*alloc N*free(1024): Average=1295/1325
Kmalloc N*alloc N*free(2048): Average=2513/1664
Kmalloc N*alloc N*free(4096): Average=4742/2172

It shows that allocation performance decreases for object sizes
up to 128, which may be due to the extra checks in cache_alloc_refill().
But, considering the improvement in free performance, the net result looks
the same. Results for the other size classes look very promising: roughly
a 50% performance improvement.

Signed-off-by: Joonsoo Kim 
---
 mm/slab.c | 21 ++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index 029d6b3..b70aabf 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -951,6 +951,15 @@ static int setup_kmem_cache_node(struct kmem_cache *cachep,
spin_unlock_irq(&n->list_lock);
slabs_destroy(cachep, &list);
 
+   /*
+* To protect lockless access to n->shared during irq disabled context.
+* If n->shared isn't NULL in irq disabled context, accessing to it is
+* guaranteed to be valid until irq is re-enabled, because it will be
+* freed after kick_all_cpus_sync().
+*/
+   if (force_change)
+   kick_all_cpus_sync();
+
 fail:
kfree(old_shared);
kfree(new_shared);
@@ -2855,7 +2864,7 @@ static void *cache_alloc_refill(struct kmem_cache 
*cachep, gfp_t flags)
 {
int batchcount;
struct kmem_cache_node *n;
-   struct array_cache *ac;
+   struct array_cache *ac, *shared;
int node;
void *list = NULL;
struct page *page;
@@ -2876,11 +2885,16 @@ static void *cache_alloc_refill(struct kmem_cache 
*cachep, gfp_t flags)
n = get_node(cachep, node);
 
BUG_ON(ac->avail > 0 || !n);
+   shared = READ_ONCE(n->shared);
+   if (!n->free_objects && (!shared || !shared->avail))
+   goto direct_grow;
+
spin_lock(&n->list_lock);
+   shared = READ_ONCE(n->shared);
 
/* See if we can refill from the shared array */
-   if (n->shared && transfer_objects(ac, n->shared, batchcount)) {
-   n->shared->touched = 1;
+   if (shared && transfer_objects(ac, shared, batchcount)) {
+   shared->touched = 1;
goto alloc_done;
}
 
@@ -2902,6 +2916,7 @@ alloc_done:
spin_unlock(&n->list_lock);
fixup_objfreelist_debug(cachep, &list);
 
+direct_grow:
if (unlikely(!ac->avail)) {
/* Check if we can use obj in pfmemalloc slab */
if (sk_memalloc_socks()) {
-- 
1.9.1

[PATCH 09/11] mm/slab: separate cache_grow() to two parts

2016-03-27 Thread js1304

From: Joonsoo Kim 

This is a preparation step to implement a lockless allocation path when
there are no free objects in kmem_cache. What we'd like to do here is
to refill the cpu cache without holding a node lock. To accomplish this
purpose, refill should be done after new slab allocation but before
attaching the slab to the management list. So, this patch separates
cache_grow() into two parts, allocation and attaching to the list,
in order to add some code in between them in the following patch.

Signed-off-by: Joonsoo Kim 
---
 mm/slab.c | 74 ---
 1 file changed, 52 insertions(+), 22 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index ce8ed65..401e60c 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -213,6 +213,11 @@ static void slabs_destroy(struct kmem_cache *cachep, 
struct list_head *list);
 static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
 static void cache_reap(struct work_struct *unused);
 
+static inline void fixup_objfreelist_debug(struct kmem_cache *cachep,
+   void **list);
+static inline void fixup_slab_list(struct kmem_cache *cachep,
+   struct kmem_cache_node *n, struct page *page,
+   void **list);
 static int slab_early_init = 1;
 
 #define INDEX_NODE kmalloc_index(sizeof(struct kmem_cache_node))
@@ -1796,7 +1801,7 @@ static size_t calculate_slab_order(struct kmem_cache 
*cachep,
 
/*
 * Needed to avoid possible looping condition
-* in cache_grow()
+* in cache_grow_begin()
 */
if (OFF_SLAB(freelist_cache))
continue;
@@ -2518,7 +2523,8 @@ static void slab_map_pages(struct kmem_cache *cache, 
struct page *page,
  * Grow (by 1) the number of slabs within a cache.  This is called by
  * kmem_cache_alloc() when there are no active objs left in a cache.
  */
-static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
+static struct page *cache_grow_begin(struct kmem_cache *cachep,
+   gfp_t flags, int nodeid)
 {
void *freelist;
size_t offset;
@@ -2584,21 +2590,40 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t 
flags, int nodeid)
 
if (gfpflags_allow_blocking(local_flags))
local_irq_disable();
-   check_irq_off();
-   spin_lock(&n->list_lock);
 
-   /* Make slab active. */
-   list_add_tail(&page->lru, &(n->slabs_free));
-   STATS_INC_GROWN(cachep);
-   n->free_objects += cachep->num;
-   spin_unlock(&n->list_lock);
-   return page_node;
+   return page;
+
 opps1:
kmem_freepages(cachep, page);
 failed:
if (gfpflags_allow_blocking(local_flags))
local_irq_disable();
-   return -1;
+   return NULL;
+}
+
+static void cache_grow_end(struct kmem_cache *cachep, struct page *page)
+{
+   struct kmem_cache_node *n;
+   void *list = NULL;
+
+   check_irq_off();
+
+   if (!page)
+   return;
+
+   INIT_LIST_HEAD(&page->lru);
+   n = get_node(cachep, page_to_nid(page));
+
+   spin_lock(&n->list_lock);
+   if (!page->active)
+   list_add_tail(&page->lru, &(n->slabs_free));
+   else
+   fixup_slab_list(cachep, n, page, &list);
+   STATS_INC_GROWN(cachep);
+   n->free_objects += cachep->num - page->active;
+   spin_unlock(&n->list_lock);
+
+   fixup_objfreelist_debug(cachep, &list);
 }
 
 #if DEBUG
@@ -2809,6 +2834,7 @@ static void *cache_alloc_refill(struct kmem_cache 
*cachep, gfp_t flags)
struct array_cache *ac;
int node;
void *list = NULL;
+   struct page *page;
 
check_irq_off();
node = numa_mem_id();
@@ -2836,7 +2862,6 @@ retry:
}
 
while (batchcount > 0) {
-   struct page *page;
/* Get slab alloc is to come from. */
page = get_first_slab(n, false);
if (!page)
@@ -2869,8 +2894,6 @@ alloc_done:
fixup_objfreelist_debug(cachep, &list);
 
if (unlikely(!ac->avail)) {
-   int x;
-
/* Check if we can use obj in pfmemalloc slab */
if (sk_memalloc_socks()) {
void *obj = cache_alloc_pfmemalloc(cachep, n, flags);
@@ -2879,14 +2902,18 @@ alloc_done:
return obj;
}
 
-   x = cache_grow(cachep, gfp_exact_node(flags), node);
+   page = cache_grow_begin(cachep, gfp_exact_node(flags), node);
+   cache_grow_end(cachep, page);
 
-   /* cache_grow can reenable interrupts, then ac could change. */
+   /*
+* cache_grow_begin() can reenable interrupts,
+* then ac could change.
+*/
ac = cpu_cache_get(cachep);

[PATCH 08/11] mm/slab: make cache_grow() handle the page allocated on arbitrary node

2016-03-27 Thread js1304

From: Joonsoo Kim 

Currently, cache_grow() assumes that the allocated page's nodeid would be
the same as the parameter nodeid which is used for the allocation request.
If we discard this assumption, we can handle the fallback_alloc() case
gracefully. So, this patch makes cache_grow() handle a page allocated
on an arbitrary node and cleans up the relevant code.

Signed-off-by: Joonsoo Kim 
---
 mm/slab.c | 60 +---
 1 file changed, 21 insertions(+), 39 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index 52fc5e3..ce8ed65 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2518,13 +2518,14 @@ static void slab_map_pages(struct kmem_cache *cache, 
struct page *page,
  * Grow (by 1) the number of slabs within a cache.  This is called by
  * kmem_cache_alloc() when there are no active objs left in a cache.
  */
-static int cache_grow(struct kmem_cache *cachep,
-   gfp_t flags, int nodeid, struct page *page)
+static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 {
void *freelist;
size_t offset;
gfp_t local_flags;
+   int page_node;
struct kmem_cache_node *n;
+   struct page *page;
 
/*
 * Be lazy and only check for valid flags here,  keeping it out of the
@@ -2552,12 +2553,12 @@ static int cache_grow(struct kmem_cache *cachep,
 * Get mem for the objs.  Attempt to allocate a physical page from
 * 'nodeid'.
 */
-   if (!page)
-   page = kmem_getpages(cachep, local_flags, nodeid);
+   page = kmem_getpages(cachep, local_flags, nodeid);
if (!page)
goto failed;
 
-   n = get_node(cachep, nodeid);
+   page_node = page_to_nid(page);
+   n = get_node(cachep, page_node);
 
/* Get colour for the slab, and cal the next value. */
n->colour_next++;
@@ -2572,7 +2573,7 @@ static int cache_grow(struct kmem_cache *cachep,
 
/* Get slab management. */
freelist = alloc_slabmgmt(cachep, page, offset,
-   local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
+   local_flags & ~GFP_CONSTRAINT_MASK, page_node);
if (OFF_SLAB(cachep) && !freelist)
goto opps1;
 
@@ -2591,13 +2592,13 @@ static int cache_grow(struct kmem_cache *cachep,
STATS_INC_GROWN(cachep);
n->free_objects += cachep->num;
spin_unlock(&n->list_lock);
-   return 1;
+   return page_node;
 opps1:
kmem_freepages(cachep, page);
 failed:
if (gfpflags_allow_blocking(local_flags))
local_irq_disable();
-   return 0;
+   return -1;
 }
 
 #if DEBUG
@@ -2878,14 +2879,14 @@ alloc_done:
return obj;
}
 
-   x = cache_grow(cachep, gfp_exact_node(flags), node, NULL);
+   x = cache_grow(cachep, gfp_exact_node(flags), node);
 
/* cache_grow can reenable interrupts, then ac could change. */
ac = cpu_cache_get(cachep);
node = numa_mem_id();
 
/* no objects in sight? abort */
-   if (!x && ac->avail == 0)
+   if (x < 0 && ac->avail == 0)
return NULL;
 
if (!ac->avail) /* objects refilled by interrupt? */
@@ -3014,7 +3015,6 @@ static void *alternate_node_alloc(struct kmem_cache 
*cachep, gfp_t flags)
 static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
 {
struct zonelist *zonelist;
-   gfp_t local_flags;
struct zoneref *z;
struct zone *zone;
enum zone_type high_zoneidx = gfp_zone(flags);
@@ -3025,8 +3025,6 @@ static void *fallback_alloc(struct kmem_cache *cache, 
gfp_t flags)
if (flags & __GFP_THISNODE)
return NULL;
 
-   local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
-
 retry_cpuset:
cpuset_mems_cookie = read_mems_allowed_begin();
zonelist = node_zonelist(mempolicy_slab_node(), flags);
@@ -3056,33 +3054,17 @@ retry:
 * We may trigger various forms of reclaim on the allowed
 * set and go into memory reserves if necessary.
 */
-   struct page *page;
+   nid = cache_grow(cache, flags, numa_mem_id());
+   if (nid >= 0) {
+   obj = cache_alloc_node(cache,
+   gfp_exact_node(flags), nid);
 
-   if (gfpflags_allow_blocking(local_flags))
-   local_irq_enable();
-   kmem_flagcheck(cache, flags);
-   page = kmem_getpages(cache, local_flags, numa_mem_id());
-   if (gfpflags_allow_blocking(local_flags))
-   local_irq_disable();
-   if (page) {
/*
-* Insert into the appropriate per node queues
+* Another processor may allocate the objects in
+

[PATCH 11/11] mm/slab: lockless decision to grow cache

2016-03-27 Thread js1304

From: Joonsoo Kim 

To check whether free objects exist or not precisely, we need to grab
a lock. But, accuracy isn't that important because the race window would
be very small and if there are too many free objects, the cache reaper
would reap them. So, this patch performs the check for free object
existence without holding a lock. This will reduce lock contention in
the heavy allocation case.

Note that until now, n->shared can be freed during the processing by
writing slabinfo, but, with some trick in this patch, we can access it
freely within interrupt disabled period.

Below is the result of concurrent allocation/free in slab allocation
benchmark made by Christoph a long time ago. I make the output simpler.
The number shows cycle count during alloc/free respectively so less
is better.

* Before
Kmalloc N*alloc N*free(32): Average=248/966
Kmalloc N*alloc N*free(64): Average=261/949
Kmalloc N*alloc N*free(128): Average=314/1016
Kmalloc N*alloc N*free(256): Average=741/1061
Kmalloc N*alloc N*free(512): Average=1246/1152
Kmalloc N*alloc N*free(1024): Average=2437/1259
Kmalloc N*alloc N*free(2048): Average=4980/1800
Kmalloc N*alloc N*free(4096): Average=9000/2078

* After
Kmalloc N*alloc N*free(32): Average=344/792
Kmalloc N*alloc N*free(64): Average=347/882
Kmalloc N*alloc N*free(128): Average=390/959
Kmalloc N*alloc N*free(256): Average=393/1067
Kmalloc N*alloc N*free(512): Average=683/1229
Kmalloc N*alloc N*free(1024): Average=1295/1325
Kmalloc N*alloc N*free(2048): Average=2513/1664
Kmalloc N*alloc N*free(4096): Average=4742/2172

It shows that allocation performance decreases for object sizes
up to 128 and it may be due to extra checks in cache_alloc_refill().
But, considering the improvement of free performance, the net result looks
the same. Results for the other size classes look very promising: roughly
a 50% performance improvement.

Signed-off-by: Joonsoo Kim 
---
 mm/slab.c | 21 ++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index 029d6b3..b70aabf 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -951,6 +951,15 @@ static int setup_kmem_cache_node(struct kmem_cache *cachep,
spin_unlock_irq(&n->list_lock);
slabs_destroy(cachep, &list);
 
+   /*
+* To protect lockless access to n->shared during irq disabled context.
+* If n->shared isn't NULL in irq disabled context, accessing to it is
+* guaranteed to be valid until irq is re-enabled, because it will be
+* freed after kick_all_cpus_sync().
+*/
+   if (force_change)
+   kick_all_cpus_sync();
+
 fail:
kfree(old_shared);
kfree(new_shared);
@@ -2855,7 +2864,7 @@ static void *cache_alloc_refill(struct kmem_cache 
*cachep, gfp_t flags)
 {
int batchcount;
struct kmem_cache_node *n;
-   struct array_cache *ac;
+   struct array_cache *ac, *shared;
int node;
void *list = NULL;
struct page *page;
@@ -2876,11 +2885,16 @@ static void *cache_alloc_refill(struct kmem_cache 
*cachep, gfp_t flags)
n = get_node(cachep, node);
 
BUG_ON(ac->avail > 0 || !n);
+   shared = READ_ONCE(n->shared);
+   if (!n->free_objects && (!shared || !shared->avail))
+   goto direct_grow;
+
spin_lock(&n->list_lock);
+   shared = READ_ONCE(n->shared);
 
/* See if we can refill from the shared array */
-   if (n->shared && transfer_objects(ac, n->shared, batchcount)) {
-   n->shared->touched = 1;
+   if (shared && transfer_objects(ac, shared, batchcount)) {
+   shared->touched = 1;
goto alloc_done;
}
 
@@ -2902,6 +2916,7 @@ alloc_done:
spin_unlock(&n->list_lock);
fixup_objfreelist_debug(cachep, &list);
 
+direct_grow:
if (unlikely(!ac->avail)) {
/* Check if we can use obj in pfmemalloc slab */
if (sk_memalloc_socks()) {
-- 
1.9.1

[PATCH 01/11] mm/slab: hold a slab_mutex when calling __kmem_cache_shrink()

2016-03-27 Thread js1304

From: Joonsoo Kim 

Major kmem_cache metadata in the slab subsystem is synchronized with
the slab_mutex. In SLAB, if some of it is changed, the node's shared
array cache would be freed and re-populated. If __kmem_cache_shrink()
is called at the same time, it will call drain_array() with n->shared
without holding the node lock, so a problem can happen.

We can fix this small theoretical race condition by holding the node lock
in drain_array(), but holding the slab_mutex in kmem_cache_shrink()
looks like a more appropriate solution because a stable state would make
things less error-prone and this is not a performance-critical path.

In addition, annotate the SLAB functions.

Signed-off-by: Joonsoo Kim 
---
 mm/slab.c| 2 ++
 mm/slab_common.c | 4 
 2 files changed, 6 insertions(+)

diff --git a/mm/slab.c b/mm/slab.c
index a53a0f6..043606a 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2218,6 +2218,7 @@ static void do_drain(void *arg)
ac->avail = 0;
 }
 
+/* Should be called with slab_mutex to prevent from freeing shared array */
 static void drain_cpu_caches(struct kmem_cache *cachep)
 {
struct kmem_cache_node *n;
@@ -3871,6 +3872,7 @@ skip_setup:
  * Drain an array if it contains any elements taking the node lock only if
  * necessary. Note that the node listlock also protects the array_cache
  * if drain_array() is used on the shared array.
+ * Should be called with slab_mutex to prevent from freeing shared array.
  */
 static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
 struct array_cache *ac, int force, int node)
diff --git a/mm/slab_common.c b/mm/slab_common.c
index a65dad7..5bed565 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -755,7 +755,11 @@ int kmem_cache_shrink(struct kmem_cache *cachep)
get_online_cpus();
get_online_mems();
kasan_cache_shrink(cachep);
+
+   mutex_lock(&slab_mutex);
ret = __kmem_cache_shrink(cachep, false);
+   mutex_unlock(&slab_mutex);
+
put_online_mems();
put_online_cpus();
return ret;
-- 
1.9.1

[PATCH 10/11] mm/slab: refill cpu cache through a new slab without holding a node lock

2016-03-27 Thread js1304

From: Joonsoo Kim 

Until now, cache growing makes a free slab on node's slab list and then
we can allocate free objects from it. This necessarily requires
to hold a node lock which is very contended. If we refill cpu cache
before attaching it to node's slab list, we can avoid holding a node lock
as much as possible because this newly allocated slab is only visible
to the current task. This will reduce lock contention.

Below is the result of concurrent allocation/free in slab allocation
benchmark made by Christoph a long time ago. I make the output simpler.
The number shows cycle count during alloc/free respectively so less
is better.

* Before
Kmalloc N*alloc N*free(32): Average=355/750
Kmalloc N*alloc N*free(64): Average=452/812
Kmalloc N*alloc N*free(128): Average=559/1070
Kmalloc N*alloc N*free(256): Average=1176/980
Kmalloc N*alloc N*free(512): Average=1939/1189
Kmalloc N*alloc N*free(1024): Average=3521/1278
Kmalloc N*alloc N*free(2048): Average=7152/1838
Kmalloc N*alloc N*free(4096): Average=13438/2013

* After
Kmalloc N*alloc N*free(32): Average=248/966
Kmalloc N*alloc N*free(64): Average=261/949
Kmalloc N*alloc N*free(128): Average=314/1016
Kmalloc N*alloc N*free(256): Average=741/1061
Kmalloc N*alloc N*free(512): Average=1246/1152
Kmalloc N*alloc N*free(1024): Average=2437/1259
Kmalloc N*alloc N*free(2048): Average=4980/1800
Kmalloc N*alloc N*free(4096): Average=9000/2078

It shows that contention is reduced for all the object sizes
and performance increases by 30 ~ 40%.

Signed-off-by: Joonsoo Kim 
---
 mm/slab.c | 68 +--
 1 file changed, 36 insertions(+), 32 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index 401e60c..029d6b3 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2827,6 +2827,30 @@ static noinline void *cache_alloc_pfmemalloc(struct 
kmem_cache *cachep,
return obj;
 }
 
+/*
+ * Slab list should be fixed up by fixup_slab_list() for existing slab
+ * or cache_grow_end() for new slab
+ */
+static __always_inline int alloc_block(struct kmem_cache *cachep,
+   struct array_cache *ac, struct page *page, int batchcount)
+{
+   /*
+* There must be at least one object available for
+* allocation.
+*/
+   BUG_ON(page->active >= cachep->num);
+
+   while (page->active < cachep->num && batchcount--) {
+   STATS_INC_ALLOCED(cachep);
+   STATS_INC_ACTIVE(cachep);
+   STATS_SET_HIGH(cachep);
+
+   ac->entry[ac->avail++] = slab_get_obj(cachep, page);
+   }
+
+   return batchcount;
+}
+
 static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
 {
int batchcount;
@@ -2839,7 +2863,6 @@ static void *cache_alloc_refill(struct kmem_cache 
*cachep, gfp_t flags)
check_irq_off();
node = numa_mem_id();
 
-retry:
ac = cpu_cache_get(cachep);
batchcount = ac->batchcount;
if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
@@ -2869,21 +2892,7 @@ retry:
 
check_spinlock_acquired(cachep);
 
-   /*
-* The slab was either on partial or free list so
-* there must be at least one object available for
-* allocation.
-*/
-   BUG_ON(page->active >= cachep->num);
-
-   while (page->active < cachep->num && batchcount--) {
-   STATS_INC_ALLOCED(cachep);
-   STATS_INC_ACTIVE(cachep);
-   STATS_SET_HIGH(cachep);
-
-   ac->entry[ac->avail++] = slab_get_obj(cachep, page);
-   }
-
+   batchcount = alloc_block(cachep, ac, page, batchcount);
fixup_slab_list(cachep, n, page, &list);
}
 
@@ -2903,21 +2912,18 @@ alloc_done:
}
 
page = cache_grow_begin(cachep, gfp_exact_node(flags), node);
-   cache_grow_end(cachep, page);
 
/*
 * cache_grow_begin() can reenable interrupts,
 * then ac could change.
 */
ac = cpu_cache_get(cachep);
-   node = numa_mem_id();
+   if (!ac->avail && page)
+   alloc_block(cachep, ac, page, batchcount);
+   cache_grow_end(cachep, page);
 
-   /* no objects in sight? abort */
-   if (!page && ac->avail == 0)
+   if (!ac->avail)
return NULL;
-
-   if (!ac->avail) /* objects refilled by interrupt? */
-   goto retry;
}
ac->touched = 1;
 
@@ -3111,14 +3117,13 @@ static void *cache_alloc_node(struct kmem_cache 
*cachep, gfp_t flags,
 {
struct page *page;
struct kmem_cache_node *n;
-   void *obj;
+   void *obj = NULL;
void *list = NULL;
 
VM_BUG_ON(nodeid < 0 || nodeid

[PATCH 10/11] mm/slab: refill cpu cache through a new slab without holding a node lock

2016-03-27 Thread js1304

From: Joonsoo Kim 

Until now, cache growing makes a free slab on node's slab list and then
we can allocate free objects from it. This necessarily requires
to hold a node lock which is very contended. If we refill cpu cache
before attaching it to node's slab list, we can avoid holding a node lock
as much as possible because this newly allocated slab is only visible
to the current task. This will reduce lock contention.

Below is the result of concurrent allocation/free in slab allocation
benchmark made by Christoph a long time ago. I make the output simpler.
The number shows cycle count during alloc/free respectively so less
is better.

* Before
Kmalloc N*alloc N*free(32): Average=355/750
Kmalloc N*alloc N*free(64): Average=452/812
Kmalloc N*alloc N*free(128): Average=559/1070
Kmalloc N*alloc N*free(256): Average=1176/980
Kmalloc N*alloc N*free(512): Average=1939/1189
Kmalloc N*alloc N*free(1024): Average=3521/1278
Kmalloc N*alloc N*free(2048): Average=7152/1838
Kmalloc N*alloc N*free(4096): Average=13438/2013

* After
Kmalloc N*alloc N*free(32): Average=248/966
Kmalloc N*alloc N*free(64): Average=261/949
Kmalloc N*alloc N*free(128): Average=314/1016
Kmalloc N*alloc N*free(256): Average=741/1061
Kmalloc N*alloc N*free(512): Average=1246/1152
Kmalloc N*alloc N*free(1024): Average=2437/1259
Kmalloc N*alloc N*free(2048): Average=4980/1800
Kmalloc N*alloc N*free(4096): Average=9000/2078

It shows that contention is reduced for all the object sizes
and performance increases by 30 ~ 40%.

Signed-off-by: Joonsoo Kim 
---
 mm/slab.c | 68 +--
 1 file changed, 36 insertions(+), 32 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index 401e60c..029d6b3 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2827,6 +2827,30 @@ static noinline void *cache_alloc_pfmemalloc(struct 
kmem_cache *cachep,
return obj;
 }
 
+/*
+ * Slab list should be fixed up by fixup_slab_list() for existing slab
+ * or cache_grow_end() for new slab
+ */
+static __always_inline int alloc_block(struct kmem_cache *cachep,
+   struct array_cache *ac, struct page *page, int batchcount)
+{
+   /*
+* There must be at least one object available for
+* allocation.
+*/
+   BUG_ON(page->active >= cachep->num);
+
+   while (page->active < cachep->num && batchcount--) {
+   STATS_INC_ALLOCED(cachep);
+   STATS_INC_ACTIVE(cachep);
+   STATS_SET_HIGH(cachep);
+
+   ac->entry[ac->avail++] = slab_get_obj(cachep, page);
+   }
+
+   return batchcount;
+}
+
 static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
 {
int batchcount;
@@ -2839,7 +2863,6 @@ static void *cache_alloc_refill(struct kmem_cache 
*cachep, gfp_t flags)
check_irq_off();
node = numa_mem_id();
 
-retry:
ac = cpu_cache_get(cachep);
batchcount = ac->batchcount;
if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
@@ -2869,21 +2892,7 @@ retry:
 
check_spinlock_acquired(cachep);
 
-   /*
-* The slab was either on partial or free list so
-* there must be at least one object available for
-* allocation.
-*/
-   BUG_ON(page->active >= cachep->num);
-
-   while (page->active < cachep->num && batchcount--) {
-   STATS_INC_ALLOCED(cachep);
-   STATS_INC_ACTIVE(cachep);
-   STATS_SET_HIGH(cachep);
-
-   ac->entry[ac->avail++] = slab_get_obj(cachep, page);
-   }
-
+   batchcount = alloc_block(cachep, ac, page, batchcount);
fixup_slab_list(cachep, n, page, &list);
}
 
@@ -2903,21 +2912,18 @@ alloc_done:
}
 
page = cache_grow_begin(cachep, gfp_exact_node(flags), node);
-   cache_grow_end(cachep, page);
 
/*
 * cache_grow_begin() can reenable interrupts,
 * then ac could change.
 */
ac = cpu_cache_get(cachep);
-   node = numa_mem_id();
+   if (!ac->avail && page)
+   alloc_block(cachep, ac, page, batchcount);
+   cache_grow_end(cachep, page);
 
-   /* no objects in sight? abort */
-   if (!page && ac->avail == 0)
+   if (!ac->avail)
return NULL;
-
-   if (!ac->avail) /* objects refilled by interrupt? */
-   goto retry;
}
ac->touched = 1;
 
@@ -3111,14 +3117,13 @@ static void *cache_alloc_node(struct kmem_cache 
*cachep, gfp_t flags,
 {
struct page *page;
struct kmem_cache_node *n;
-   void *obj;
+   void *obj = NULL;
void *list = NULL;
 
VM_BUG_ON(nodeid < 0 || nodeid >= MAX_NUMNODES);
n = get_node(cachep,

[PATCH 01/11] mm/slab: hold a slab_mutex when calling __kmem_cache_shrink()

2016-03-27 Thread js1304

From: Joonsoo Kim 

Major kmem_cache metadata in the slab subsystem is synchronized with
the slab_mutex. In SLAB, if some of it is changed, the node's shared
array cache would be freed and re-populated. If __kmem_cache_shrink()
is called at the same time, it will call drain_array() with n->shared
without holding the node lock, so a problem can happen.

We can fix this small theoretical race condition by holding the node lock
in drain_array(), but holding the slab_mutex in kmem_cache_shrink()
looks like a more appropriate solution because a stable state would make
things less error-prone and this is not a performance-critical path.

In addition, annotate the SLAB functions.

Signed-off-by: Joonsoo Kim 
---
 mm/slab.c| 2 ++
 mm/slab_common.c | 4 
 2 files changed, 6 insertions(+)

diff --git a/mm/slab.c b/mm/slab.c
index a53a0f6..043606a 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2218,6 +2218,7 @@ static void do_drain(void *arg)
ac->avail = 0;
 }
 
+/* Should be called with slab_mutex to prevent from freeing shared array */
 static void drain_cpu_caches(struct kmem_cache *cachep)
 {
struct kmem_cache_node *n;
@@ -3871,6 +3872,7 @@ skip_setup:
  * Drain an array if it contains any elements taking the node lock only if
  * necessary. Note that the node listlock also protects the array_cache
  * if drain_array() is used on the shared array.
+ * Should be called with slab_mutex to prevent from freeing shared array.
  */
 static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
 struct array_cache *ac, int force, int node)
diff --git a/mm/slab_common.c b/mm/slab_common.c
index a65dad7..5bed565 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -755,7 +755,11 @@ int kmem_cache_shrink(struct kmem_cache *cachep)
get_online_cpus();
get_online_mems();
kasan_cache_shrink(cachep);
+
+   mutex_lock(&slab_mutex);
ret = __kmem_cache_shrink(cachep, false);
+   mutex_unlock(&slab_mutex);
+
put_online_mems();
put_online_cpus();
return ret;
-- 
1.9.1

[PATCH 02/11] mm/slab: remove BAD_ALIEN_MAGIC again

2016-03-27 Thread js1304

From: Joonsoo Kim 

The initial attempt to remove BAD_ALIEN_MAGIC was reverted by
'commit edcad2509550 ("Revert "slab: remove BAD_ALIEN_MAGIC"")'
because it caused a problem on m68k, which has many nodes
but !CONFIG_NUMA. In this case, although the alien cache isn't used
at all, a garbage value is used to cope with some initialization path,
and that is BAD_ALIEN_MAGIC. Now, this patch sets
use_alien_caches to 0 when !CONFIG_NUMA, so there is no initialization
path problem and we don't need BAD_ALIEN_MAGIC at all. So remove it.

Signed-off-by: Joonsoo Kim 
---
 mm/slab.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index 043606a..a5a205b 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -421,8 +421,6 @@ static struct kmem_cache kmem_cache_boot = {
.name = "kmem_cache",
 };
 
-#define BAD_ALIEN_MAGIC 0x01020304ul
-
 static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
 
 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
@@ -637,7 +635,7 @@ static int transfer_objects(struct array_cache *to,
 static inline struct alien_cache **alloc_alien_cache(int node,
int limit, gfp_t gfp)
 {
-   return (struct alien_cache **)BAD_ALIEN_MAGIC;
+   return NULL;
 }
 
 static inline void free_alien_cache(struct alien_cache **ac_ptr)
@@ -1205,7 +1203,7 @@ void __init kmem_cache_init(void)
sizeof(struct rcu_head));
kmem_cache = &kmem_cache_boot;
 
-   if (num_possible_nodes() == 1)
+   if (!IS_ENABLED(CONFIG_NUMA) || num_possible_nodes() == 1)
use_alien_caches = 0;
 
for (i = 0; i < NUM_INIT_LISTS; i++)
-- 
1.9.1

[PATCH 02/11] mm/slab: remove BAD_ALIEN_MAGIC again

2016-03-27 Thread js1304

From: Joonsoo Kim 

The initial attempt to remove BAD_ALIEN_MAGIC was reverted by
'commit edcad2509550 ("Revert "slab: remove BAD_ALIEN_MAGIC"")'
because it caused a problem on m68k, which has many nodes
but !CONFIG_NUMA. In this case, although the alien cache isn't used
at all, a garbage value is used to cope with some initialization path,
and that is BAD_ALIEN_MAGIC. Now, this patch sets
use_alien_caches to 0 when !CONFIG_NUMA, so there is no initialization
path problem and we don't need BAD_ALIEN_MAGIC at all. So remove it.

Signed-off-by: Joonsoo Kim 
---
 mm/slab.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index 043606a..a5a205b 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -421,8 +421,6 @@ static struct kmem_cache kmem_cache_boot = {
.name = "kmem_cache",
 };
 
-#define BAD_ALIEN_MAGIC 0x01020304ul
-
 static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
 
 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
@@ -637,7 +635,7 @@ static int transfer_objects(struct array_cache *to,
 static inline struct alien_cache **alloc_alien_cache(int node,
int limit, gfp_t gfp)
 {
-   return (struct alien_cache **)BAD_ALIEN_MAGIC;
+   return NULL;
 }
 
 static inline void free_alien_cache(struct alien_cache **ac_ptr)
@@ -1205,7 +1203,7 @@ void __init kmem_cache_init(void)
sizeof(struct rcu_head));
kmem_cache = &kmem_cache_boot;
 
-   if (num_possible_nodes() == 1)
+   if (!IS_ENABLED(CONFIG_NUMA) || num_possible_nodes() == 1)
use_alien_caches = 0;
 
for (i = 0; i < NUM_INIT_LISTS; i++)
-- 
1.9.1

[PATCH 03/11] mm/slab: drain the free slab as much as possible

2016-03-27 Thread js1304

From: Joonsoo Kim 

slabs_tofree() implies freeing all free slabs. We can do it by
just providing INT_MAX.

Signed-off-by: Joonsoo Kim 
---
 mm/slab.c | 12 +++-
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index a5a205b..ba2eacf 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -888,12 +888,6 @@ static int init_cache_node_node(int node)
return 0;
 }
 
-static inline int slabs_tofree(struct kmem_cache *cachep,
-   struct kmem_cache_node *n)
-{
-   return (n->free_objects + cachep->num - 1) / cachep->num;
-}
-
 static void cpuup_canceled(long cpu)
 {
struct kmem_cache *cachep;
@@ -958,7 +952,7 @@ free_slab:
n = get_node(cachep, node);
if (!n)
continue;
-   drain_freelist(cachep, n, slabs_tofree(cachep, n));
+   drain_freelist(cachep, n, INT_MAX);
}
 }
 
@@ -1110,7 +1104,7 @@ static int __meminit drain_cache_node_node(int node)
if (!n)
continue;
 
-   drain_freelist(cachep, n, slabs_tofree(cachep, n));
+   drain_freelist(cachep, n, INT_MAX);
 
if (!list_empty(&n->slabs_full) ||
!list_empty(&n->slabs_partial)) {
@@ -2280,7 +2274,7 @@ int __kmem_cache_shrink(struct kmem_cache *cachep, bool 
deactivate)
 
check_irq_on();
for_each_kmem_cache_node(cachep, node, n) {
-   drain_freelist(cachep, n, slabs_tofree(cachep, n));
+   drain_freelist(cachep, n, INT_MAX);
 
ret += !list_empty(&n->slabs_full) ||
!list_empty(&n->slabs_partial);
-- 
1.9.1

RE: [PATCH v2 5/6] Documentation: DT: vdma: update binding doc for AXI CDMA

2016-03-27 Thread Appana Durga Kedareswara Rao

Hi Soren,

> -Original Message-
> From: Sören Brinkmann [mailto:soren.brinkm...@xilinx.com]
> Sent: Monday, March 28, 2016 12:58 AM
> To: Appana Durga Kedareswara Rao
> Cc: robh...@kernel.org; pawel.m...@arm.com; mark.rutl...@arm.com;
> ijc+devicet...@hellion.org.uk; ga...@codeaurora.org; Michal Simek;
> vinod.k...@intel.com; dan.j.willi...@intel.com; Anurag Kumar Vulisha; Appana
> Durga Kedareswara Rao; moritz.fisc...@ettus.com;
> laurent.pinch...@ideasonboard.com; l...@debethencourt.com; Srikanth
> Vemula; Anirudha Sarangi; devicet...@vger.kernel.org; linux-arm-
> ker...@lists.infradead.org; linux-kernel@vger.kernel.org;
> dmaeng...@vger.kernel.org
> Subject: Re: [PATCH v2 5/6] Documentation: DT: vdma: update binding doc for
> AXI CDMA
> 
> On Sun, 2016-03-27 at 23:36:06 +0530, Kedareswara rao Appana wrote:
> > This patch updates the device-tree binding doc for adding support for
> > AXI CDMA.
> >
> > Signed-off-by: Kedareswara rao Appana 
> > ---
> > ---> Modified commit message as suggested by Vinod.
> > ---> Moved the patch to forward in the series as suggested by vinod.
> >
> >  .../devicetree/bindings/dma/xilinx/xilinx_vdma.txt | 18
> +-
> >  1 file changed, 17 insertions(+), 1 deletion(-)
> >
> > diff --git
> > a/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt
> > b/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt
> > index 5841421..2b0c12b 100644
> > --- a/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt
> > +++ b/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt
> > @@ -8,8 +8,12 @@ target devices. It can be configured to have one channel
> or two channels.
> >  If configured as two channels, one is to transmit to the device and
> > another  is to receive from the device.
> >
> > +Xilinx AXI CDMA engine, it does transfers between memory-mapped
> > +source address and a memory-mapped destination address.
> > +
> >  Required properties:
> > -- compatible: Should be "xlnx,axi-vdma-1.00.a" or "xlnx,axi-dma-1.00.a"
> > +- compatible: Should be "xlnx,axi-vdma-1.00.a" or "xlnx,axi-dma-1.00.a" or
> > + "xlnx,axi-cdma-1.00.a""
> >  - #dma-cells: Should be <1>, see "dmas" property below
> >  - reg: Should contain VDMA registers location and length.
> >  - xlnx,num-fstores: Should be the number of framebuffers as configured in
> h/w.
> > @@ -80,6 +84,18 @@ axi_dma_0: axidma@4040 {
> > } ;
> >  } ;
> >
> > +axi_cdma_0: axicdma@7e20 {
> > +   compatible = "xlnx,axi-cdma-1.00.a";
> > +   #dma-cells = <1>;
> > +   reg = < 0x7e20 0x1 >;
> > +   xlnx,addrwidth = <0x20>;
> > +   dma-channel@7e20 {
> > +   compatible = "xlnx,axi-dma-mm2s-channel";
> > +   interrupts = < 0 55 4 >;
> > +   xlnx,datawidth = <0x40>;
> > +   } ;
> > +} ;
> 
> As in the other patch, the node name should be 'dma-controller@...' and the
> inconsistent spacing could be fixed.

Ok will fix...

> 
> Also, it seems this adds pretty much identical examples that just differ in 
> the
> compat string. Is that really needed?

Most of the properties are the same across the three DMA IPs.
AXI VDMA has a few required properties that are not required for AXI
DMA/CDMA.
That's why I added examples for the other IPs as well, i.e. for AXI DMA and CDMA.

Regards,
Kedar.

> 
>   Sören

mm/slab: reduce lock contention in alloc path

2016-03-27 Thread js1304

From: Joonsoo Kim 

While processing concurrent allocations, SLAB can be heavily contended
because it does a lot of work while holding a lock. This patchset
tries to reduce the number of critical sections in order to reduce
lock contention. The major changes are a lockless decision to allocate
more slabs and a lockless cpu cache refill from the newly allocated slab.

Below are the results of the concurrent allocation/free test from the slab
allocation benchmark written by Christoph a long time ago. I have simplified
the output. The numbers show the cycle counts for alloc/free respectively,
so lower is better.

* Before
Kmalloc N*alloc N*free(32): Average=365/806
Kmalloc N*alloc N*free(64): Average=452/690
Kmalloc N*alloc N*free(128): Average=736/886
Kmalloc N*alloc N*free(256): Average=1167/985
Kmalloc N*alloc N*free(512): Average=2088/1125
Kmalloc N*alloc N*free(1024): Average=4115/1184
Kmalloc N*alloc N*free(2048): Average=8451/1748
Kmalloc N*alloc N*free(4096): Average=16024/2048

* After
Kmalloc N*alloc N*free(32): Average=344/792
Kmalloc N*alloc N*free(64): Average=347/882
Kmalloc N*alloc N*free(128): Average=390/959
Kmalloc N*alloc N*free(256): Average=393/1067
Kmalloc N*alloc N*free(512): Average=683/1229
Kmalloc N*alloc N*free(1024): Average=1295/1325
Kmalloc N*alloc N*free(2048): Average=2513/1664
Kmalloc N*alloc N*free(4096): Average=4742/2172

It shows that performance improves greatly (roughly more than 50%)
for the object class whose size is more than 128 bytes.

Thanks.

Joonsoo Kim (11):
  mm/slab: hold a slab_mutex when calling __kmem_cache_shrink()
  mm/slab: remove BAD_ALIEN_MAGIC again
  mm/slab: drain the free slab as much as possible
  mm/slab: factor out kmem_cache_node initialization code
  mm/slab: clean-up kmem_cache_node setup
  mm/slab: don't keep free slabs if free_objects exceeds free_limit
  mm/slab: racy access/modify the slab color
  mm/slab: make cache_grow() handle the page allocated on arbitrary node
  mm/slab: separate cache_grow() to two parts
  mm/slab: refill cpu cache through a new slab without holding a node
lock
  mm/slab: lockless decision to grow cache

 mm/slab.c| 495 ---
 mm/slab_common.c |   4 +
 2 files changed, 255 insertions(+), 244 deletions(-)

-- 
1.9.1

mm/slab: reduce lock contention in alloc path

2016-03-27 Thread js1304

From: Joonsoo Kim 

While processing concurrent allocations, SLAB can be heavily contended
because it does a lot of work while holding a lock. This patchset
tries to reduce the number of critical sections in order to reduce
lock contention. The major changes are a lockless decision to allocate
more slabs and a lockless cpu cache refill from the newly allocated slab.

Below are the results of the concurrent allocation/free test from the slab
allocation benchmark written by Christoph a long time ago. I have simplified
the output. The numbers show the cycle counts for alloc/free respectively,
so lower is better.

* Before
Kmalloc N*alloc N*free(32): Average=365/806
Kmalloc N*alloc N*free(64): Average=452/690
Kmalloc N*alloc N*free(128): Average=736/886
Kmalloc N*alloc N*free(256): Average=1167/985
Kmalloc N*alloc N*free(512): Average=2088/1125
Kmalloc N*alloc N*free(1024): Average=4115/1184
Kmalloc N*alloc N*free(2048): Average=8451/1748
Kmalloc N*alloc N*free(4096): Average=16024/2048

* After
Kmalloc N*alloc N*free(32): Average=344/792
Kmalloc N*alloc N*free(64): Average=347/882
Kmalloc N*alloc N*free(128): Average=390/959
Kmalloc N*alloc N*free(256): Average=393/1067
Kmalloc N*alloc N*free(512): Average=683/1229
Kmalloc N*alloc N*free(1024): Average=1295/1325
Kmalloc N*alloc N*free(2048): Average=2513/1664
Kmalloc N*alloc N*free(4096): Average=4742/2172

It shows that performance improves greatly (roughly more than 50%)
for the object class whose size is more than 128 bytes.

Thanks.

Joonsoo Kim (11):
  mm/slab: hold a slab_mutex when calling __kmem_cache_shrink()
  mm/slab: remove BAD_ALIEN_MAGIC again
  mm/slab: drain the free slab as much as possible
  mm/slab: factor out kmem_cache_node initialization code
  mm/slab: clean-up kmem_cache_node setup
  mm/slab: don't keep free slabs if free_objects exceeds free_limit
  mm/slab: racy access/modify the slab color
  mm/slab: make cache_grow() handle the page allocated on arbitrary node
  mm/slab: separate cache_grow() to two parts
  mm/slab: refill cpu cache through a new slab without holding a node
lock
  mm/slab: lockless decision to grow cache

 mm/slab.c| 495 ---
 mm/slab_common.c |   4 +
 2 files changed, 255 insertions(+), 244 deletions(-)

-- 
1.9.1

RE: [PATCH v2 5/6] Documentation: DT: vdma: update binding doc for AXI CDMA

2016-03-27 Thread Appana Durga Kedareswara Rao

Hi Soren,

> -Original Message-
> From: Sören Brinkmann [mailto:soren.brinkm...@xilinx.com]
> Sent: Monday, March 28, 2016 12:58 AM
> To: Appana Durga Kedareswara Rao
> Cc: robh...@kernel.org; pawel.m...@arm.com; mark.rutl...@arm.com;
> ijc+devicet...@hellion.org.uk; ga...@codeaurora.org; Michal Simek;
> vinod.k...@intel.com; dan.j.willi...@intel.com; Anurag Kumar Vulisha; Appana
> Durga Kedareswara Rao; moritz.fisc...@ettus.com;
> laurent.pinch...@ideasonboard.com; l...@debethencourt.com; Srikanth
> Vemula; Anirudha Sarangi; devicet...@vger.kernel.org; linux-arm-
> ker...@lists.infradead.org; linux-kernel@vger.kernel.org;
> dmaeng...@vger.kernel.org
> Subject: Re: [PATCH v2 5/6] Documentation: DT: vdma: update binding doc for
> AXI CDMA
> 
> On Sun, 2016-03-27 at 23:36:06 +0530, Kedareswara rao Appana wrote:
> > This patch updates the device-tree binding doc for adding support for
> > AXI CDMA.
> >
> > Signed-off-by: Kedareswara rao Appana 
> > ---
> > ---> Modified commit message as suggested by Vinod.
> > ---> Moved the patch to forward in the series as suggested by vinod.
> >
> >  .../devicetree/bindings/dma/xilinx/xilinx_vdma.txt | 18
> +-
> >  1 file changed, 17 insertions(+), 1 deletion(-)
> >
> > diff --git
> > a/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt
> > b/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt
> > index 5841421..2b0c12b 100644
> > --- a/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt
> > +++ b/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt
> > @@ -8,8 +8,12 @@ target devices. It can be configured to have one channel
> or two channels.
> >  If configured as two channels, one is to transmit to the device and
> > another  is to receive from the device.
> >
> > +Xilinx AXI CDMA engine, it does transfers between memory-mapped
> > +source address and a memory-mapped destination address.
> > +
> >  Required properties:
> > -- compatible: Should be "xlnx,axi-vdma-1.00.a" or "xlnx,axi-dma-1.00.a"
> > +- compatible: Should be "xlnx,axi-vdma-1.00.a" or "xlnx,axi-dma-1.00.a" or
> > + "xlnx,axi-cdma-1.00.a""
> >  - #dma-cells: Should be <1>, see "dmas" property below
> >  - reg: Should contain VDMA registers location and length.
> >  - xlnx,num-fstores: Should be the number of framebuffers as configured in
> h/w.
> > @@ -80,6 +84,18 @@ axi_dma_0: axidma@4040 {
> > } ;
> >  } ;
> >
> > +axi_cdma_0: axicdma@7e20 {
> > +   compatible = "xlnx,axi-cdma-1.00.a";
> > +   #dma-cells = <1>;
> > +   reg = < 0x7e20 0x1 >;
> > +   xlnx,addrwidth = <0x20>;
> > +   dma-channel@7e20 {
> > +   compatible = "xlnx,axi-dma-mm2s-channel";
> > +   interrupts = < 0 55 4 >;
> > +   xlnx,datawidth = <0x40>;
> > +   } ;
> > +} ;
> 
> As in the other patch, the node name should be 'dma-controller@...' and the
> inconsistent spacing could be fixed.

Ok will fix...

> 
> Also, it seems this adds pretty much identical examples that just differ in 
> the
> compat string. Is that really needed?

Most of the properties are the same across the three DMA IPs.
AXI VDMA has a few required properties that are not required for AXI
DMA/CDMA.
That's why I added examples for the other IPs as well, i.e. for AXI DMA and CDMA.

Regards,
Kedar.

> 
>   Sören

[PATCH 03/11] mm/slab: drain the free slab as much as possible

2016-03-27 Thread js1304

From: Joonsoo Kim 

slabs_tofree() implies freeing all free slabs. We can do the same by
simply passing INT_MAX.

Signed-off-by: Joonsoo Kim 
---
 mm/slab.c | 12 +++-
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index a5a205b..ba2eacf 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -888,12 +888,6 @@ static int init_cache_node_node(int node)
return 0;
 }
 
-static inline int slabs_tofree(struct kmem_cache *cachep,
-   struct kmem_cache_node *n)
-{
-   return (n->free_objects + cachep->num - 1) / cachep->num;
-}
-
 static void cpuup_canceled(long cpu)
 {
struct kmem_cache *cachep;
@@ -958,7 +952,7 @@ free_slab:
n = get_node(cachep, node);
if (!n)
continue;
-   drain_freelist(cachep, n, slabs_tofree(cachep, n));
+   drain_freelist(cachep, n, INT_MAX);
}
 }
 
@@ -1110,7 +1104,7 @@ static int __meminit drain_cache_node_node(int node)
if (!n)
continue;
 
-   drain_freelist(cachep, n, slabs_tofree(cachep, n));
+   drain_freelist(cachep, n, INT_MAX);
 
if (!list_empty(&n->slabs_full) ||
!list_empty(&n->slabs_partial)) {
@@ -2280,7 +2274,7 @@ int __kmem_cache_shrink(struct kmem_cache *cachep, bool 
deactivate)
 
check_irq_on();
for_each_kmem_cache_node(cachep, node, n) {
-   drain_freelist(cachep, n, slabs_tofree(cachep, n));
+   drain_freelist(cachep, n, INT_MAX);
 
ret += !list_empty(&n->slabs_full) ||
!list_empty(&n->slabs_partial);
-- 
1.9.1

[PATCH 04/11] mm/slab: factor out kmem_cache_node initialization code

2016-03-27 Thread js1304

From: Joonsoo Kim 

This code can be reused in other places, so factor it out. A following
patch will use it.

Signed-off-by: Joonsoo Kim 
---
 mm/slab.c | 68 ---
 1 file changed, 39 insertions(+), 29 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index ba2eacf..569d7db 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -841,6 +841,40 @@ static inline gfp_t gfp_exact_node(gfp_t flags)
 }
 #endif
 
+static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp)
+{
+   struct kmem_cache_node *n;
+
+   /*
+* Set up the kmem_cache_node for cpu before we can
+* begin anything. Make sure some other cpu on this
+* node has not already allocated this
+*/
+   n = get_node(cachep, node);
+   if (n)
+   return 0;
+
+   n = kmalloc_node(sizeof(struct kmem_cache_node), gfp, node);
+   if (!n)
+   return -ENOMEM;
+
+   kmem_cache_node_init(n);
+   n->next_reap = jiffies + REAPTIMEOUT_NODE +
+   ((unsigned long)cachep) % REAPTIMEOUT_NODE;
+
+   n->free_limit =
+   (1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num;
+
+   /*
+* The kmem_cache_nodes don't come and go as CPUs
+* come and go.  slab_mutex is sufficient
+* protection here.
+*/
+   cachep->node[node] = n;
+
+   return 0;
+}
+
 /*
  * Allocates and initializes node for a node on each slab cache, used for
  * either memory or cpu hotplug.  If memory is being hot-added, the 
kmem_cache_node
@@ -852,39 +886,15 @@ static inline gfp_t gfp_exact_node(gfp_t flags)
  */
 static int init_cache_node_node(int node)
 {
+   int ret;
struct kmem_cache *cachep;
-   struct kmem_cache_node *n;
-   const size_t memsize = sizeof(struct kmem_cache_node);
 
list_for_each_entry(cachep, &slab_caches, list) {
-   /*
-* Set up the kmem_cache_node for cpu before we can
-* begin anything. Make sure some other cpu on this
-* node has not already allocated this
-*/
-   n = get_node(cachep, node);
-   if (!n) {
-   n = kmalloc_node(memsize, GFP_KERNEL, node);
-   if (!n)
-   return -ENOMEM;
-   kmem_cache_node_init(n);
-   n->next_reap = jiffies + REAPTIMEOUT_NODE +
-   ((unsigned long)cachep) % REAPTIMEOUT_NODE;
-
-   /*
-* The kmem_cache_nodes don't come and go as CPUs
-* come and go.  slab_mutex is sufficient
-* protection here.
-*/
-   cachep->node[node] = n;
-   }
-
-   spin_lock_irq(>list_lock);
-   n->free_limit =
-   (1 + nr_cpus_node(node)) *
-   cachep->batchcount + cachep->num;
-   spin_unlock_irq(>list_lock);
+   ret = init_cache_node(cachep, node, GFP_KERNEL);
+   if (ret)
+   return ret;
}
+
return 0;
 }
 
-- 
1.9.1

[PATCH 04/11] mm/slab: factor out kmem_cache_node initialization code

2016-03-27 Thread js1304

From: Joonsoo Kim 

This code can be reused in other places, so factor it out. A following
patch will use it.

Signed-off-by: Joonsoo Kim 
---
 mm/slab.c | 68 ---
 1 file changed, 39 insertions(+), 29 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index ba2eacf..569d7db 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -841,6 +841,40 @@ static inline gfp_t gfp_exact_node(gfp_t flags)
 }
 #endif
 
+static int init_cache_node(struct kmem_cache *cachep, int node, gfp_t gfp)
+{
+   struct kmem_cache_node *n;
+
+   /*
+* Set up the kmem_cache_node for cpu before we can
+* begin anything. Make sure some other cpu on this
+* node has not already allocated this
+*/
+   n = get_node(cachep, node);
+   if (n)
+   return 0;
+
+   n = kmalloc_node(sizeof(struct kmem_cache_node), gfp, node);
+   if (!n)
+   return -ENOMEM;
+
+   kmem_cache_node_init(n);
+   n->next_reap = jiffies + REAPTIMEOUT_NODE +
+   ((unsigned long)cachep) % REAPTIMEOUT_NODE;
+
+   n->free_limit =
+   (1 + nr_cpus_node(node)) * cachep->batchcount + cachep->num;
+
+   /*
+* The kmem_cache_nodes don't come and go as CPUs
+* come and go.  slab_mutex is sufficient
+* protection here.
+*/
+   cachep->node[node] = n;
+
+   return 0;
+}
+
 /*
  * Allocates and initializes node for a node on each slab cache, used for
  * either memory or cpu hotplug.  If memory is being hot-added, the 
kmem_cache_node
@@ -852,39 +886,15 @@ static inline gfp_t gfp_exact_node(gfp_t flags)
  */
 static int init_cache_node_node(int node)
 {
+   int ret;
struct kmem_cache *cachep;
-   struct kmem_cache_node *n;
-   const size_t memsize = sizeof(struct kmem_cache_node);
 
list_for_each_entry(cachep, &slab_caches, list) {
-   /*
-* Set up the kmem_cache_node for cpu before we can
-* begin anything. Make sure some other cpu on this
-* node has not already allocated this
-*/
-   n = get_node(cachep, node);
-   if (!n) {
-   n = kmalloc_node(memsize, GFP_KERNEL, node);
-   if (!n)
-   return -ENOMEM;
-   kmem_cache_node_init(n);
-   n->next_reap = jiffies + REAPTIMEOUT_NODE +
-   ((unsigned long)cachep) % REAPTIMEOUT_NODE;
-
-   /*
-* The kmem_cache_nodes don't come and go as CPUs
-* come and go.  slab_mutex is sufficient
-* protection here.
-*/
-   cachep->node[node] = n;
-   }
-
-   spin_lock_irq(>list_lock);
-   n->free_limit =
-   (1 + nr_cpus_node(node)) *
-   cachep->batchcount + cachep->num;
-   spin_unlock_irq(>list_lock);
+   ret = init_cache_node(cachep, node, GFP_KERNEL);
+   if (ret)
+   return ret;
}
+
return 0;
 }
 
-- 
1.9.1

Re: [PATCH v2] ARM: exynos_defconfig: Enable Samsung media platform drivers as modules

2016-03-27 Thread Javier Martinez Canillas

Hello Krzysztof,

On 03/28/2016 12:28 AM, Krzysztof Kozlowski wrote:
> On 25.03.2016 12:15, Javier Martinez Canillas wrote:
>>>
>>> How about doing the same for multi_v7?
>>>
>>
>> I didn't consider multi_v7 because media drivers aren't necessary for booting
>> the boards and so it could increase build times for not real benefits in most
>> machines. But I can enable it in multi_v7 as a follow-up if you think that it
>> makes sense there too.
> 
> I consider use cases of multi_v7 the same as exynos (except the
> difference in policy: use always modules). This means that if it makes
> sense to enable some media drivers on exynos (for build coverage, boot
> testing and real usage) then it makes sense to do the same for multi_v7.
>

Ok, thanks a lot for your suggestion. I'll post a patch for multi_v7 then.
 
> In ideal future we would move entirely from exynos-specific image to one
> common image for testing: the multi_v7. It would make things simpler.
> 
> Best regards,
> Krzysztof
> 

Best regards,
-- 
Javier Martinez Canillas
Open Source Group
Samsung Research America

Re: [PATCH v2] ARM: exynos_defconfig: Enable Samsung media platform drivers as modules

2016-03-27 Thread Javier Martinez Canillas

Hello Krzysztof,

On 03/28/2016 12:28 AM, Krzysztof Kozlowski wrote:
> On 25.03.2016 12:15, Javier Martinez Canillas wrote:
>>>
>>> How about doing the same for multi_v7?
>>>
>>
>> I didn't consider multi_v7 because media drivers aren't necessary for booting
>> the boards and so it could increase build times for not real benefits in most
>> machines. But I can enable it in multi_v7 as a follow-up if you think that it
>> makes sense there too.
> 
> I consider use cases of multi_v7 the same as exynos (except the
> difference in policy: use always modules). This means that if it makes
> sense to enable some media drivers on exynos (for build coverage, boot
> testing and real usage) then it makes sense to do the same for multi_v7.
>

Ok, thanks a lot for your suggestion. I'll post a patch for multi_v7 then.
 
> In ideal future we would move entirely from exynos-specific image to one
> common image for testing: the multi_v7. It would make things simpler.
> 
> Best regards,
> Krzysztof
> 

Best regards,
-- 
Javier Martinez Canillas
Open Source Group
Samsung Research America

Re: [PATCH v2 00/18] Support non-lru page migration

2016-03-27 Thread Minchan Kim

Hello Andrew,

On Mon, Mar 21, 2016 at 03:30:49PM +0900, Minchan Kim wrote:
> Recently, I got many reports about performance degradation
> in embedded system(Android mobile phone, webOS TV and so on)
> and failed to fork easily.
> 
> The problem was fragmentation caused by zram and GPU driver
> pages. Their pages cannot be migrated so compaction cannot
> work well, either so reclaimer ends up shrinking all of working
> set pages. It made system very slow and even to fail to fork
> easily.
> 
> Other pain point is that they cannot work with CMA.
> Most of CMA memory space could be idle(ie, it could be used
> for movable pages unless driver is using) but if driver(i.e.,
> zram) cannot migrate his page, that memory space could be
> wasted. In our product which has big CMA memory, it reclaims
> zones too excessively although there are lots of free space
> in CMA so system was very slow easily.
> 
> To solve these problem, this patch try to add facility to
> migrate non-lru pages via introducing new friend functions
> of migratepage in address_space_operation and new page flags.
> 
>   (isolate_page, putback_page)
>   (PG_movable, PG_isolated)
> 
> For details, please read description in
> "mm/compaction: support non-lru movable page migration".
> 
> Originally, Gioh Kim tried to support this feature but he moved
> so I took over the work. But I took many code from his work and
> changed a little bit.
> Thanks, Gioh!
> 
> And I should mention Konstantin Khlebnikov. He really helped Gioh
> at that time so he should deserve to have many credit, too.
> Thanks, Konstantin!
> 
> This patchset consists of five parts
> 
> 1. clean up migration
>   mm: use put_page to free page instead of putback_lru_page
> 
> 2. zsmalloc clean-up for preparing page migration
>   zsmalloc: use first_page rather than page
>   zsmalloc: clean up many BUG_ON
>   zsmalloc: reordering function parameter
>   zsmalloc: remove unused pool param in obj_free
>   zsmalloc: keep max_object in size_class
>   zsmalloc: squeeze inuse into page->mapping
>   zsmalloc: squeeze freelist into page->mapping
>   zsmalloc: move struct zs_meta from mapping to freelist
>   zsmalloc: factor page chain functionality out
>   zsmalloc: separate free_zspage from putback_zspage
>   zsmalloc: zs_compact refactoring

In this series, patches [2-5] are cleanups that do not depend on the goal
of the patchset, so they could be merged independently.
I want to reduce the patchset size in the next posting.
If nobody objects, could you merge these cleanup patches?

   zsmalloc: use first_page rather than page
   zsmalloc: clean up many BUG_ON
   zsmalloc: reordering function parameter
   zsmalloc: remove unused pool param in obj_free

Thanks.

Re: [PATCH v2 00/18] Support non-lru page migration

2016-03-27 Thread Minchan Kim

Hello Andrew,

On Mon, Mar 21, 2016 at 03:30:49PM +0900, Minchan Kim wrote:
> Recently, I got many reports about performance degradation
> in embedded system(Android mobile phone, webOS TV and so on)
> and failed to fork easily.
> 
> The problem was fragmentation caused by zram and GPU driver
> pages. Their pages cannot be migrated so compaction cannot
> work well, either so reclaimer ends up shrinking all of working
> set pages. It made system very slow and even to fail to fork
> easily.
> 
> Other pain point is that they cannot work with CMA.
> Most of CMA memory space could be idle(ie, it could be used
> for movable pages unless driver is using) but if driver(i.e.,
> zram) cannot migrate his page, that memory space could be
> wasted. In our product which has big CMA memory, it reclaims
> zones too excessively although there are lots of free space
> in CMA so system was very slow easily.
> 
> To solve these problem, this patch try to add facility to
> migrate non-lru pages via introducing new friend functions
> of migratepage in address_space_operation and new page flags.
> 
>   (isolate_page, putback_page)
>   (PG_movable, PG_isolated)
> 
> For details, please read description in
> "mm/compaction: support non-lru movable page migration".
> 
> Originally, Gioh Kim tried to support this feature but he moved
> so I took over the work. But I took many code from his work and
> changed a little bit.
> Thanks, Gioh!
> 
> And I should mention Konstantin Khlebnikov. He really helped Gioh
> at that time so he should deserve to have many credit, too.
> Thanks, Konstantin!
> 
> This patchset consists of five parts
> 
> 1. clean up migration
>   mm: use put_page to free page instead of putback_lru_page
> 
> 2. zsmalloc clean-up for preparing page migration
>   zsmalloc: use first_page rather than page
>   zsmalloc: clean up many BUG_ON
>   zsmalloc: reordering function parameter
>   zsmalloc: remove unused pool param in obj_free
>   zsmalloc: keep max_object in size_class
>   zsmalloc: squeeze inuse into page->mapping
>   zsmalloc: squeeze freelist into page->mapping
>   zsmalloc: move struct zs_meta from mapping to freelist
>   zsmalloc: factor page chain functionality out
>   zsmalloc: separate free_zspage from putback_zspage
>   zsmalloc: zs_compact refactoring

In this series, patches [2-5] are cleanups that do not depend on the goal
of the patchset, so they could be merged independently.
I want to reduce the patchset size in the next posting.
If nobody objects, could you merge these cleanup patches?

   zsmalloc: use first_page rather than page
   zsmalloc: clean up many BUG_ON
   zsmalloc: reordering function parameter
   zsmalloc: remove unused pool param in obj_free

Thanks.

arm:collie_defconfig broken since commit ff2b135922 ("gpio: make the gpiochip a real device")

2016-03-27 Thread Guenter Roeck


Hi,

arm:collie_defconfig is broken since commit ff2b135922 ("gpio: make the gpiochip a 
real device").

Test is quite simple:

Build arm:collie_defconfig, run with
qemu-system-arm -M collie -kernel arch/arm/boot/zImage --append 
"console=ttySA1" -monitor null -nographic

Prior to the above commit, there is console output (the image crashes because it
doesn't have a root file system, but that is irrelevant). After the commit, the
console is silent.

Let me know if I can do anything to help tracking down the problem.

Thanks,
Guenter

---
Bisect log:

# bad: [f55532a0c0b8bb6148f4e07853b876ef73bc69ca] Linux 4.6-rc1
# good: [b562e44f507e863c6792946e4e1b1449fbbac85d] Linux 4.5
git bisect start 'HEAD' 'v4.5'
# bad: [6b5f04b6cf8ebab9a65d9c0026c650bb2538fd0f] Merge branch 'for-4.6' of 
git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
git bisect bad 6b5f04b6cf8ebab9a65d9c0026c650bb2538fd0f
# good: [96b9b1c95660d4bc5510c5d798d3817ae9f0b391] Merge tag 'tty-4.6-rc1' of 
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty
git bisect good 96b9b1c95660d4bc5510c5d798d3817ae9f0b391
# good: [18f038e6bfb715310526ac05e4f20e55683471de] staging: dgnc: cleanup 
properly
git bisect good 18f038e6bfb715310526ac05e4f20e55683471de
# bad: [10fdfee7f7fd8d4a6a8455ac4c9fbbc51d79b9f7] Merge branch 'for-linus' of 
git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input
git bisect bad 10fdfee7f7fd8d4a6a8455ac4c9fbbc51d79b9f7
# good: [3d15cfdb1b77536c205d8e49c0312219ddf162ec] Merge tag 
'linux-kselftest-4.6-rc1' of 
git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest
git bisect good 3d15cfdb1b77536c205d8e49c0312219ddf162ec
# bad: [ccbd805aa934dd1b863ef115a9c55f119b2388cf] Revert "Share upstreaming 
patches"
git bisect bad ccbd805aa934dd1b863ef115a9c55f119b2388cf
# bad: [db303a9053c8a81717cb6497f4cf63b773b21ca8] gpio: arizona: Use 
devm_gpiochip_add_data() for gpio registration
git bisect bad db303a9053c8a81717cb6497f4cf63b773b21ca8
# bad: [ed07247dbf5b0a23b0289c9e8e4a2ceb9b8f2e9d] gpio: Remove unused 
asm/gpio.h files
git bisect bad ed07247dbf5b0a23b0289c9e8e4a2ceb9b8f2e9d
# bad: [2f890cf0dfe421ecd2095d8cabb89e7207b499ee] gpio: ath79: Make the driver 
removable
git bisect bad 2f890cf0dfe421ecd2095d8cabb89e7207b499ee
# good: [daec0beda6fd7a741e4381741b3f7ff15970fdf6] Merge branch 
'error-return-from-get' into devel
git bisect good daec0beda6fd7a741e4381741b3f7ff15970fdf6
# bad: [40c159b776f882f2e2cbe20c9e29832841e5c0f9] gpio: add a userspace 
character device ABI
git bisect bad 40c159b776f882f2e2cbe20c9e29832841e5c0f9
# bad: [34ffd85d9c46cde3dc987cac82bff370a937ac4b] gpio: refer to gpio device in 
prints and debugfs
git bisect bad 34ffd85d9c46cde3dc987cac82bff370a937ac4b
# bad: [ff2b1359229927563addbf2f5ad480660c350903] gpio: make the gpiochip a 
real device
git bisect bad ff2b1359229927563addbf2f5ad480660c350903
# first bad commit: [ff2b1359229927563addbf2f5ad480660c350903] gpio: make the 
gpiochip a real device

arm:collie_defconfig broken since commit ff2b135922 ("gpio: make the gpiochip a real device")

2016-03-27 Thread Guenter Roeck


Hi,

arm:collie_defconfig is broken since commit ff2b135922 ("gpio: make the gpiochip a 
real device").

Test is quite simple:

Build arm:collie_defconfig, run with
qemu-system-arm -M collie -kernel arch/arm/boot/zImage --append 
"console=ttySA1" -monitor null -nographic

Prior to the above commit, there is console output (the image crashes because it
doesn't have a root file system, but that is irrelevant). After the commit, the
console is silent.

Let me know if I can do anything to help tracking down the problem.

Thanks,
Guenter

---
Bisect log:

# bad: [f55532a0c0b8bb6148f4e07853b876ef73bc69ca] Linux 4.6-rc1
# good: [b562e44f507e863c6792946e4e1b1449fbbac85d] Linux 4.5
git bisect start 'HEAD' 'v4.5'
# bad: [6b5f04b6cf8ebab9a65d9c0026c650bb2538fd0f] Merge branch 'for-4.6' of 
git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
git bisect bad 6b5f04b6cf8ebab9a65d9c0026c650bb2538fd0f
# good: [96b9b1c95660d4bc5510c5d798d3817ae9f0b391] Merge tag 'tty-4.6-rc1' of 
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty
git bisect good 96b9b1c95660d4bc5510c5d798d3817ae9f0b391
# good: [18f038e6bfb715310526ac05e4f20e55683471de] staging: dgnc: cleanup 
properly
git bisect good 18f038e6bfb715310526ac05e4f20e55683471de
# bad: [10fdfee7f7fd8d4a6a8455ac4c9fbbc51d79b9f7] Merge branch 'for-linus' of 
git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input
git bisect bad 10fdfee7f7fd8d4a6a8455ac4c9fbbc51d79b9f7
# good: [3d15cfdb1b77536c205d8e49c0312219ddf162ec] Merge tag 
'linux-kselftest-4.6-rc1' of 
git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest
git bisect good 3d15cfdb1b77536c205d8e49c0312219ddf162ec
# bad: [ccbd805aa934dd1b863ef115a9c55f119b2388cf] Revert "Share upstreaming 
patches"
git bisect bad ccbd805aa934dd1b863ef115a9c55f119b2388cf
# bad: [db303a9053c8a81717cb6497f4cf63b773b21ca8] gpio: arizona: Use 
devm_gpiochip_add_data() for gpio registration
git bisect bad db303a9053c8a81717cb6497f4cf63b773b21ca8
# bad: [ed07247dbf5b0a23b0289c9e8e4a2ceb9b8f2e9d] gpio: Remove unused 
asm/gpio.h files
git bisect bad ed07247dbf5b0a23b0289c9e8e4a2ceb9b8f2e9d
# bad: [2f890cf0dfe421ecd2095d8cabb89e7207b499ee] gpio: ath79: Make the driver 
removable
git bisect bad 2f890cf0dfe421ecd2095d8cabb89e7207b499ee
# good: [daec0beda6fd7a741e4381741b3f7ff15970fdf6] Merge branch 
'error-return-from-get' into devel
git bisect good daec0beda6fd7a741e4381741b3f7ff15970fdf6
# bad: [40c159b776f882f2e2cbe20c9e29832841e5c0f9] gpio: add a userspace 
character device ABI
git bisect bad 40c159b776f882f2e2cbe20c9e29832841e5c0f9
# bad: [34ffd85d9c46cde3dc987cac82bff370a937ac4b] gpio: refer to gpio device in 
prints and debugfs
git bisect bad 34ffd85d9c46cde3dc987cac82bff370a937ac4b
# bad: [ff2b1359229927563addbf2f5ad480660c350903] gpio: make the gpiochip a 
real device
git bisect bad ff2b1359229927563addbf2f5ad480660c350903
# first bad commit: [ff2b1359229927563addbf2f5ad480660c350903] gpio: make the 
gpiochip a real device

[PATCH v2 2/5] phy: Add support for NS2 SATA3 PHY in Broadcom SATA3 PHY driver

2016-03-27 Thread Anup Patel

This patch adds support for the Broadcom NS2 SATA3 PHY to the existing
Broadcom SATA3 PHY driver.

Signed-off-by: Anup Patel 
---
 drivers/phy/phy-brcm-sata.c | 238 +---
 1 file changed, 200 insertions(+), 38 deletions(-)

diff --git a/drivers/phy/phy-brcm-sata.c b/drivers/phy/phy-brcm-sata.c
index c97b9d6..6c4c5cb 100644
--- a/drivers/phy/phy-brcm-sata.c
+++ b/drivers/phy/phy-brcm-sata.c
@@ -14,6 +14,7 @@
  * GNU General Public License for more details.
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -24,22 +25,26 @@
 #include 
 #include 
 
-#define SATA_MDIO_BANK_OFFSET  0x23c
-#define SATA_MDIO_REG_OFFSET(ofs)  ((ofs) * 4)
+#define SATA_PCB_BANK_OFFSET   0x23c
+#define SATA_PCB_REG_OFFSET(ofs)   ((ofs) * 4)
 
 #define MAX_PORTS  2
 
 /* Register offset between PHYs in PCB space */
-#define SATA_MDIO_REG_28NM_SPACE_SIZE  0x1000
+#define SATA_PCB_REG_28NM_SPACE_SIZE   0x1000
 
 /* The older SATA PHY registers duplicated per port registers within the map,
  * rather than having a separate map per port.
  */
-#define SATA_MDIO_REG_40NM_SPACE_SIZE  0x10
+#define SATA_PCB_REG_40NM_SPACE_SIZE   0x10
+
+/* Register offset between PHYs in PHY control space */
+#define SATA_PHY_CTRL_REG_28NM_SPACE_SIZE  0x8
 
 enum brcm_sata_phy_version {
-   BRCM_SATA_PHY_28NM,
-   BRCM_SATA_PHY_40NM,
+   BRCM_SATA_PHY_STB_28NM,
+   BRCM_SATA_PHY_STB_40NM,
+   BRCM_SATA_PHY_IPROC_NS2,
 };
 
 struct brcm_sata_port {
@@ -52,15 +57,48 @@ struct brcm_sata_port {
 struct brcm_sata_phy {
struct device *dev;
void __iomem *phy_base;
+   void __iomem *ctrl_base;
enum brcm_sata_phy_version version;
 
struct brcm_sata_port phys[MAX_PORTS];
 };
 
-enum sata_mdio_phy_regs {
-   PLL_REG_BANK_0  = 0x50,
+enum sata_phy_regs {
+   BLOCK0_REG_BANK = 0x000,
+   BLOCK0_XGXSSTATUS   = 0x81,
+   BLOCK0_XGXSSTATUS_PLL_LOCK  = BIT(12),
+   BLOCK0_SPARE= 0x8d,
+   BLOCK0_SPARE_OOB_CLK_SEL_MASK   = 0x3,
+   BLOCK0_SPARE_OOB_CLK_SEL_REFBY2 = 0x1,
+
+   PLL_REG_BANK_0  = 0x050,
PLL_REG_BANK_0_PLLCONTROL_0 = 0x81,
 
+   PLL1_REG_BANK   = 0x060,
+   PLL1_ACTRL2 = 0x82,
+   PLL1_ACTRL3 = 0x83,
+   PLL1_ACTRL4 = 0x84,
+
+   OOB_REG_BANK= 0x150,
+   OOB_CTRL1   = 0x80,
+   OOB_CTRL1_BURST_MAX_MASK= 0xf,
+   OOB_CTRL1_BURST_MAX_SHIFT   = 12,
+   OOB_CTRL1_BURST_MIN_MASK= 0xf,
+   OOB_CTRL1_BURST_MIN_SHIFT   = 8,
+   OOB_CTRL1_WAKE_IDLE_MAX_MASK= 0xf,
+   OOB_CTRL1_WAKE_IDLE_MAX_SHIFT   = 4,
+   OOB_CTRL1_WAKE_IDLE_MIN_MASK= 0xf,
+   OOB_CTRL1_WAKE_IDLE_MIN_SHIFT   = 0,
+   OOB_CTRL2   = 0x81,
+   OOB_CTRL2_SEL_ENA_SHIFT = 15,
+   OOB_CTRL2_SEL_ENA_RC_SHIFT  = 14,
+   OOB_CTRL2_RESET_IDLE_MAX_MASK   = 0x3f,
+   OOB_CTRL2_RESET_IDLE_MAX_SHIFT  = 8,
+   OOB_CTRL2_BURST_CNT_MASK= 0x3,
+   OOB_CTRL2_BURST_CNT_SHIFT   = 6,
+   OOB_CTRL2_RESET_IDLE_MIN_MASK   = 0x3f,
+   OOB_CTRL2_RESET_IDLE_MIN_SHIFT  = 0,
+
TXPMD_REG_BANK  = 0x1a0,
TXPMD_CONTROL1  = 0x81,
TXPMD_CONTROL1_TX_SSC_EN_FRC= BIT(0),
@@ -72,69 +110,183 @@ enum sata_mdio_phy_regs {
TXPMD_TX_FREQ_CTRL_CONTROL3_FMAX_MASK   = 0x3ff,
 };
 
-static inline void __iomem *brcm_sata_phy_base(struct brcm_sata_port *port)
+enum sata_phy_ctrl_regs {
+   PHY_CTRL_1  = 0x0,
+   PHY_CTRL_1_RESET= BIT(0),
+};
+
+static inline void __iomem *brcm_sata_pcb_base(struct brcm_sata_port *port)
 {
struct brcm_sata_phy *priv = port->phy_priv;
-   u32 offset = 0;
+   u32 size = 0;
+
+   switch (priv->version) {
+   case BRCM_SATA_PHY_STB_28NM:
+   case BRCM_SATA_PHY_IPROC_NS2:
+   size = SATA_PCB_REG_28NM_SPACE_SIZE;
+   break;
+   case BRCM_SATA_PHY_STB_40NM:
+   size = SATA_PCB_REG_40NM_SPACE_SIZE;
+   break;
+   default:
+   dev_err(priv->dev, "invalid phy version\n");
+   break;
+   };
 
-   if (priv->version == BRCM_SATA_PHY_28NM)
-   offset = SATA_MDIO_REG_28NM_SPACE_SIZE;
-   else

[PATCH v2 2/5] phy: Add support for NS2 SATA3 PHY in Broadcom SATA3 PHY driver

2016-03-27 Thread Anup Patel

This patch adds support for the Broadcom NS2 SATA3 PHY to the existing
Broadcom SATA3 PHY driver.

Signed-off-by: Anup Patel 
---
 drivers/phy/phy-brcm-sata.c | 238 +---
 1 file changed, 200 insertions(+), 38 deletions(-)

diff --git a/drivers/phy/phy-brcm-sata.c b/drivers/phy/phy-brcm-sata.c
index c97b9d6..6c4c5cb 100644
--- a/drivers/phy/phy-brcm-sata.c
+++ b/drivers/phy/phy-brcm-sata.c
@@ -14,6 +14,7 @@
  * GNU General Public License for more details.
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -24,22 +25,26 @@
 #include 
 #include 
 
-#define SATA_MDIO_BANK_OFFSET  0x23c
-#define SATA_MDIO_REG_OFFSET(ofs)  ((ofs) * 4)
+#define SATA_PCB_BANK_OFFSET   0x23c
+#define SATA_PCB_REG_OFFSET(ofs)   ((ofs) * 4)
 
 #define MAX_PORTS  2
 
 /* Register offset between PHYs in PCB space */
-#define SATA_MDIO_REG_28NM_SPACE_SIZE  0x1000
+#define SATA_PCB_REG_28NM_SPACE_SIZE   0x1000
 
 /* The older SATA PHY registers duplicated per port registers within the map,
  * rather than having a separate map per port.
  */
-#define SATA_MDIO_REG_40NM_SPACE_SIZE  0x10
+#define SATA_PCB_REG_40NM_SPACE_SIZE   0x10
+
+/* Register offset between PHYs in PHY control space */
+#define SATA_PHY_CTRL_REG_28NM_SPACE_SIZE  0x8
 
 enum brcm_sata_phy_version {
-   BRCM_SATA_PHY_28NM,
-   BRCM_SATA_PHY_40NM,
+   BRCM_SATA_PHY_STB_28NM,
+   BRCM_SATA_PHY_STB_40NM,
+   BRCM_SATA_PHY_IPROC_NS2,
 };
 
 struct brcm_sata_port {
@@ -52,15 +57,48 @@ struct brcm_sata_port {
 struct brcm_sata_phy {
struct device *dev;
void __iomem *phy_base;
+   void __iomem *ctrl_base;
enum brcm_sata_phy_version version;
 
struct brcm_sata_port phys[MAX_PORTS];
 };
 
-enum sata_mdio_phy_regs {
-   PLL_REG_BANK_0  = 0x50,
+enum sata_phy_regs {
+   BLOCK0_REG_BANK = 0x000,
+   BLOCK0_XGXSSTATUS   = 0x81,
+   BLOCK0_XGXSSTATUS_PLL_LOCK  = BIT(12),
+   BLOCK0_SPARE= 0x8d,
+   BLOCK0_SPARE_OOB_CLK_SEL_MASK   = 0x3,
+   BLOCK0_SPARE_OOB_CLK_SEL_REFBY2 = 0x1,
+
+   PLL_REG_BANK_0  = 0x050,
PLL_REG_BANK_0_PLLCONTROL_0 = 0x81,
 
+   PLL1_REG_BANK   = 0x060,
+   PLL1_ACTRL2 = 0x82,
+   PLL1_ACTRL3 = 0x83,
+   PLL1_ACTRL4 = 0x84,
+
+   OOB_REG_BANK= 0x150,
+   OOB_CTRL1   = 0x80,
+   OOB_CTRL1_BURST_MAX_MASK= 0xf,
+   OOB_CTRL1_BURST_MAX_SHIFT   = 12,
+   OOB_CTRL1_BURST_MIN_MASK= 0xf,
+   OOB_CTRL1_BURST_MIN_SHIFT   = 8,
+   OOB_CTRL1_WAKE_IDLE_MAX_MASK= 0xf,
+   OOB_CTRL1_WAKE_IDLE_MAX_SHIFT   = 4,
+   OOB_CTRL1_WAKE_IDLE_MIN_MASK= 0xf,
+   OOB_CTRL1_WAKE_IDLE_MIN_SHIFT   = 0,
+   OOB_CTRL2   = 0x81,
+   OOB_CTRL2_SEL_ENA_SHIFT = 15,
+   OOB_CTRL2_SEL_ENA_RC_SHIFT  = 14,
+   OOB_CTRL2_RESET_IDLE_MAX_MASK   = 0x3f,
+   OOB_CTRL2_RESET_IDLE_MAX_SHIFT  = 8,
+   OOB_CTRL2_BURST_CNT_MASK= 0x3,
+   OOB_CTRL2_BURST_CNT_SHIFT   = 6,
+   OOB_CTRL2_RESET_IDLE_MIN_MASK   = 0x3f,
+   OOB_CTRL2_RESET_IDLE_MIN_SHIFT  = 0,
+
TXPMD_REG_BANK  = 0x1a0,
TXPMD_CONTROL1  = 0x81,
TXPMD_CONTROL1_TX_SSC_EN_FRC= BIT(0),
@@ -72,69 +110,183 @@ enum sata_mdio_phy_regs {
TXPMD_TX_FREQ_CTRL_CONTROL3_FMAX_MASK   = 0x3ff,
 };
 
-static inline void __iomem *brcm_sata_phy_base(struct brcm_sata_port *port)
+enum sata_phy_ctrl_regs {
+   PHY_CTRL_1  = 0x0,
+   PHY_CTRL_1_RESET= BIT(0),
+};
+
+static inline void __iomem *brcm_sata_pcb_base(struct brcm_sata_port *port)
 {
struct brcm_sata_phy *priv = port->phy_priv;
-   u32 offset = 0;
+   u32 size = 0;
+
+   switch (priv->version) {
+   case BRCM_SATA_PHY_STB_28NM:
+   case BRCM_SATA_PHY_IPROC_NS2:
+   size = SATA_PCB_REG_28NM_SPACE_SIZE;
+   break;
+   case BRCM_SATA_PHY_STB_40NM:
+   size = SATA_PCB_REG_40NM_SPACE_SIZE;
+   break;
+   default:
+   dev_err(priv->dev, "invalid phy version\n");
+   break;
+   };
 
-   if (priv->version == BRCM_SATA_PHY_28NM)
-   offset = SATA_MDIO_REG_28NM_SPACE_SIZE;
-   else if (priv->version ==

[PATCH v2 1/5] phy: Rename phy-brcmstb-sata driver to phy-brcm-sata driver

2016-03-27 Thread Anup Patel

Currently, we have a common SATA3 PHY driver for all Broadcom
STB SoCs. This driver can be extended and re-used for Broadcom
iProc SoCs that have the same SATA3 PHY.

This patch renames the existing Broadcom STB SATA3 PHY driver to
a common Broadcom SATA3 PHY driver so that it can be shared across
Broadcom SoCs.

Signed-off-by: Anup Patel 
---
 drivers/phy/Kconfig | 18 +-
 drivers/phy/Makefile|  2 +-
 drivers/phy/{phy-brcmstb-sata.c => phy-brcm-sata.c} |  8 
 3 files changed, 14 insertions(+), 14 deletions(-)
 rename drivers/phy/{phy-brcmstb-sata.c => phy-brcm-sata.c} (97%)

diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig
index 26566db..c0187a7 100644
--- a/drivers/phy/Kconfig
+++ b/drivers/phy/Kconfig
@@ -403,15 +403,6 @@ config PHY_TUSB1210
help
  Support for TI TUSB1210 USB ULPI PHY.
 
-config PHY_BRCMSTB_SATA
-   tristate "Broadcom STB SATA PHY driver"
-   depends on ARCH_BRCMSTB || BMIPS_GENERIC
-   depends on OF
-   select GENERIC_PHY
-   help
- Enable this to support the SATA3 PHY on 28nm or 40nm Broadcom STB 
SoCs.
- Likely useful only with CONFIG_SATA_BRCMSTB enabled.
-
 config PHY_CYGNUS_PCIE
tristate "Broadcom Cygnus PCIe PHY driver"
depends on OF && (ARCH_BCM_CYGNUS || COMPILE_TEST)
@@ -421,4 +412,13 @@ config PHY_CYGNUS_PCIE
  Enable this to support the Broadcom Cygnus PCIe PHY.
  If unsure, say N.
 
+config PHY_BRCM_SATA
+   tristate "Broadcom SATA PHY driver"
+   depends on ARCH_BRCMSTB || ARCH_BCM_IPROC || BMIPS_GENERIC || 
COMPILE_TEST
+   depends on OF
+   select GENERIC_PHY
+   default ARCH_BCM_IPROC
+   help
+ Enable this to support the Broadcom SATA PHY.
+ If unsure, say N.
 endmenu
diff --git a/drivers/phy/Makefile b/drivers/phy/Makefile
index 24596a9..596fae9 100644
--- a/drivers/phy/Makefile
+++ b/drivers/phy/Makefile
@@ -49,6 +49,6 @@ obj-$(CONFIG_PHY_QCOM_UFS)+= phy-qcom-ufs.o
 obj-$(CONFIG_PHY_QCOM_UFS) += phy-qcom-ufs-qmp-20nm.o
 obj-$(CONFIG_PHY_QCOM_UFS) += phy-qcom-ufs-qmp-14nm.o
 obj-$(CONFIG_PHY_TUSB1210) += phy-tusb1210.o
-obj-$(CONFIG_PHY_BRCMSTB_SATA) += phy-brcmstb-sata.o
 obj-$(CONFIG_PHY_PISTACHIO_USB)+= phy-pistachio-usb.o
 obj-$(CONFIG_PHY_CYGNUS_PCIE)  += phy-bcm-cygnus-pcie.o
+obj-$(CONFIG_PHY_BRCM_SATA)+= phy-brcm-sata.o
diff --git a/drivers/phy/phy-brcmstb-sata.c b/drivers/phy/phy-brcm-sata.c
similarity index 97%
rename from drivers/phy/phy-brcmstb-sata.c
rename to drivers/phy/phy-brcm-sata.c
index a23172f..c97b9d6 100644
--- a/drivers/phy/phy-brcmstb-sata.c
+++ b/drivers/phy/phy-brcm-sata.c
@@ -1,7 +1,7 @@
 /*
  * Broadcom SATA3 AHCI Controller PHY Driver
  *
- * Copyright © 2009-2015 Broadcom Corporation
+ * Copyright (C) 2016 Broadcom
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -238,13 +238,13 @@ static struct platform_driver brcm_sata_phy_driver = {
.probe  = brcm_sata_phy_probe,
.driver = {
.of_match_table = brcm_sata_phy_of_match,
-   .name   = "brcmstb-sata-phy",
+   .name   = "brcm-sata-phy",
}
 };
 module_platform_driver(brcm_sata_phy_driver);
 
-MODULE_DESCRIPTION("Broadcom STB SATA PHY driver");
+MODULE_DESCRIPTION("Broadcom SATA PHY driver");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marc Carino");
 MODULE_AUTHOR("Brian Norris");
-MODULE_ALIAS("platform:phy-brcmstb-sata");
+MODULE_ALIAS("platform:phy-brcm-sata");
-- 
1.9.1

[PATCH v2 3/5] dt-bindings: phy: bindings document for common Broadcom SATA3 PHY driver

2016-03-27 Thread Anup Patel

This patch:
1. Renames the DT bindings document of the Broadcom STB SATA3 PHY driver
   to the common Broadcom SATA3 PHY driver bindings document
2. Adds bindings info for the NS2 SATA3 PHY

Signed-off-by: Anup Patel 
Acked-by: Rob Herring 
---
 .../phy/{brcm,brcmstb-sata-phy.txt => brcm-sata-phy.txt}  | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)
 rename Documentation/devicetree/bindings/phy/{brcm,brcmstb-sata-phy.txt => 
brcm-sata-phy.txt} (69%)

diff --git a/Documentation/devicetree/bindings/phy/brcm,brcmstb-sata-phy.txt 
b/Documentation/devicetree/bindings/phy/brcm-sata-phy.txt
similarity index 69%
rename from Documentation/devicetree/bindings/phy/brcm,brcmstb-sata-phy.txt
rename to Documentation/devicetree/bindings/phy/brcm-sata-phy.txt
index d87ab7c..d023120 100644
--- a/Documentation/devicetree/bindings/phy/brcm,brcmstb-sata-phy.txt
+++ b/Documentation/devicetree/bindings/phy/brcm-sata-phy.txt
@@ -1,14 +1,17 @@
-* Broadcom SATA3 PHY for STB
+* Broadcom SATA3 PHY
 
 Required properties:
 - compatible: should be one or more of
  "brcm,bcm7425-sata-phy"
  "brcm,bcm7445-sata-phy"
+ "brcm,iproc-ns2-sata-phy"
  "brcm,phy-sata3"
 - address-cells: should be 1
 - size-cells: should be 0
-- reg: register range for the PHY PCB interface
-- reg-names: should be "phy"
+- reg: register ranges for the PHY PCB interface
+- reg-names: should be "phy" and "phy-ctrl"
+ The "phy-ctrl" registers are only required for
+ "brcm,iproc-ns2-sata-phy".
 
 Sub-nodes:
   Each port's PHY should be represented as a sub-node.
@@ -16,12 +19,12 @@ Sub-nodes:
 Sub-nodes required properties:
 - reg: the PHY number
 - phy-cells: generic PHY binding; must be 0
-Optional:
-- brcm,enable-ssc: use spread spectrum clocking (SSC) on this port
 
+Sub-nodes optional properties:
+- brcm,enable-ssc: use spread spectrum clocking (SSC) on this port
+ This property is not applicable for "brcm,iproc-ns2-sata-phy".
 
 Example:
-
sata-phy@f0458100 {
compatible = "brcm,bcm7445-sata-phy", "brcm,phy-sata3";
reg = <0xf0458100 0x1e00>, <0xf045804c 0x10>;
-- 
1.9.1

[PATCH v2 4/5] dt-bindings: ata: add compatible string for iProc AHCI controller

2016-03-27 Thread Anup Patel

The Broadcom iProc SoCs have an AHCI-compliant SATA controller. This
patch adds a common compatible string for the AHCI SATA controller on
iProc SoCs.

Signed-off-by: Anup Patel 
Acked-by: Rob Herring 
---
 Documentation/devicetree/bindings/ata/ahci-platform.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/ata/ahci-platform.txt 
b/Documentation/devicetree/bindings/ata/ahci-platform.txt
index 30df832..3990348 100644
--- a/Documentation/devicetree/bindings/ata/ahci-platform.txt
+++ b/Documentation/devicetree/bindings/ata/ahci-platform.txt
@@ -10,6 +10,7 @@ PHYs.
 Required properties:
 - compatible: compatible string, one of:
   - "allwinner,sun4i-a10-ahci"
+  - "brcm,iproc-ahci"
   - "hisilicon,hisi-ahci"
   - "cavium,octeon-7130-ahci"
   - "ibm,476gtr-ahci"
-- 
1.9.1

[PATCH v2 5/5] arm64: dts: Add SATA3 AHCI and SATA3 PHY DT nodes for NS2

2016-03-27 Thread Anup Patel

There is one dual-port SATA3 AHCI controller present in
the NS2 SoC.

This patch enables the SATA3 AHCI controller and the SATA3 PHY
for the NS2 SoC in the NS2 DT.

Signed-off-by: Anup Patel 
Reviewed-by: Ray Jui 
Reviewed-by: Scott Branden 
---
 arch/arm64/boot/dts/broadcom/ns2-svk.dts | 12 +
 arch/arm64/boot/dts/broadcom/ns2.dtsi| 43 
 2 files changed, 55 insertions(+)

diff --git a/arch/arm64/boot/dts/broadcom/ns2-svk.dts 
b/arch/arm64/boot/dts/broadcom/ns2-svk.dts
index ce0ab84..06cf9c5 100644
--- a/arch/arm64/boot/dts/broadcom/ns2-svk.dts
+++ b/arch/arm64/boot/dts/broadcom/ns2-svk.dts
@@ -72,6 +72,18 @@
status = "ok";
 };
 
+&sata_phy0 {
+   status = "ok";
+};
+
+&sata_phy1 {
+   status = "ok";
+};
+
+&sata {
+   status = "ok";
+};
+
  {
status = "ok";
 };
diff --git a/arch/arm64/boot/dts/broadcom/ns2.dtsi 
b/arch/arm64/boot/dts/broadcom/ns2.dtsi
index 6f81c9d..c8dccf8 100644
--- a/arch/arm64/boot/dts/broadcom/ns2.dtsi
+++ b/arch/arm64/boot/dts/broadcom/ns2.dtsi
@@ -413,6 +413,49 @@
reg = <0x6622 0x28>;
};
 
+   sata_phy: sata_phy@663f0100 {
+   compatible = "brcm,iproc-ns2-sata-phy";
+   reg = <0x663f0100 0x1f00>,
+ <0x663f004c 0x10>;
+   reg-names = "phy", "phy-ctrl";
+   #address-cells = <1>;
+   #size-cells = <0>;
+
+   sata_phy0: sata-phy@0 {
+   reg = <0>;
+   #phy-cells = <0>;
+   status = "disabled";
+   };
+
+   sata_phy1: sata-phy@1 {
+   reg = <1>;
+   #phy-cells = <0>;
+   status = "disabled";
+   };
+   };
+
+   sata: ahci@663f2000 {
+   compatible = "brcm,iproc-ahci", "generic-ahci";
+   reg = <0x663f2000 0x1000>;
+   reg-names = "ahci";
+   interrupts = ;
+   #address-cells = <1>;
+   #size-cells = <0>;
+   status = "disabled";
+
+   sata0: sata-port@0 {
+   reg = <0>;
+   phys = <&sata_phy0>;
+   phy-names = "sata-phy";
+   };
+
+   sata1: sata-port@1 {
+   reg = <1>;
+   phys = <&sata_phy1>;
+   phy-names = "sata-phy";
+   };
+   };
+
sdio0: sdhci@6642 {
compatible = "brcm,sdhci-iproc-cygnus";
reg = <0x6642 0x100>;
-- 
1.9.1

[PATCH v2 4/5] dt-bindings: ata: add compatible string for iProc AHCI controller

2016-03-27 Thread Anup Patel

The Broadcom iProc SoCs have an AHCI-compliant SATA controller. This
patch adds a common compatible string for the AHCI SATA controller on
iProc SoCs.

Signed-off-by: Anup Patel 
Acked-by: Rob Herring 
---
 Documentation/devicetree/bindings/ata/ahci-platform.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/ata/ahci-platform.txt 
b/Documentation/devicetree/bindings/ata/ahci-platform.txt
index 30df832..3990348 100644
--- a/Documentation/devicetree/bindings/ata/ahci-platform.txt
+++ b/Documentation/devicetree/bindings/ata/ahci-platform.txt
@@ -10,6 +10,7 @@ PHYs.
 Required properties:
 - compatible: compatible string, one of:
   - "allwinner,sun4i-a10-ahci"
+  - "brcm,iproc-ahci"
   - "hisilicon,hisi-ahci"
   - "cavium,octeon-7130-ahci"
   - "ibm,476gtr-ahci"
-- 
1.9.1

[PATCH v2 5/5] arm64: dts: Add SATA3 AHCI and SATA3 PHY DT nodes for NS2

2016-03-27 Thread Anup Patel

There is one dual-port SATA3 AHCI controller present in
the NS2 SoC.

This patch enables the SATA3 AHCI controller and the SATA3 PHY
for the NS2 SoC in the NS2 DT.

Signed-off-by: Anup Patel 
Reviewed-by: Ray Jui 
Reviewed-by: Scott Branden 
---
 arch/arm64/boot/dts/broadcom/ns2-svk.dts | 12 +
 arch/arm64/boot/dts/broadcom/ns2.dtsi| 43 
 2 files changed, 55 insertions(+)

diff --git a/arch/arm64/boot/dts/broadcom/ns2-svk.dts 
b/arch/arm64/boot/dts/broadcom/ns2-svk.dts
index ce0ab84..06cf9c5 100644
--- a/arch/arm64/boot/dts/broadcom/ns2-svk.dts
+++ b/arch/arm64/boot/dts/broadcom/ns2-svk.dts
@@ -72,6 +72,18 @@
status = "ok";
 };
 
+&sata_phy0 {
+   status = "ok";
+};
+
+&sata_phy1 {
+   status = "ok";
+};
+
+&sata {
+   status = "ok";
+};
+
  {
status = "ok";
 };
diff --git a/arch/arm64/boot/dts/broadcom/ns2.dtsi 
b/arch/arm64/boot/dts/broadcom/ns2.dtsi
index 6f81c9d..c8dccf8 100644
--- a/arch/arm64/boot/dts/broadcom/ns2.dtsi
+++ b/arch/arm64/boot/dts/broadcom/ns2.dtsi
@@ -413,6 +413,49 @@
reg = <0x6622 0x28>;
};
 
+   sata_phy: sata_phy@663f0100 {
+   compatible = "brcm,iproc-ns2-sata-phy";
+   reg = <0x663f0100 0x1f00>,
+ <0x663f004c 0x10>;
+   reg-names = "phy", "phy-ctrl";
+   #address-cells = <1>;
+   #size-cells = <0>;
+
+   sata_phy0: sata-phy@0 {
+   reg = <0>;
+   #phy-cells = <0>;
+   status = "disabled";
+   };
+
+   sata_phy1: sata-phy@1 {
+   reg = <1>;
+   #phy-cells = <0>;
+   status = "disabled";
+   };
+   };
+
+   sata: ahci@663f2000 {
+   compatible = "brcm,iproc-ahci", "generic-ahci";
+   reg = <0x663f2000 0x1000>;
+   reg-names = "ahci";
+   interrupts = ;
+   #address-cells = <1>;
+   #size-cells = <0>;
+   status = "disabled";
+
+   sata0: sata-port@0 {
+   reg = <0>;
+   phys = <&sata_phy0>;
+   phy-names = "sata-phy";
+   };
+
+   sata1: sata-port@1 {
+   reg = <1>;
+   phys = <&sata_phy1>;
+   phy-names = "sata-phy";
+   };
+   };
+
sdio0: sdhci@6642 {
compatible = "brcm,sdhci-iproc-cygnus";
reg = <0x6642 0x100>;
-- 
1.9.1

[PATCH v2 1/5] phy: Rename phy-brcmstb-sata driver to phy-brcm-sata driver

2016-03-27 Thread Anup Patel

Currently, we have a common SATA3 PHY driver for all Broadcom
STB SoCs. This driver can be extended and re-used for Broadcom
iProc SoCs that have the same SATA3 PHY.

This patch renames the existing Broadcom STB SATA3 PHY driver to
a common Broadcom SATA3 PHY driver so that it can be shared across
Broadcom SoCs.

Signed-off-by: Anup Patel 
---
 drivers/phy/Kconfig | 18 +-
 drivers/phy/Makefile|  2 +-
 drivers/phy/{phy-brcmstb-sata.c => phy-brcm-sata.c} |  8 
 3 files changed, 14 insertions(+), 14 deletions(-)
 rename drivers/phy/{phy-brcmstb-sata.c => phy-brcm-sata.c} (97%)

diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig
index 26566db..c0187a7 100644
--- a/drivers/phy/Kconfig
+++ b/drivers/phy/Kconfig
@@ -403,15 +403,6 @@ config PHY_TUSB1210
help
  Support for TI TUSB1210 USB ULPI PHY.
 
-config PHY_BRCMSTB_SATA
-   tristate "Broadcom STB SATA PHY driver"
-   depends on ARCH_BRCMSTB || BMIPS_GENERIC
-   depends on OF
-   select GENERIC_PHY
-   help
- Enable this to support the SATA3 PHY on 28nm or 40nm Broadcom STB 
SoCs.
- Likely useful only with CONFIG_SATA_BRCMSTB enabled.
-
 config PHY_CYGNUS_PCIE
tristate "Broadcom Cygnus PCIe PHY driver"
depends on OF && (ARCH_BCM_CYGNUS || COMPILE_TEST)
@@ -421,4 +412,13 @@ config PHY_CYGNUS_PCIE
  Enable this to support the Broadcom Cygnus PCIe PHY.
  If unsure, say N.
 
+config PHY_BRCM_SATA
+   tristate "Broadcom SATA PHY driver"
+   depends on ARCH_BRCMSTB || ARCH_BCM_IPROC || BMIPS_GENERIC || 
COMPILE_TEST
+   depends on OF
+   select GENERIC_PHY
+   default ARCH_BCM_IPROC
+   help
+ Enable this to support the Broadcom SATA PHY.
+ If unsure, say N.
 endmenu
diff --git a/drivers/phy/Makefile b/drivers/phy/Makefile
index 24596a9..596fae9 100644
--- a/drivers/phy/Makefile
+++ b/drivers/phy/Makefile
@@ -49,6 +49,6 @@ obj-$(CONFIG_PHY_QCOM_UFS)+= phy-qcom-ufs.o
 obj-$(CONFIG_PHY_QCOM_UFS) += phy-qcom-ufs-qmp-20nm.o
 obj-$(CONFIG_PHY_QCOM_UFS) += phy-qcom-ufs-qmp-14nm.o
 obj-$(CONFIG_PHY_TUSB1210) += phy-tusb1210.o
-obj-$(CONFIG_PHY_BRCMSTB_SATA) += phy-brcmstb-sata.o
 obj-$(CONFIG_PHY_PISTACHIO_USB)+= phy-pistachio-usb.o
 obj-$(CONFIG_PHY_CYGNUS_PCIE)  += phy-bcm-cygnus-pcie.o
+obj-$(CONFIG_PHY_BRCM_SATA)+= phy-brcm-sata.o
diff --git a/drivers/phy/phy-brcmstb-sata.c b/drivers/phy/phy-brcm-sata.c
similarity index 97%
rename from drivers/phy/phy-brcmstb-sata.c
rename to drivers/phy/phy-brcm-sata.c
index a23172f..c97b9d6 100644
--- a/drivers/phy/phy-brcmstb-sata.c
+++ b/drivers/phy/phy-brcm-sata.c
@@ -1,7 +1,7 @@
 /*
  * Broadcom SATA3 AHCI Controller PHY Driver
  *
- * Copyright © 2009-2015 Broadcom Corporation
+ * Copyright (C) 2016 Broadcom
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -238,13 +238,13 @@ static struct platform_driver brcm_sata_phy_driver = {
.probe  = brcm_sata_phy_probe,
.driver = {
.of_match_table = brcm_sata_phy_of_match,
-   .name   = "brcmstb-sata-phy",
+   .name   = "brcm-sata-phy",
}
 };
 module_platform_driver(brcm_sata_phy_driver);
 
-MODULE_DESCRIPTION("Broadcom STB SATA PHY driver");
+MODULE_DESCRIPTION("Broadcom SATA PHY driver");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marc Carino");
 MODULE_AUTHOR("Brian Norris");
-MODULE_ALIAS("platform:phy-brcmstb-sata");
+MODULE_ALIAS("platform:phy-brcm-sata");
-- 
1.9.1

[PATCH v2 3/5] dt-bindings: phy: bindings document for common Broadcom SATA3 PHY driver

2016-03-27 Thread Anup Patel

This patch:
1. Renames the DT bindings document of the Broadcom STB SATA3 PHY driver
   to the common Broadcom SATA3 PHY driver bindings document
2. Adds bindings info for the NS2 SATA3 PHY

Signed-off-by: Anup Patel 
Acked-by: Rob Herring 
---
 .../phy/{brcm,brcmstb-sata-phy.txt => brcm-sata-phy.txt}  | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)
 rename Documentation/devicetree/bindings/phy/{brcm,brcmstb-sata-phy.txt => 
brcm-sata-phy.txt} (69%)

diff --git a/Documentation/devicetree/bindings/phy/brcm,brcmstb-sata-phy.txt 
b/Documentation/devicetree/bindings/phy/brcm-sata-phy.txt
similarity index 69%
rename from Documentation/devicetree/bindings/phy/brcm,brcmstb-sata-phy.txt
rename to Documentation/devicetree/bindings/phy/brcm-sata-phy.txt
index d87ab7c..d023120 100644
--- a/Documentation/devicetree/bindings/phy/brcm,brcmstb-sata-phy.txt
+++ b/Documentation/devicetree/bindings/phy/brcm-sata-phy.txt
@@ -1,14 +1,17 @@
-* Broadcom SATA3 PHY for STB
+* Broadcom SATA3 PHY
 
 Required properties:
 - compatible: should be one or more of
  "brcm,bcm7425-sata-phy"
  "brcm,bcm7445-sata-phy"
+ "brcm,iproc-ns2-sata-phy"
  "brcm,phy-sata3"
 - address-cells: should be 1
 - size-cells: should be 0
-- reg: register range for the PHY PCB interface
-- reg-names: should be "phy"
+- reg: register ranges for the PHY PCB interface
+- reg-names: should be "phy" and "phy-ctrl"
+ The "phy-ctrl" registers are only required for
+ "brcm,iproc-ns2-sata-phy".
 
 Sub-nodes:
   Each port's PHY should be represented as a sub-node.
@@ -16,12 +19,12 @@ Sub-nodes:
 Sub-nodes required properties:
 - reg: the PHY number
 - phy-cells: generic PHY binding; must be 0
-Optional:
-- brcm,enable-ssc: use spread spectrum clocking (SSC) on this port
 
+Sub-nodes optional properties:
+- brcm,enable-ssc: use spread spectrum clocking (SSC) on this port
+ This property is not applicable for "brcm,iproc-ns2-sata-phy".
 
 Example:
-
sata-phy@f0458100 {
compatible = "brcm,bcm7445-sata-phy", "brcm,phy-sata3";
reg = <0xf0458100 0x1e00>, <0xf045804c 0x10>;
-- 
1.9.1

[PATCH v2 0/5] Add SATA3 support for Broadcom NS2 SVK

2016-03-27 Thread Anup Patel

The Broadcom NS2 SoC has an AHCI-compliant SATA3 controller with
two ports.

This patchset adds common Broadcom SATA3 PHY driver and related
DT bindings document. It also adds appropriate DT nodes in NS2 DT.

The patchset is based on v4.6-rc1 tag and is available in branch
ns2_sata3_v2 of https://github.com/Broadcom/arm64-linux.git

All patches have been tested on Broadcom NS2 SVK.

Changes since v1:
 - Added Acked-by tags from Rob for the DT-bindings-related patches

Anup Patel (5):
  phy: Rename phy-brcmstb-sata driver to phy-brcm-sata driver
  phy: Add support for NS2 SATA3 PHY in Broadcom SATA3 PHY driver
  dt-bindings: phy: bindings document for common Broadcom SATA3 PHY
driver
  dt-bindings: ata: add compatible string for iProc AHCI controller
  arm64: dts: Add SATA3 AHCI and SATA3 PHY DT nodes for NS2

 .../devicetree/bindings/ata/ahci-platform.txt  |   1 +
 ...brcm,brcmstb-sata-phy.txt => brcm-sata-phy.txt} |  15 +-
 arch/arm64/boot/dts/broadcom/ns2-svk.dts   |  12 +
 arch/arm64/boot/dts/broadcom/ns2.dtsi  |  43 +++
 drivers/phy/Kconfig|  18 +-
 drivers/phy/Makefile   |   2 +-
 drivers/phy/phy-brcm-sata.c| 412 +
 drivers/phy/phy-brcmstb-sata.c | 250 -
 8 files changed, 487 insertions(+), 266 deletions(-)
 rename Documentation/devicetree/bindings/phy/{brcm,brcmstb-sata-phy.txt => 
brcm-sata-phy.txt} (69%)
 create mode 100644 drivers/phy/phy-brcm-sata.c
 delete mode 100644 drivers/phy/phy-brcmstb-sata.c

-- 
1.9.1

[PATCH v2 0/5] Add SATA3 support for Broadcom NS2 SVK

2016-03-27 Thread Anup Patel

The Broadcom NS2 SoC has an AHCI-compliant SATA3 controller with
two ports.

This patchset adds common Broadcom SATA3 PHY driver and related
DT bindings document. It also adds appropriate DT nodes in NS2 DT.

The patchset is based on v4.6-rc1 tag and is available in branch
ns2_sata3_v2 of https://github.com/Broadcom/arm64-linux.git

All patches have been tested on Broadcom NS2 SVK.

Changes since v1:
 - Added Acked-by tags from Rob for the DT-bindings-related patches

Anup Patel (5):
  phy: Rename phy-brcmstb-sata driver to phy-brcm-sata driver
  phy: Add support for NS2 SATA3 PHY in Broadcom SATA3 PHY driver
  dt-bindings: phy: bindings document for common Broadcom SATA3 PHY
driver
  dt-bindings: ata: add compatible string for iProc AHCI controller
  arm64: dts: Add SATA3 AHCI and SATA3 PHY DT nodes for NS2

 .../devicetree/bindings/ata/ahci-platform.txt  |   1 +
 ...brcm,brcmstb-sata-phy.txt => brcm-sata-phy.txt} |  15 +-
 arch/arm64/boot/dts/broadcom/ns2-svk.dts   |  12 +
 arch/arm64/boot/dts/broadcom/ns2.dtsi  |  43 +++
 drivers/phy/Kconfig|  18 +-
 drivers/phy/Makefile   |   2 +-
 drivers/phy/phy-brcm-sata.c| 412 +
 drivers/phy/phy-brcmstb-sata.c | 250 -
 8 files changed, 487 insertions(+), 266 deletions(-)
 rename Documentation/devicetree/bindings/phy/{brcm,brcmstb-sata-phy.txt => 
brcm-sata-phy.txt} (69%)
 create mode 100644 drivers/phy/phy-brcm-sata.c
 delete mode 100644 drivers/phy/phy-brcmstb-sata.c

-- 
1.9.1

Re: [PATCH] zram: revive swap_slot_free_notify

2016-03-27 Thread Minchan Kim

On Wed, Mar 23, 2016 at 01:45:34PM +0900, Joonsoo Kim wrote:
> On Tue, Mar 22, 2016 at 11:06:29PM +0900, Minchan Kim wrote:
> > On Tue, Mar 22, 2016 at 05:20:08PM +0900, Joonsoo Kim wrote:
> > > 2016-03-22 17:00 GMT+09:00 Minchan Kim :
> > > > On Tue, Mar 22, 2016 at 02:08:59PM +0900, Joonsoo Kim wrote:
> > > >> On Fri, Mar 18, 2016 at 04:58:31PM +0900, Minchan Kim wrote:
> > > >> >  "remove compressed copy from zram in-memory"
> > > >> > applied swap_slot_free_notify call in *end_swap_bio_read* to
> > > >> > remove duplicated memory between zram and memory.
> > > >> >
> > > >> > However, with introducing rw_page in zram <8c7f01025f7b>
> > > >> > "zram: implement rw_page operation of zram", it became void
> > > >> > because rw_page doesn't need bio.
> > > >> >
> > > >> > This patch restores the function for rw_page.
> > > >> >
> > > >> > Signed-off-by: Minchan Kim 
> > > >> > ---
> > > >> >  mm/page_io.c | 93 
> > > >> > 
> > > >> >  1 file changed, 50 insertions(+), 43 deletions(-)
> > > >> >
> > > >> > diff --git a/mm/page_io.c b/mm/page_io.c
> > > >> > index ff74e512f029..18aac7819cc9 100644
> > > >> > --- a/mm/page_io.c
> > > >> > +++ b/mm/page_io.c
> > > >> > @@ -66,6 +66,54 @@ void end_swap_bio_write(struct bio *bio)
> > > >> > bio_put(bio);
> > > >> >  }
> > > >> >
> > > >> > +static void swap_slot_free_notify(struct page *page)
> > > >> > +{
> > > >> > +   struct swap_info_struct *sis;
> > > >> > +   struct gendisk *disk;
> > > >> > +
> > > >> > +   /*
> > > >> > +* There is no guarantee that the page is in swap cache - the 
> > > >> > software
> > > >> > +* suspend code (at least) uses end_swap_bio_read() against a 
> > > >> > non-
> > > >> > +* swapcache page.  So we must check PG_swapcache before 
> > > >> > proceeding with
> > > >> > +* this optimization.
> > > >> > +*/
> > > >> > +   if (unlikely(!PageSwapCache(page)))
> > > >> > +   return;
> > > >> > +
> > > >> > +   sis = page_swap_info(page);
> > > >> > +   if (!(sis->flags & SWP_BLKDEV))
> > > >> > +   return;
> > > >> > +
> > > >> > +   /*
> > > >> > +* The swap subsystem performs lazy swap slot freeing,
> > > >> > +* expecting that the page will be swapped out again.
> > > >> > +* So we can avoid an unnecessary write if the page
> > > >> > +* isn't redirtied.
> > > >> > +* This is good for real swap storage because we can
> > > >> > +* reduce unnecessary I/O and enhance wear-leveling
> > > >> > +* if an SSD is used as the swap device.
> > > >> > +* But if in-memory swap device (eg zram) is used,
> > > >> > +* this causes a duplicated copy between uncompressed
> > > >> > +* data in VM-owned memory and compressed data in
> > > >> > +* zram-owned memory.  So let's free zram-owned memory
> > > >> > +* and make the VM-owned decompressed page *dirty*,
> > > >> > +* so the page should be swapped out somewhere again if
> > > >> > +* we again wish to reclaim it.
> > > >> > +*/
> > > >> > +   disk = sis->bdev->bd_disk;
> > > >> > +   if (disk->fops->swap_slot_free_notify) {
> > > >> > +   swp_entry_t entry;
> > > >> > +   unsigned long offset;
> > > >> > +
> > > >> > +   entry.val = page_private(page);
> > > >> > +   offset = swp_offset(entry);
> > > >> > +
> > > >> > +   SetPageDirty(page);
> > > >> > +   disk->fops->swap_slot_free_notify(sis->bdev,
> > > >> > +   offset);
> > > >> > +   }
> > > >> > +}
> > > >> > +
> > > >> >  static void end_swap_bio_read(struct bio *bio)
> > > >> >  {
> > > >> > struct page *page = bio->bi_io_vec[0].bv_page;
> > > >> > @@ -81,49 +129,7 @@ static void end_swap_bio_read(struct bio *bio)
> > > >> > }
> > > >> >
> > > >> > SetPageUptodate(page);
> > > >> > -
> > > >> > -   /*
> > > >> > -* There is no guarantee that the page is in swap cache - the 
> > > >> > software
> > > >> > -* suspend code (at least) uses end_swap_bio_read() against a 
> > > >> > non-
> > > >> > -* swapcache page.  So we must check PG_swapcache before 
> > > >> > proceeding with
> > > >> > -* this optimization.
> > > >> > -*/
> > > >> > -   if (likely(PageSwapCache(page))) {
> > > >> > -   struct swap_info_struct *sis;
> > > >> > -
> > > >> > -   sis = page_swap_info(page);
> > > >> > -   if (sis->flags & SWP_BLKDEV) {
> > > >> > -   /*
> > > >> > -* The swap subsystem performs lazy swap slot 
> > > >> > freeing,
> > > >> > -* expecting that the page will be swapped out 
> > > >> > again.
> > > >> > -* So we can avoid an unnecessary write if the 
> > > >> > page
> > > >> > -* isn't redirtied.
> > > >> > -* This is good for real swap storage because we 
> > > >> > can
> > > >> > -*

Re: [PATCH] zram: revive swap_slot_free_notify

2016-03-27 Thread Minchan Kim

On Wed, Mar 23, 2016 at 01:45:34PM +0900, Joonsoo Kim wrote:
> On Tue, Mar 22, 2016 at 11:06:29PM +0900, Minchan Kim wrote:
> > On Tue, Mar 22, 2016 at 05:20:08PM +0900, Joonsoo Kim wrote:
> > > 2016-03-22 17:00 GMT+09:00 Minchan Kim :
> > > > On Tue, Mar 22, 2016 at 02:08:59PM +0900, Joonsoo Kim wrote:
> > > >> On Fri, Mar 18, 2016 at 04:58:31PM +0900, Minchan Kim wrote:
> > > >> >  "remove compressed copy from zram in-memory"
> > > >> > applied swap_slot_free_notify call in *end_swap_bio_read* to
> > > >> > remove duplicated memory between zram and memory.
> > > >> >
> > > >> > However, with introducing rw_page in zram <8c7f01025f7b>
> > > >> > "zram: implement rw_page operation of zram", it became void
> > > >> > because rw_page doesn't need bio.
> > > >> >
> > > >> > This patch restores the function for rw_page.
> > > >> >
> > > >> > Signed-off-by: Minchan Kim 
> > > >> > ---
> > > >> >  mm/page_io.c | 93 
> > > >> > 
> > > >> >  1 file changed, 50 insertions(+), 43 deletions(-)
> > > >> >
> > > >> > diff --git a/mm/page_io.c b/mm/page_io.c
> > > >> > index ff74e512f029..18aac7819cc9 100644
> > > >> > --- a/mm/page_io.c
> > > >> > +++ b/mm/page_io.c
> > > >> > @@ -66,6 +66,54 @@ void end_swap_bio_write(struct bio *bio)
> > > >> > bio_put(bio);
> > > >> >  }
> > > >> >
> > > >> > +static void swap_slot_free_notify(struct page *page)
> > > >> > +{
> > > >> > +   struct swap_info_struct *sis;
> > > >> > +   struct gendisk *disk;
> > > >> > +
> > > >> > +   /*
> > > >> > +* There is no guarantee that the page is in swap cache - the 
> > > >> > software
> > > >> > +* suspend code (at least) uses end_swap_bio_read() against a 
> > > >> > non-
> > > >> > +* swapcache page.  So we must check PG_swapcache before 
> > > >> > proceeding with
> > > >> > +* this optimization.
> > > >> > +*/
> > > >> > +   if (unlikely(!PageSwapCache(page)))
> > > >> > +   return;
> > > >> > +
> > > >> > +   sis = page_swap_info(page);
> > > >> > +   if (!(sis->flags & SWP_BLKDEV))
> > > >> > +   return;
> > > >> > +
> > > >> > +   /*
> > > >> > +* The swap subsystem performs lazy swap slot freeing,
> > > >> > +* expecting that the page will be swapped out again.
> > > >> > +* So we can avoid an unnecessary write if the page
> > > >> > +* isn't redirtied.
> > > >> > +* This is good for real swap storage because we can
> > > >> > +* reduce unnecessary I/O and enhance wear-leveling
> > > >> > +* if an SSD is used as the as swap device.
> > > >> > +* But if in-memory swap device (eg zram) is used,
> > > >> > +* this causes a duplicated copy between uncompressed
> > > >> > +* data in VM-owned memory and compressed data in
> > > >> > +* zram-owned memory.  So let's free zram-owned memory
> > > >> > +* and make the VM-owned decompressed page *dirty*,
> > > >> > +* so the page should be swapped out somewhere again if
> > > >> > +* we again wish to reclaim it.
> > > >> > +*/
> > > >> > +   disk = sis->bdev->bd_disk;
> > > >> > +   if (disk->fops->swap_slot_free_notify) {
> > > >> > +   swp_entry_t entry;
> > > >> > +   unsigned long offset;
> > > >> > +
> > > >> > +   entry.val = page_private(page);
> > > >> > +   offset = swp_offset(entry);
> > > >> > +
> > > >> > +   SetPageDirty(page);
> > > >> > +   disk->fops->swap_slot_free_notify(sis->bdev,
> > > >> > +   offset);
> > > >> > +   }
> > > >> > +}
> > > >> > +
> > > >> >  static void end_swap_bio_read(struct bio *bio)
> > > >> >  {
> > > >> > struct page *page = bio->bi_io_vec[0].bv_page;
> > > >> > @@ -81,49 +129,7 @@ static void end_swap_bio_read(struct bio *bio)
> > > >> > }
> > > >> >
> > > >> > SetPageUptodate(page);
> > > >> > -
> > > >> > -   /*
> > > >> > -* There is no guarantee that the page is in swap cache - the 
> > > >> > software
> > > >> > -* suspend code (at least) uses end_swap_bio_read() against a 
> > > >> > non-
> > > >> > -* swapcache page.  So we must check PG_swapcache before 
> > > >> > proceeding with
> > > >> > -* this optimization.
> > > >> > -*/
> > > >> > -   if (likely(PageSwapCache(page))) {
> > > >> > -   struct swap_info_struct *sis;
> > > >> > -
> > > >> > -   sis = page_swap_info(page);
> > > >> > -   if (sis->flags & SWP_BLKDEV) {
> > > >> > -   /*
> > > >> > -* The swap subsystem performs lazy swap slot 
> > > >> > freeing,
> > > >> > -* expecting that the page will be swapped out 
> > > >> > again.
> > > >> > -* So we can avoid an unnecessary write if the 
> > > >> > page
> > > >> > -* isn't redirtied.
> > > >> > -* This is good for real swap storage because we 
> > > >> > can
> > > >> > -* reduce unnecessary I/O and enhance 
> > >

Re: [PATCH] mmc: Provide tracepoints for request processing

2016-03-27 Thread Baolin Wang

On 25 March 2016 at 22:07, Jens Axboe  wrote:
> On 03/25/2016 01:32 AM, Baolin Wang wrote:
>>
>> On 24 March 2016 at 22:08, Jens Axboe  wrote:
>>>
>>> On 03/24/2016 05:54 AM, Baolin Wang wrote:


 This patch provides some tracepoints for the lifecycle of a request from
 fetching to completion to help with performance analysis of MMC
 subsystem.
>>>
>>>
>>>
>>> Most of these already exist as block layer trace points, why do we need
>>> mmc
>>> specific ones?
>>
>>
>> Currently the MMC core does not have any tracepoints for use with
>> ftrace. These are very useful as they provide a very low overhead
>> runtime controllable way of getting diagnostics from the system which
>> is capable of recording a great deal of information without impacting
>> system performance. We have tracepoints in the block layer so we can
>> do some trace of MMC but none in MMC itself so adding some where
>> appropriate would help people follow the activity of subsystem.
>
>
> But more than half of the trace points you added, those are DIRECTLY related
> to the block event. So what you are saying makes little sense. I see you
> resend it with the same trace points, I'll comment on that mail.

OK. I'll address your comments on that new mail. Thanks.

>
> --
> Jens Axboe
>



-- 
Baolin.wang
Best Regards

Re: [PATCH] mmc: Provide tracepoints for request processing

2016-03-27 Thread Baolin Wang

On 25 March 2016 at 22:07, Jens Axboe  wrote:
> On 03/25/2016 01:32 AM, Baolin Wang wrote:
>>
>> On 24 March 2016 at 22:08, Jens Axboe  wrote:
>>>
>>> On 03/24/2016 05:54 AM, Baolin Wang wrote:


 This patch provides some tracepoints for the lifecycle of a request from
 fetching to completion to help with performance analysis of MMC
 subsystem.
>>>
>>>
>>>
>>> Most of these already exist as block layer trace points, why do we need
>>> mmc
>>> specific ones?
>>
>>
>> Currently the MMC core does not have any tracepoints for use with
>> ftrace. These are very useful as they provide a very low overhead
>> runtime controllable way of getting diagnostics from the system which
>> is capable of recording a great deal of information without impacting
>> system performance. We have tracepoints in the block layer so we can
>> do some trace of MMC but none in MMC itself so adding some where
>> appropriate would help people follow the activity of subsystem.
>
>
> But more than half of the trace points you added, those are DIRECTLY related
> to the block event. So what you are saying makes little sense. I see you
> resend it with the same trace points, I'll comment on that mail.

OK. I'll address your comments on that new mail. Thanks.

>
> --
> Jens Axboe
>



-- 
Baolin.wang
Best Regards

[PATCH V2] staging: dgnc: replace dgnc_offset_table with bit shift.

2016-03-27 Thread Daeseok Youn

Each entry of dgnc_offset_table has the same value as (1 << port),
so replace the dgnc_offset_table array with 1 << port.
Also remove the redundant assignments (tmp and current_port)
inside the while loop that checks each uart port.

Signed-off-by: Daeseok Youn 
---
V2: clean up useless variables. The port value is now incremented only
to traverse all ports in uart_poll, so replace 'continue' with 'break'
in the switch statement and move on to the next port at the end of
the loop body.

 drivers/staging/dgnc/dgnc_neo.c | 44 +++--
 1 file changed, 12 insertions(+), 32 deletions(-)

diff --git a/drivers/staging/dgnc/dgnc_neo.c b/drivers/staging/dgnc/dgnc_neo.c
index d732e6e..6b57c44 100644
--- a/drivers/staging/dgnc/dgnc_neo.c
+++ b/drivers/staging/dgnc/dgnc_neo.c
@@ -77,9 +77,6 @@ struct board_ops dgnc_neo_ops = {
.send_immediate_char =  neo_send_immediate_char
 };
 
-static uint dgnc_offset_table[8] = { 0x01, 0x02, 0x04, 0x08,
-0x10, 0x20, 0x40, 0x80 };
-
 /*
  * This function allows calls to ensure that all outstanding
  * PCI writes have been completed, by doing a PCI read against
@@ -923,9 +920,7 @@ static irqreturn_t neo_intr(int irq, void *voidbrd)
struct dgnc_board *brd = voidbrd;
struct channel_t *ch;
int port = 0;
-   int type = 0;
-   int current_port;
-   u32 tmp;
+   int type;
u32 uart_poll;
unsigned long flags;
unsigned long flags2;
@@ -960,29 +955,12 @@ static irqreturn_t neo_intr(int irq, void *voidbrd)
 
/* At this point, we have at least SOMETHING to service, dig further... 
*/
 
-   current_port = 0;
-
/* Loop on each port */
while ((uart_poll & 0xff) != 0) {
-   tmp = uart_poll;
-
-   /* Check current port to see if it has interrupt pending */
-   if ((tmp & dgnc_offset_table[current_port]) != 0) {
-   port = current_port;
-   type = tmp >> (8 + (port * 3));
-   type &= 0x7;
-   } else {
-   current_port++;
-   continue;
-   }
-
-   /* Remove this port + type from uart_poll */
-   uart_poll &= ~(dgnc_offset_table[port]);
+   type = uart_poll >> (8 + (port * 3));
+   type &= 0x7;
 
-   if (!type) {
-   /* If no type, just ignore it, and move onto next port 
*/
-   continue;
-   }
+   uart_poll &= ~(0x01 << port);
 
/* Switch on type of interrupt we have */
switch (type) {
@@ -994,7 +972,7 @@ static irqreturn_t neo_intr(int irq, void *voidbrd)
 
/* Verify the port is in range. */
if (port >= brd->nasync)
-   continue;
+   break;
 
ch = brd->channels[port];
neo_copy_data_from_uart_to_queue(ch);
@@ -1004,14 +982,14 @@ static irqreturn_t neo_intr(int irq, void *voidbrd)
dgnc_check_queue_flow_control(ch);
spin_unlock_irqrestore(>ch_lock, flags2);
 
-   continue;
+   break;
 
case UART_17158_RX_LINE_STATUS:
/*
 * RXRDY and RX LINE Status (logic OR of LSR[4:1])
 */
neo_parse_lsr(brd, port);
-   continue;
+   break;
 
case UART_17158_TXRDY:
/*
@@ -1027,14 +1005,14 @@ static irqreturn_t neo_intr(int irq, void *voidbrd)
 * it should be, I was getting things like RXDY too. 
Weird.
 */
neo_parse_isr(brd, port);
-   continue;
+   break;
 
case UART_17158_MSR:
/*
 * MSR or flow control was seen.
 */
neo_parse_isr(brd, port);
-   continue;
+   break;
 
default:
/*
@@ -1043,8 +1021,10 @@ static irqreturn_t neo_intr(int irq, void *voidbrd)
 * these once and awhile.
 * Its harmless, just ignore it and move on.
 */
-   continue;
+   break;
}
+
+   port++;
}
 
/*
-- 
1.9.1

[PATCH V2] staging: dgnc: replace dgnc_offset_table with bit shift.

2016-03-27 Thread Daeseok Youn

Each entry of dgnc_offset_table has the same value as (1 << port),
so replace the dgnc_offset_table array with 1 << port.
Also remove the redundant assignments (tmp and current_port)
inside the while loop that checks each uart port.

Signed-off-by: Daeseok Youn 
---
V2: clean up useless variables. The port value is now incremented only
to traverse all ports in uart_poll, so replace 'continue' with 'break'
in the switch statement and move on to the next port at the end of
the loop body.

 drivers/staging/dgnc/dgnc_neo.c | 44 +++--
 1 file changed, 12 insertions(+), 32 deletions(-)

diff --git a/drivers/staging/dgnc/dgnc_neo.c b/drivers/staging/dgnc/dgnc_neo.c
index d732e6e..6b57c44 100644
--- a/drivers/staging/dgnc/dgnc_neo.c
+++ b/drivers/staging/dgnc/dgnc_neo.c
@@ -77,9 +77,6 @@ struct board_ops dgnc_neo_ops = {
.send_immediate_char =  neo_send_immediate_char
 };
 
-static uint dgnc_offset_table[8] = { 0x01, 0x02, 0x04, 0x08,
-0x10, 0x20, 0x40, 0x80 };
-
 /*
  * This function allows calls to ensure that all outstanding
  * PCI writes have been completed, by doing a PCI read against
@@ -923,9 +920,7 @@ static irqreturn_t neo_intr(int irq, void *voidbrd)
struct dgnc_board *brd = voidbrd;
struct channel_t *ch;
int port = 0;
-   int type = 0;
-   int current_port;
-   u32 tmp;
+   int type;
u32 uart_poll;
unsigned long flags;
unsigned long flags2;
@@ -960,29 +955,12 @@ static irqreturn_t neo_intr(int irq, void *voidbrd)
 
/* At this point, we have at least SOMETHING to service, dig further... 
*/
 
-   current_port = 0;
-
/* Loop on each port */
while ((uart_poll & 0xff) != 0) {
-   tmp = uart_poll;
-
-   /* Check current port to see if it has interrupt pending */
-   if ((tmp & dgnc_offset_table[current_port]) != 0) {
-   port = current_port;
-   type = tmp >> (8 + (port * 3));
-   type &= 0x7;
-   } else {
-   current_port++;
-   continue;
-   }
-
-   /* Remove this port + type from uart_poll */
-   uart_poll &= ~(dgnc_offset_table[port]);
+   type = uart_poll >> (8 + (port * 3));
+   type &= 0x7;
 
-   if (!type) {
-   /* If no type, just ignore it, and move onto next port 
*/
-   continue;
-   }
+   uart_poll &= ~(0x01 << port);
 
/* Switch on type of interrupt we have */
switch (type) {
@@ -994,7 +972,7 @@ static irqreturn_t neo_intr(int irq, void *voidbrd)
 
/* Verify the port is in range. */
if (port >= brd->nasync)
-   continue;
+   break;
 
ch = brd->channels[port];
neo_copy_data_from_uart_to_queue(ch);
@@ -1004,14 +982,14 @@ static irqreturn_t neo_intr(int irq, void *voidbrd)
dgnc_check_queue_flow_control(ch);
spin_unlock_irqrestore(>ch_lock, flags2);
 
-   continue;
+   break;
 
case UART_17158_RX_LINE_STATUS:
/*
 * RXRDY and RX LINE Status (logic OR of LSR[4:1])
 */
neo_parse_lsr(brd, port);
-   continue;
+   break;
 
case UART_17158_TXRDY:
/*
@@ -1027,14 +1005,14 @@ static irqreturn_t neo_intr(int irq, void *voidbrd)
 * it should be, I was getting things like RXDY too. 
Weird.
 */
neo_parse_isr(brd, port);
-   continue;
+   break;
 
case UART_17158_MSR:
/*
 * MSR or flow control was seen.
 */
neo_parse_isr(brd, port);
-   continue;
+   break;
 
default:
/*
@@ -1043,8 +1021,10 @@ static irqreturn_t neo_intr(int irq, void *voidbrd)
 * these once and awhile.
 * Its harmless, just ignore it and move on.
 */
-   continue;
+   break;
}
+
+   port++;
}
 
/*
-- 
1.9.1

Re: [PATCH HACK 1/6] livepatch-test: Add more cases

2016-03-27 Thread Kamalesh Babulal

* Balbir Singh  [2016-03-26 18:11:22]:

> On Fri, Mar 25, 2016 at 3:37 AM, Kamalesh Babulal
>  wrote:
> > * Michael Ellerman  [2016-03-24 22:04:00]:
> >
> >> Not for merging.
> >>
> >
> > Hi Michael,
> >
> > Loading the livepatch sample module, trigger following warning
> >
> 
> The #if IS_MODULE(CONFIG_SCSI) code is buggy, you probably have
> CONFIG_SCSI=y, make it M or you can fix the code yourself. I had the
> same issue while testing
> 
> BTW, the tests worked fine with the changes proposed by Michael.
> 

Thanks, it helped. I was able to load the sample livepatch module
with the proposed changes.

Thanks,
Kamalesh.

Re: [PATCH HACK 1/6] livepatch-test: Add more cases

2016-03-27 Thread Kamalesh Babulal

* Balbir Singh  [2016-03-26 18:11:22]:

> On Fri, Mar 25, 2016 at 3:37 AM, Kamalesh Babulal
>  wrote:
> > * Michael Ellerman  [2016-03-24 22:04:00]:
> >
> >> Not for merging.
> >>
> >
> > Hi Michael,
> >
> > Loading the livepatch sample module, trigger following warning
> >
> 
> The #if IS_MODULE(CONFIG_SCSI) code is buggy, you probably have
> CONFIG_SCSI=y, make it M or you can fix the code yourself. I had the
> same issue while testing
> 
> BTW, the tests worked fine with the changes proposed by Michael.
> 

Thanks, it helped. I was able to load the sample livepatch module
with the proposed changes.

Thanks,
Kamalesh.

Re: [PATCH v2] ARM: exynos_defconfig: Enable Samsung media platform drivers as modules

2016-03-27 Thread Krzysztof Kozlowski

On 25.03.2016 12:15, Javier Martinez Canillas wrote:
>>
>> How about doing the same for multi_v7?
>>
> 
> I didn't consider multi_v7 because media drivers aren't necessary for booting
> the boards and so it could increase build times for not real benefits in most
> machines. But I can enable it in multi_v7 as a follow-up if you think that it
> makes sense there too.

I consider the use cases of multi_v7 the same as exynos (except the
difference in policy: always use modules). This means that if it makes
sense to enable some media drivers on exynos (for build coverage, boot
testing and real usage) then it makes sense to do the same for multi_v7.

In an ideal future we would move entirely from the exynos-specific image to one
common image for testing: the multi_v7. It would make things simpler.

Best regards,
Krzysztof

Re: [PATCH v2] ARM: exynos_defconfig: Enable Samsung media platform drivers as modules

2016-03-27 Thread Krzysztof Kozlowski

On 25.03.2016 12:15, Javier Martinez Canillas wrote:
>>
>> How about doing the same for multi_v7?
>>
> 
> I didn't consider multi_v7 because media drivers aren't necessary for booting
> the boards and so it could increase build times for not real benefits in most
> machines. But I can enable it in multi_v7 as a follow-up if you think that it
> makes sense there too.

I consider the use cases of multi_v7 the same as exynos (except the
difference in policy: always use modules). This means that if it makes
sense to enable some media drivers on exynos (for build coverage, boot
testing and real usage) then it makes sense to do the same for multi_v7.

In an ideal future we would move entirely from the exynos-specific image to one
common image for testing: the multi_v7. It would make things simpler.

Best regards,
Krzysztof

Re: [PATCH] brcmfmac: sdio: remove unused variable retry_limit

2016-03-27 Thread Julian Calaby

Hi All,

On Mon, Mar 21, 2016 at 4:34 AM, Colin King  wrote:
> From: Colin Ian King 
>
> retry_limit has never been used during the life of this driver, so
> we may as well remove it as it is redundant.
>
> Signed-off-by: Colin Ian King 

Looks right to me.

Reviewed-by: Julian Calaby 


> ---
>  drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 3 ---
>  1 file changed, 3 deletions(-)
>
> diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c 
> b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
> index 43fd3f4..cd92ba7 100644
> --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
> +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
> @@ -535,9 +535,6 @@ static int qcount[NUMPRIO];
>
>  #define RETRYCHAN(chan) ((chan) == SDPCM_EVENT_CHANNEL)
>
> -/* Retry count for register access failures */
> -static const uint retry_limit = 2;
> -
>  /* Limit on rounding up frames */
>  static const uint max_roundup = 512;
>
> --
> 2.7.3
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Julian Calaby

Email: julian.cal...@gmail.com
Profile: http://www.google.com/profiles/julian.calaby/

Re: [PATCH] brcmfmac: sdio: remove unused variable retry_limit

2016-03-27 Thread Julian Calaby

Hi All,

On Mon, Mar 21, 2016 at 4:34 AM, Colin King  wrote:
> From: Colin Ian King 
>
> retry_limit has never been used during the life of this driver, so
> we may as well remove it as it is redundant.
>
> Signed-off-by: Colin Ian King 

Looks right to me.

Reviewed-by: Julian Calaby 


> ---
>  drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 3 ---
>  1 file changed, 3 deletions(-)
>
> diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c 
> b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
> index 43fd3f4..cd92ba7 100644
> --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
> +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
> @@ -535,9 +535,6 @@ static int qcount[NUMPRIO];
>
>  #define RETRYCHAN(chan) ((chan) == SDPCM_EVENT_CHANNEL)
>
> -/* Retry count for register access failures */
> -static const uint retry_limit = 2;
> -
>  /* Limit on rounding up frames */
>  static const uint max_roundup = 512;
>
> --
> 2.7.3
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Julian Calaby

Email: julian.cal...@gmail.com
Profile: http://www.google.com/profiles/julian.calaby/

Re: [PATCH] Staging: rtl8723au: Remove function rtw_enqueue_{recvbuf23a/recvbuf23a_to_head}

2016-03-27 Thread Julian Calaby

Hi All,

On Sat, Mar 26, 2016 at 5:24 PM, Bhumika Goyal  wrote:
> The functions rtw_enqueue_recvbuf23a and rtw_enqueue_recvbuf23a_to_head
> are never used anywhere in the kernel. So, remove their definition and
> prototype.
> Grepped to find occurences.
>
> Signed-off-by: Bhumika Goyal 

Looks right to me.

Reviewed-by: Julian Calaby 

> ---
>  drivers/staging/rtl8723au/core/rtw_recv.c| 25 -
>  drivers/staging/rtl8723au/include/rtw_recv.h |  2 --
>  2 files changed, 27 deletions(-)
>
> diff --git a/drivers/staging/rtl8723au/core/rtw_recv.c 
> b/drivers/staging/rtl8723au/core/rtw_recv.c
> index 989ed07..150dabc 100644
> --- a/drivers/staging/rtl8723au/core/rtw_recv.c
> +++ b/drivers/staging/rtl8723au/core/rtw_recv.c
> @@ -211,31 +211,6 @@ u32 rtw_free_uc_swdec_pending_queue23a(struct 
> rtw_adapter *adapter)
> return cnt;
>  }
>
> -int rtw_enqueue_recvbuf23a_to_head(struct recv_buf *precvbuf, struct 
> rtw_queue *queue)
> -{
> -   spin_lock_bh(>lock);
> -
> -   list_del_init(>list);
> -   list_add(>list, get_list_head(queue));
> -
> -   spin_unlock_bh(>lock);
> -
> -   return _SUCCESS;
> -}
> -
> -int rtw_enqueue_recvbuf23a(struct recv_buf *precvbuf, struct rtw_queue 
> *queue)
> -{
> -   unsigned long irqL;
> -
> -   spin_lock_irqsave(>lock, irqL);
> -
> -   list_del_init(>list);
> -
> -   list_add_tail(>list, get_list_head(queue));
> -   spin_unlock_irqrestore(>lock, irqL);
> -   return _SUCCESS;
> -}
> -
>  struct recv_buf *rtw_dequeue_recvbuf23a (struct rtw_queue *queue)
>  {
> unsigned long irqL;
> diff --git a/drivers/staging/rtl8723au/include/rtw_recv.h 
> b/drivers/staging/rtl8723au/include/rtw_recv.h
> index dc784be..85a5edb 100644
> --- a/drivers/staging/rtl8723au/include/rtw_recv.h
> +++ b/drivers/staging/rtl8723au/include/rtw_recv.h
> @@ -279,8 +279,6 @@ int rtw_enqueue_recvframe23a(struct recv_frame 
> *precvframe, struct rtw_queue *qu
>
>  u32 rtw_free_uc_swdec_pending_queue23a(struct rtw_adapter *adapter);
>
> -int rtw_enqueue_recvbuf23a_to_head(struct recv_buf *precvbuf, struct 
> rtw_queue *queue);
> -int rtw_enqueue_recvbuf23a(struct recv_buf *precvbuf, struct rtw_queue 
> *queue);
>  struct recv_buf *rtw_dequeue_recvbuf23a(struct rtw_queue *queue);
>
>  void rtw_reordering_ctrl_timeout_handler23a(unsigned long pcontext);
> --
> 1.9.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Julian Calaby

Email: julian.cal...@gmail.com
Profile: http://www.google.com/profiles/julian.calaby/

[PATCH v2] regulator: s2mps11: Fix invalid selector mask and voltages for buck9

2016-03-27 Thread Krzysztof Kozlowski

The buck9 regulator of S2MPS11 PMIC had incorrect vsel_mask (0xff
instead of 0x1f) thus reading entire register as buck9's voltage. This
effectively caused regulator core to interpret values as higher voltages
than they were and then to set real voltage much lower than intended.

The buck9 provides power to other regulators, including LDO13
and LDO19 which supply the MMC2 (SD card). The lower voltage
caused SD card detection errors on Odroid XU3/XU4:
mmc1: card never left busy state
mmc1: error -110 whilst initialising SD card

During driver probe the regulator core was checking whether initial
voltage matches the constraints. With incorrect vsel_mask of 0xff and
default value of 0x50, the core interpreted this as 5 V which is outside
of constraints (3-3.775 V). Then the regulator core was adjusting the
voltage to match the constraints. With incorrect vsel_mask this new
voltage mapped to a very low voltage in the driver.

Fixes: cb74685ecb39 ("regulator: s2mps11: Add samsung s2mps11 regulator driver")
Cc: 
Signed-off-by: Krzysztof Kozlowski 
Reviewed-by: Javier Martinez Canillas 
Tested-by: Javier Martinez Canillas 

---

Changes since v1:
1. The driver did not lack minimal linear selector but the selector mask
   was wrong. The effect was exactly the same - writing values
   corresponding to a lower voltage in reality.
   Instead of setting '.linear_min_sel', set the '.vsel_mask' to a
   proper value.
2. Rewrite title/commit.
3. Minor changes after Javier's review.
4. Add Javier's review/tested by.

The issue can be reproduced on next-20160324 with
bae4fdc88d7f7dda1 (regulator: core: Ensure we are at least in bounds
for our constraints).
---
 drivers/regulator/s2mps11.c | 28 ++--
 include/linux/mfd/samsung/s2mps11.h |  2 ++
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c
index d24e2c783dc5..6dfa3502e1f1 100644
--- a/drivers/regulator/s2mps11.c
+++ b/drivers/regulator/s2mps11.c
@@ -308,7 +308,7 @@ static struct regulator_ops s2mps11_buck_ops = {
.enable_mask= S2MPS11_ENABLE_MASK   \
 }
 
-#define regulator_desc_s2mps11_buck6_10(num, min, step) {  \
+#define regulator_desc_s2mps11_buck67810(num, min, step) { \
.name   = "BUCK"#num,   \
.id = S2MPS11_BUCK##num,\
.ops= _buck_ops,\
@@ -324,6 +324,22 @@ static struct regulator_ops s2mps11_buck_ops = {
.enable_mask= S2MPS11_ENABLE_MASK   \
 }
 
+#define regulator_desc_s2mps11_buck9 { \
+   .name   = "BUCK9",  \
+   .id = S2MPS11_BUCK9,\
+   .ops= _buck_ops,\
+   .type   = REGULATOR_VOLTAGE,\
+   .owner  = THIS_MODULE,  \
+   .min_uV = MIN_3000_MV,  \
+   .uV_step= STEP_25_MV,   \
+   .n_voltages = S2MPS11_BUCK9_N_VOLTAGES, \
+   .ramp_delay = S2MPS11_RAMP_DELAY,   \
+   .vsel_reg   = S2MPS11_REG_B9CTRL2,  \
+   .vsel_mask  = S2MPS11_BUCK9_VSEL_MASK,  \
+   .enable_reg = S2MPS11_REG_B9CTRL1,  \
+   .enable_mask= S2MPS11_ENABLE_MASK   \
+}
+
 static const struct regulator_desc s2mps11_regulators[] = {
regulator_desc_s2mps11_ldo(1, STEP_25_MV),
regulator_desc_s2mps11_ldo(2, STEP_50_MV),
@@ -368,11 +384,11 @@ static const struct regulator_desc s2mps11_regulators[] = 
{
regulator_desc_s2mps11_buck1_4(3),
regulator_desc_s2mps11_buck1_4(4),
regulator_desc_s2mps11_buck5,
-   regulator_desc_s2mps11_buck6_10(6, MIN_600_MV, STEP_6_25_MV),
-   regulator_desc_s2mps11_buck6_10(7, MIN_600_MV, STEP_6_25_MV),
-   regulator_desc_s2mps11_buck6_10(8, MIN_600_MV, STEP_6_25_MV),
-   regulator_desc_s2mps11_buck6_10(9, MIN_3000_MV, STEP_25_MV),
-   regulator_desc_s2mps11_buck6_10(10, MIN_750_MV, STEP_12_5_MV),
+   regulator_desc_s2mps11_buck67810(6, MIN_600_MV, STEP_6_25_MV),
+   regulator_desc_s2mps11_buck67810(7, MIN_600_MV, STEP_6_25_MV),
+   regulator_desc_s2mps11_buck67810(8, MIN_600_MV, STEP_6_25_MV),
+   regulator_desc_s2mps11_buck9,
+   regulator_desc_s2mps11_buck67810(10, MIN_750_MV, STEP_12_5_MV),
 };
 
 static struct regulator_ops s2mps14_reg_ops;
diff --git a/include/linux/mfd/samsung/s2mps11.h 
b/include/linux/mfd/samsung/s2mps11.h
index b288965e8101..2c14eeca46f0 100644
--- a/include/linux/mfd/samsung/s2mps11.h
+++ b/include/linux/mfd/samsung/s2mps11.h
@@ -173,10 +173,12

Re: [PATCH] Staging: rtl8723au: Remove function rtw_enqueue_{recvbuf23a/recvbuf23a_to_head}

2016-03-27 Thread Julian Calaby

Hi All,

On Sat, Mar 26, 2016 at 5:24 PM, Bhumika Goyal  wrote:
> The functions rtw_enqueue_recvbuf23a and rtw_enqueue_recvbuf23a_to_head
> are never used anywhere in the kernel. So, remove their definition and
> prototype.
> Grepped to find occurences.
>
> Signed-off-by: Bhumika Goyal 

Looks right to me.

Reviewed-by: Julian Calaby 

> ---
>  drivers/staging/rtl8723au/core/rtw_recv.c| 25 -
>  drivers/staging/rtl8723au/include/rtw_recv.h |  2 --
>  2 files changed, 27 deletions(-)
>
> diff --git a/drivers/staging/rtl8723au/core/rtw_recv.c 
> b/drivers/staging/rtl8723au/core/rtw_recv.c
> index 989ed07..150dabc 100644
> --- a/drivers/staging/rtl8723au/core/rtw_recv.c
> +++ b/drivers/staging/rtl8723au/core/rtw_recv.c
> @@ -211,31 +211,6 @@ u32 rtw_free_uc_swdec_pending_queue23a(struct 
> rtw_adapter *adapter)
> return cnt;
>  }
>
> -int rtw_enqueue_recvbuf23a_to_head(struct recv_buf *precvbuf, struct 
> rtw_queue *queue)
> -{
> -   spin_lock_bh(>lock);
> -
> -   list_del_init(>list);
> -   list_add(>list, get_list_head(queue));
> -
> -   spin_unlock_bh(>lock);
> -
> -   return _SUCCESS;
> -}
> -
> -int rtw_enqueue_recvbuf23a(struct recv_buf *precvbuf, struct rtw_queue 
> *queue)
> -{
> -   unsigned long irqL;
> -
> -   spin_lock_irqsave(>lock, irqL);
> -
> -   list_del_init(>list);
> -
> -   list_add_tail(>list, get_list_head(queue));
> -   spin_unlock_irqrestore(>lock, irqL);
> -   return _SUCCESS;
> -}
> -
>  struct recv_buf *rtw_dequeue_recvbuf23a (struct rtw_queue *queue)
>  {
> unsigned long irqL;
> diff --git a/drivers/staging/rtl8723au/include/rtw_recv.h 
> b/drivers/staging/rtl8723au/include/rtw_recv.h
> index dc784be..85a5edb 100644
> --- a/drivers/staging/rtl8723au/include/rtw_recv.h
> +++ b/drivers/staging/rtl8723au/include/rtw_recv.h
> @@ -279,8 +279,6 @@ int rtw_enqueue_recvframe23a(struct recv_frame 
> *precvframe, struct rtw_queue *qu
>
>  u32 rtw_free_uc_swdec_pending_queue23a(struct rtw_adapter *adapter);
>
> -int rtw_enqueue_recvbuf23a_to_head(struct recv_buf *precvbuf, struct 
> rtw_queue *queue);
> -int rtw_enqueue_recvbuf23a(struct recv_buf *precvbuf, struct rtw_queue 
> *queue);
>  struct recv_buf *rtw_dequeue_recvbuf23a(struct rtw_queue *queue);
>
>  void rtw_reordering_ctrl_timeout_handler23a(unsigned long pcontext);
> --
> 1.9.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Julian Calaby

Email: julian.cal...@gmail.com
Profile: http://www.google.com/profiles/julian.calaby/

[PATCH v2] regulator: s2mps11: Fix invalid selector mask and voltages for buck9

2016-03-27 Thread Krzysztof Kozlowski

The buck9 regulator of S2MPS11 PMIC had incorrect vsel_mask (0xff
instead of 0x1f) thus reading entire register as buck9's voltage. This
effectively caused regulator core to interpret values as higher voltages
than they were and then to set real voltage much lower than intended.

The buck9 provides power to other regulators, including LDO13
and LDO19 which supply the MMC2 (SD card). The lower voltage
caused SD card detection errors on Odroid XU3/XU4:
mmc1: card never left busy state
mmc1: error -110 whilst initialising SD card

During driver probe the regulator core was checking whether initial
voltage matches the constraints. With incorrect vsel_mask of 0xff and
default value of 0x50, the core interpreted this as 5 V which is outside
of constraints (3-3.775 V). Then the regulator core was adjusting the
voltage to match the constraints. With incorrect vsel_mask this new
voltage mapped to a very low voltage in the driver.

Fixes: cb74685ecb39 ("regulator: s2mps11: Add samsung s2mps11 regulator driver")
Cc: 
Signed-off-by: Krzysztof Kozlowski 
Reviewed-by: Javier Martinez Canillas 
Tested-by: Javier Martinez Canillas 

---

Changes since v1:
1. The driver did not lack minimal linear selector but the selector mask
   was wrong. The effect was exactly the same - writing values
   corresponding to a lower voltage in reality.
   Instead of setting '.linear_min_sel', set the '.vsel_mask' to a
   proper value.
2. Rewrite title/commit.
3. Minor changes after Javier's review.
4. Add Javier's review/tested by.

The issue can be reproduced on next-20160324 with
bae4fdc88d7f7dda1 (regulator: core: Ensure we are at least in bounds
for our constraints).
---
 drivers/regulator/s2mps11.c | 28 ++--
 include/linux/mfd/samsung/s2mps11.h |  2 ++
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c
index d24e2c783dc5..6dfa3502e1f1 100644
--- a/drivers/regulator/s2mps11.c
+++ b/drivers/regulator/s2mps11.c
@@ -308,7 +308,7 @@ static struct regulator_ops s2mps11_buck_ops = {
.enable_mask= S2MPS11_ENABLE_MASK   \
 }
 
-#define regulator_desc_s2mps11_buck6_10(num, min, step) {  \
+#define regulator_desc_s2mps11_buck67810(num, min, step) { \
.name   = "BUCK"#num,   \
.id = S2MPS11_BUCK##num,\
.ops= &s2mps11_buck_ops,\
@@ -324,6 +324,22 @@ static struct regulator_ops s2mps11_buck_ops = {
.enable_mask= S2MPS11_ENABLE_MASK   \
 }
 
+#define regulator_desc_s2mps11_buck9 { \
+   .name   = "BUCK9",  \
+   .id = S2MPS11_BUCK9,\
+   .ops= &s2mps11_buck_ops,\
+   .type   = REGULATOR_VOLTAGE,\
+   .owner  = THIS_MODULE,  \
+   .min_uV = MIN_3000_MV,  \
+   .uV_step= STEP_25_MV,   \
+   .n_voltages = S2MPS11_BUCK9_N_VOLTAGES, \
+   .ramp_delay = S2MPS11_RAMP_DELAY,   \
+   .vsel_reg   = S2MPS11_REG_B9CTRL2,  \
+   .vsel_mask  = S2MPS11_BUCK9_VSEL_MASK,  \
+   .enable_reg = S2MPS11_REG_B9CTRL1,  \
+   .enable_mask= S2MPS11_ENABLE_MASK   \
+}
+
 static const struct regulator_desc s2mps11_regulators[] = {
regulator_desc_s2mps11_ldo(1, STEP_25_MV),
regulator_desc_s2mps11_ldo(2, STEP_50_MV),
@@ -368,11 +384,11 @@ static const struct regulator_desc s2mps11_regulators[] = 
{
regulator_desc_s2mps11_buck1_4(3),
regulator_desc_s2mps11_buck1_4(4),
regulator_desc_s2mps11_buck5,
-   regulator_desc_s2mps11_buck6_10(6, MIN_600_MV, STEP_6_25_MV),
-   regulator_desc_s2mps11_buck6_10(7, MIN_600_MV, STEP_6_25_MV),
-   regulator_desc_s2mps11_buck6_10(8, MIN_600_MV, STEP_6_25_MV),
-   regulator_desc_s2mps11_buck6_10(9, MIN_3000_MV, STEP_25_MV),
-   regulator_desc_s2mps11_buck6_10(10, MIN_750_MV, STEP_12_5_MV),
+   regulator_desc_s2mps11_buck67810(6, MIN_600_MV, STEP_6_25_MV),
+   regulator_desc_s2mps11_buck67810(7, MIN_600_MV, STEP_6_25_MV),
+   regulator_desc_s2mps11_buck67810(8, MIN_600_MV, STEP_6_25_MV),
+   regulator_desc_s2mps11_buck9,
+   regulator_desc_s2mps11_buck67810(10, MIN_750_MV, STEP_12_5_MV),
 };
 
 static struct regulator_ops s2mps14_reg_ops;
diff --git a/include/linux/mfd/samsung/s2mps11.h 
b/include/linux/mfd/samsung/s2mps11.h
index b288965e8101..2c14eeca46f0 100644
--- a/include/linux/mfd/samsung/s2mps11.h
+++ b/include/linux/mfd/samsung/s2mps11.h
@@ -173,10 +173,12 @@ enum s2mps11_regulators {
 
 #define S2MPS11_LDO_VSEL_MASK  0x3F
 #define

Re: [PATCH] Staging: rtl8723au: Remove unused functions

2016-03-27 Thread Julian Calaby

Hi All,

On Sat, Mar 26, 2016 at 5:14 PM, Bhumika Goyal  wrote:
> The functions rtw_get_oper_bw23a and rtw_get_oper_ch23aoffset are never
> used anywhere in the kernel. So, remove their definition and prototype.
> Grepped to find occurrences.
>
> Signed-off-by: Bhumika Goyal 

Looks right to me.

Reviewed-by: Julian Calaby 


> ---
>  drivers/staging/rtl8723au/core/rtw_wlan_util.c   | 10 --
>  drivers/staging/rtl8723au/include/rtw_mlme_ext.h |  2 --
>  2 files changed, 12 deletions(-)
>
> diff --git a/drivers/staging/rtl8723au/core/rtw_wlan_util.c 
> b/drivers/staging/rtl8723au/core/rtw_wlan_util.c
> index cc2b84b..694cf17 100644
> --- a/drivers/staging/rtl8723au/core/rtw_wlan_util.c
> +++ b/drivers/staging/rtl8723au/core/rtw_wlan_util.c
> @@ -304,21 +304,11 @@ inline void rtw_set_oper_ch23a(struct rtw_adapter 
> *adapter, u8 ch)
> adapter_to_dvobj(adapter)->oper_channel = ch;
>  }
>
> -inline u8 rtw_get_oper_bw23a(struct rtw_adapter *adapter)
> -{
> -   return adapter_to_dvobj(adapter)->oper_bwmode;
> -}
> -
>  inline void rtw_set_oper_bw23a(struct rtw_adapter *adapter, u8 bw)
>  {
> adapter_to_dvobj(adapter)->oper_bwmode = bw;
>  }
>
> -inline u8 rtw_get_oper_ch23aoffset(struct rtw_adapter *adapter)
> -{
> -   return adapter_to_dvobj(adapter)->oper_ch_offset;
> -}
> -
>  inline void rtw_set_oper_ch23aoffset23a(struct rtw_adapter *adapter, u8 
> offset)
>  {
> adapter_to_dvobj(adapter)->oper_ch_offset = offset;
> diff --git a/drivers/staging/rtl8723au/include/rtw_mlme_ext.h 
> b/drivers/staging/rtl8723au/include/rtw_mlme_ext.h
> index ea2a6c9..0e7d3da 100644
> --- a/drivers/staging/rtl8723au/include/rtw_mlme_ext.h
> +++ b/drivers/staging/rtl8723au/include/rtw_mlme_ext.h
> @@ -461,9 +461,7 @@ void Update23aTblForSoftAP(u8 *bssrateset, u32 
> bssratelen);
>
>  u8 rtw_get_oper_ch23a(struct rtw_adapter *adapter);
>  void rtw_set_oper_ch23a(struct rtw_adapter *adapter, u8 ch);
> -u8 rtw_get_oper_bw23a(struct rtw_adapter *adapter);
>  void rtw_set_oper_bw23a(struct rtw_adapter *adapter, u8 bw);
> -u8 rtw_get_oper_ch23aoffset(struct rtw_adapter *adapter);
>  void rtw_set_oper_ch23aoffset23a(struct rtw_adapter *adapter, u8 offset);
>
>  void set_channel_bwmode23a(struct rtw_adapter *padapter, unsigned char 
> channel,
> --
> 1.9.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Julian Calaby

Email: julian.cal...@gmail.com
Profile: http://www.google.com/profiles/julian.calaby/

Re: [PATCH] Staging: rtl8723au: Remove unused functions

2016-03-27 Thread Julian Calaby

Hi All,

On Sat, Mar 26, 2016 at 5:14 PM, Bhumika Goyal  wrote:
> The functions rtw_get_oper_bw23a and rtw_get_oper_ch23aoffset are never
> used anywhere in the kernel. So, remove their definition and prototype.
> Grepped to find occurrences.
>
> Signed-off-by: Bhumika Goyal 

Looks right to me.

Reviewed-by: Julian Calaby 


> ---
>  drivers/staging/rtl8723au/core/rtw_wlan_util.c   | 10 --
>  drivers/staging/rtl8723au/include/rtw_mlme_ext.h |  2 --
>  2 files changed, 12 deletions(-)
>
> diff --git a/drivers/staging/rtl8723au/core/rtw_wlan_util.c 
> b/drivers/staging/rtl8723au/core/rtw_wlan_util.c
> index cc2b84b..694cf17 100644
> --- a/drivers/staging/rtl8723au/core/rtw_wlan_util.c
> +++ b/drivers/staging/rtl8723au/core/rtw_wlan_util.c
> @@ -304,21 +304,11 @@ inline void rtw_set_oper_ch23a(struct rtw_adapter 
> *adapter, u8 ch)
> adapter_to_dvobj(adapter)->oper_channel = ch;
>  }
>
> -inline u8 rtw_get_oper_bw23a(struct rtw_adapter *adapter)
> -{
> -   return adapter_to_dvobj(adapter)->oper_bwmode;
> -}
> -
>  inline void rtw_set_oper_bw23a(struct rtw_adapter *adapter, u8 bw)
>  {
> adapter_to_dvobj(adapter)->oper_bwmode = bw;
>  }
>
> -inline u8 rtw_get_oper_ch23aoffset(struct rtw_adapter *adapter)
> -{
> -   return adapter_to_dvobj(adapter)->oper_ch_offset;
> -}
> -
>  inline void rtw_set_oper_ch23aoffset23a(struct rtw_adapter *adapter, u8 
> offset)
>  {
> adapter_to_dvobj(adapter)->oper_ch_offset = offset;
> diff --git a/drivers/staging/rtl8723au/include/rtw_mlme_ext.h 
> b/drivers/staging/rtl8723au/include/rtw_mlme_ext.h
> index ea2a6c9..0e7d3da 100644
> --- a/drivers/staging/rtl8723au/include/rtw_mlme_ext.h
> +++ b/drivers/staging/rtl8723au/include/rtw_mlme_ext.h
> @@ -461,9 +461,7 @@ void Update23aTblForSoftAP(u8 *bssrateset, u32 
> bssratelen);
>
>  u8 rtw_get_oper_ch23a(struct rtw_adapter *adapter);
>  void rtw_set_oper_ch23a(struct rtw_adapter *adapter, u8 ch);
> -u8 rtw_get_oper_bw23a(struct rtw_adapter *adapter);
>  void rtw_set_oper_bw23a(struct rtw_adapter *adapter, u8 bw);
> -u8 rtw_get_oper_ch23aoffset(struct rtw_adapter *adapter);
>  void rtw_set_oper_ch23aoffset23a(struct rtw_adapter *adapter, u8 offset);
>
>  void set_channel_bwmode23a(struct rtw_adapter *padapter, unsigned char 
> channel,
> --
> 1.9.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Julian Calaby

Email: julian.cal...@gmail.com
Profile: http://www.google.com/profiles/julian.calaby/

1 2 3 4 5 6 >

1 - 100 of 578 matches

Mail list logo