Re: linux-next: Signed-off-by missing for commit in the s390 tree

2017-09-26 Thread Martin Schwidefsky
On Wed, 27 Sep 2017 09:27:34 +1000
Stephen Rothwell  wrote:

> Commit
> 
>   fc9abdb8a661 ("samples/kprobes: Add s390 case in kprobe example module")
> 
> is missing a Signed-off-by from its committer.

Fixed, thanks!

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: linux-next: Signed-off-by missing for commit in the s390 tree

2017-09-26 Thread Martin Schwidefsky
On Wed, 27 Sep 2017 09:27:34 +1000
Stephen Rothwell  wrote:

> Commit
> 
>   fc9abdb8a661 ("samples/kprobes: Add s390 case in kprobe example module")
> 
> is missing a Signed-off-by from its committer.

Fixed, thanks!

-- 
blue skies,
   Martin.

"Reality continues to ruin my life." - Calvin.



Re: [PATCH] HID: hid-multitouch: support fine-grain orientation reporting

2017-09-26 Thread Dmitry Torokhov
Hi Wei-Ning,

On Tue, Sep 26, 2017 at 8:03 PM, Wei-Ning Huang  wrote:
>
> The current hid-multitouch driver only allow the report of two
> orientations, vertical and horizontal. We use the Azimuth orientation
> usage 0x5b under the Digitizer usage page to report orientation directly
> from the hid device. A new quirk MT_QUIRK_REPORT_ORIENTATION is added so
> user can enable this only if their device supports it.

The patch description is stale, there is no quirk anymore, and the
usage is 0x3f.

>
> Signed-off-by: Wei-Ning Huang 
> Reviewed-by: Dmitry Torokhov 
> ---
>  drivers/hid/hid-multitouch.c | 38 --
>  include/linux/hid.h  |  1 +
>  2 files changed, 37 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
> index 440b999304a5..4663bf4b2892 100644
> --- a/drivers/hid/hid-multitouch.c
> +++ b/drivers/hid/hid-multitouch.c
> @@ -84,11 +84,12 @@ MODULE_LICENSE("GPL");
>  #define MT_IO_FLAGS_PENDING_SLOTS  2
>
>  struct mt_slot {
> -   __s32 x, y, cx, cy, p, w, h;
> +   __s32 x, y, cx, cy, p, w, h, a;
> __s32 contactid;/* the device ContactID assigned to this slot 
> */
> bool touch_state;   /* is the touch valid? */
> bool inrange_state; /* is the finger in proximity of the sensor? 
> */
> bool confidence_state;  /* is the touch made by a finger? */
> +   bool has_azimuth;   /* the contact reports azimuth */
>  };
>
>  struct mt_class {
> @@ -591,6 +592,23 @@ static int mt_touch_input_mapping(struct hid_device 
> *hdev, struct hid_input *hi,
> td->cc_index = field->index;
> td->cc_value_index = usage->usage_index;
> return 1;
> +   case HID_DG_AZIMUTH:
> +   hid_map_usage(hi, usage, bit, max,
> +   EV_ABS, ABS_MT_ORIENTATION);
> +   /*
> +* Azimuth has the range of [0, MAX) representing a 
> full
> +* revolution. Set ABS_MT_ORIENTATION to a quarter of
> +* MAX according the definition of ABS_MT_ORIENTATION
> +*/
> +   input_set_abs_params(hi->input, ABS_MT_ORIENTATION,
> +   0, field->logical_maximum / 4,
> +   cls->sn_move ?
> +   field->logical_maximum / cls->sn_move : 0, 0);
> +   input_abs_set_res(hi->input, ABS_MT_ORIENTATION,
> +   hidinput_calc_abs_res(field,
> +   ABS_MT_ORIENTATION));

I do not think resolution makes sense for ABS_MT_ORIENTATION. The MAX
always corresponds to 90 degrees.

> +   mt_store_field(usage, td, hi);
> +   return 1;
> case HID_DG_CONTACTMAX:
> /* we don't set td->last_slot_field as contactcount 
> and
>  * contact max are global to the report */
> @@ -683,6 +701,10 @@ static void mt_complete_slot(struct mt_device *td, 
> struct input_dev *input)
> int wide = (s->w > s->h);
> int major = max(s->w, s->h);
> int minor = min(s->w, s->h);
> +   int orientation = wide;
> +
> +   if (s->has_azimuth)
> +   orientation = s->a;
>
> /*
>  * divided by two to match visual scale of touch
> @@ -699,7 +721,8 @@ static void mt_complete_slot(struct mt_device *td, struct 
> input_dev *input)
> input_event(input, EV_ABS, ABS_MT_TOOL_Y, s->cy);
> input_event(input, EV_ABS, ABS_MT_DISTANCE,
> !s->touch_state);
> -   input_event(input, EV_ABS, ABS_MT_ORIENTATION, wide);
> +   input_event(input, EV_ABS, ABS_MT_ORIENTATION,
> +   orientation);
> input_event(input, EV_ABS, ABS_MT_PRESSURE, s->p);
> input_event(input, EV_ABS, ABS_MT_TOUCH_MAJOR, major);
> input_event(input, EV_ABS, ABS_MT_TOUCH_MINOR, minor);
> @@ -789,6 +812,17 @@ static void mt_process_mt_event(struct hid_device *hid, 
> struct hid_field *field,
> break;
> case HID_DG_CONTACTCOUNT:
> break;
> +   case HID_DG_AZIMUTH:
> +   /*
> +* Azimuth is counter-clockwise and ranges from [0, 
> MAX)
> +* (a full revolution). Convert it to clockwise 
> ranging
> +* [-MAX/2, MAX/2].

Benjamin, Henrik, have you worked with 

Re: [PATCH] HID: hid-multitouch: support fine-grain orientation reporting

2017-09-26 Thread Dmitry Torokhov
Hi Wei-Ning,

On Tue, Sep 26, 2017 at 8:03 PM, Wei-Ning Huang  wrote:
>
> The current hid-multitouch driver only allow the report of two
> orientations, vertical and horizontal. We use the Azimuth orientation
> usage 0x5b under the Digitizer usage page to report orientation directly
> from the hid device. A new quirk MT_QUIRK_REPORT_ORIENTATION is added so
> user can enable this only if their device supports it.

The patch description is stale, there is no quirk anymore, and the
usage is 0x3f.

>
> Signed-off-by: Wei-Ning Huang 
> Reviewed-by: Dmitry Torokhov 
> ---
>  drivers/hid/hid-multitouch.c | 38 --
>  include/linux/hid.h  |  1 +
>  2 files changed, 37 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
> index 440b999304a5..4663bf4b2892 100644
> --- a/drivers/hid/hid-multitouch.c
> +++ b/drivers/hid/hid-multitouch.c
> @@ -84,11 +84,12 @@ MODULE_LICENSE("GPL");
>  #define MT_IO_FLAGS_PENDING_SLOTS  2
>
>  struct mt_slot {
> -   __s32 x, y, cx, cy, p, w, h;
> +   __s32 x, y, cx, cy, p, w, h, a;
> __s32 contactid;/* the device ContactID assigned to this slot 
> */
> bool touch_state;   /* is the touch valid? */
> bool inrange_state; /* is the finger in proximity of the sensor? 
> */
> bool confidence_state;  /* is the touch made by a finger? */
> +   bool has_azimuth;   /* the contact reports azimuth */
>  };
>
>  struct mt_class {
> @@ -591,6 +592,23 @@ static int mt_touch_input_mapping(struct hid_device 
> *hdev, struct hid_input *hi,
> td->cc_index = field->index;
> td->cc_value_index = usage->usage_index;
> return 1;
> +   case HID_DG_AZIMUTH:
> +   hid_map_usage(hi, usage, bit, max,
> +   EV_ABS, ABS_MT_ORIENTATION);
> +   /*
> +* Azimuth has the range of [0, MAX) representing a 
> full
> +* revolution. Set ABS_MT_ORIENTATION to a quarter of
> +* MAX according the definition of ABS_MT_ORIENTATION
> +*/
> +   input_set_abs_params(hi->input, ABS_MT_ORIENTATION,
> +   0, field->logical_maximum / 4,
> +   cls->sn_move ?
> +   field->logical_maximum / cls->sn_move : 0, 0);
> +   input_abs_set_res(hi->input, ABS_MT_ORIENTATION,
> +   hidinput_calc_abs_res(field,
> +   ABS_MT_ORIENTATION));

I do not think resolution makes sense for ABS_MT_ORIENTATION. The MAX
always corresponds to 90 degrees.

> +   mt_store_field(usage, td, hi);
> +   return 1;
> case HID_DG_CONTACTMAX:
> /* we don't set td->last_slot_field as contactcount 
> and
>  * contact max are global to the report */
> @@ -683,6 +701,10 @@ static void mt_complete_slot(struct mt_device *td, 
> struct input_dev *input)
> int wide = (s->w > s->h);
> int major = max(s->w, s->h);
> int minor = min(s->w, s->h);
> +   int orientation = wide;
> +
> +   if (s->has_azimuth)
> +   orientation = s->a;
>
> /*
>  * divided by two to match visual scale of touch
> @@ -699,7 +721,8 @@ static void mt_complete_slot(struct mt_device *td, struct 
> input_dev *input)
> input_event(input, EV_ABS, ABS_MT_TOOL_Y, s->cy);
> input_event(input, EV_ABS, ABS_MT_DISTANCE,
> !s->touch_state);
> -   input_event(input, EV_ABS, ABS_MT_ORIENTATION, wide);
> +   input_event(input, EV_ABS, ABS_MT_ORIENTATION,
> +   orientation);
> input_event(input, EV_ABS, ABS_MT_PRESSURE, s->p);
> input_event(input, EV_ABS, ABS_MT_TOUCH_MAJOR, major);
> input_event(input, EV_ABS, ABS_MT_TOUCH_MINOR, minor);
> @@ -789,6 +812,17 @@ static void mt_process_mt_event(struct hid_device *hid, 
> struct hid_field *field,
> break;
> case HID_DG_CONTACTCOUNT:
> break;
> +   case HID_DG_AZIMUTH:
> +   /*
> +* Azimuth is counter-clockwise and ranges from [0, 
> MAX)
> +* (a full revolution). Convert it to clockwise 
> ranging
> +* [-MAX/2, MAX/2].

Benjamin, Henrik, have you worked with devices reporting azimuth? I
assume MS HID spec uses 0 to 

Re: [RFC] a question about mlockall() and mprotect()

2017-09-26 Thread Xishi Qiu
On 2017/9/26 19:00, Michal Hocko wrote:

> On Tue 26-09-17 11:45:16, Vlastimil Babka wrote:
>> On 09/26/2017 11:22 AM, Xishi Qiu wrote:
>>> On 2017/9/26 17:13, Xishi Qiu wrote:
> This is still very fuzzy. What are you actually trying to achieve?

 I don't expect page fault any more after mlock.

>>>
>>> Our apps is some thing like RT, and page-fault maybe cause a lot of time,
>>> e.g. lock, mem reclaim ..., so I use mlock and don't want page fault
>>> any more.
>>
>> Why does your app then have restricted mprotect when calling mlockall()
>> and only later adjusts the mprotect?
> 
> Ahh, OK I see what is goging on. So you have PROT_NONE vma at the time
> mlockall and then later mprotect it something else and want to fault all
> that memory at the mprotect time?
> 
> So basically to do
> ---
> diff --git a/mm/mprotect.c b/mm/mprotect.c
> index 6d3e2f082290..b665b5d1c544 100644
> --- a/mm/mprotect.c
> +++ b/mm/mprotect.c
> @@ -369,7 +369,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct 
> vm_area_struct **pprev,
>* Private VM_LOCKED VMA becoming writable: trigger COW to avoid major
>* fault on access.
>*/
> - if ((oldflags & (VM_WRITE | VM_SHARED | VM_LOCKED)) == VM_LOCKED &&
> + if ((oldflags & (VM_WRITE | VM_LOCKED)) == VM_LOCKED &&
>   (newflags & VM_WRITE)) {
>   populate_vma_page_range(vma, start, end, NULL);
>   }
> 

Hi Michal,

My kernel is v3.10, and I missed this code, thank you reminding me.

Thanks,
Xishi Qiu



Re: [RFC] a question about mlockall() and mprotect()

2017-09-26 Thread Xishi Qiu
On 2017/9/26 19:00, Michal Hocko wrote:

> On Tue 26-09-17 11:45:16, Vlastimil Babka wrote:
>> On 09/26/2017 11:22 AM, Xishi Qiu wrote:
>>> On 2017/9/26 17:13, Xishi Qiu wrote:
> This is still very fuzzy. What are you actually trying to achieve?

 I don't expect page fault any more after mlock.

>>>
>>> Our apps is some thing like RT, and page-fault maybe cause a lot of time,
>>> e.g. lock, mem reclaim ..., so I use mlock and don't want page fault
>>> any more.
>>
>> Why does your app then have restricted mprotect when calling mlockall()
>> and only later adjusts the mprotect?
> 
> Ahh, OK I see what is goging on. So you have PROT_NONE vma at the time
> mlockall and then later mprotect it something else and want to fault all
> that memory at the mprotect time?
> 
> So basically to do
> ---
> diff --git a/mm/mprotect.c b/mm/mprotect.c
> index 6d3e2f082290..b665b5d1c544 100644
> --- a/mm/mprotect.c
> +++ b/mm/mprotect.c
> @@ -369,7 +369,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct 
> vm_area_struct **pprev,
>* Private VM_LOCKED VMA becoming writable: trigger COW to avoid major
>* fault on access.
>*/
> - if ((oldflags & (VM_WRITE | VM_SHARED | VM_LOCKED)) == VM_LOCKED &&
> + if ((oldflags & (VM_WRITE | VM_LOCKED)) == VM_LOCKED &&
>   (newflags & VM_WRITE)) {
>   populate_vma_page_range(vma, start, end, NULL);
>   }
> 

Hi Michal,

My kernel is v3.10, and I missed this code, thank you reminding me.

Thanks,
Xishi Qiu



[PATCH v2 2/4] mmc: sdhci-msm: Fix HW issue with power IRQ handling during reset

2017-09-26 Thread Vijay Viswanath
From: Sahitya Tummala 

There is a rare scenario in HW, where the first clear pulse could
be lost when the actual reset and clear/read of status register
are happening at the same time. Fix this by retrying upto 10 times
to ensure the status register gets cleared. Otherwise, this will
lead to a spurious power IRQ which results in system instability.

Signed-off-by: Sahitya Tummala 
Signed-off-by: Vijay Viswanath 
---
 drivers/mmc/host/sdhci-msm.c | 46 
 1 file changed, 42 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index d636251..42a65ab 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -995,17 +995,52 @@ static void sdhci_msm_set_uhs_signaling(struct sdhci_host 
*host,
sdhci_msm_hs400(host, >ios);
 }
 
-static void sdhci_msm_voltage_switch(struct sdhci_host *host)
+static void sdhci_msm_dump_pwr_ctrl_regs(struct sdhci_host *host)
+{
+   struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+   struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
+
+   pr_err("%s: PWRCTL_STATUS: 0x%08x | PWRCTL_MASK: 0x%08x | PWRCTL_CTL: 
0x%08x\n",
+   mmc_hostname(host->mmc),
+   readl_relaxed(msm_host->core_mem + CORE_PWRCTL_STATUS),
+   readl_relaxed(msm_host->core_mem + CORE_PWRCTL_MASK),
+   readl_relaxed(msm_host->core_mem + CORE_PWRCTL_CTL));
+}
+
+static void sdhci_msm_handle_pwr_irq(struct sdhci_host *host, int irq)
 {
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
u32 irq_status, irq_ack = 0;
+   int retry = 10;
 
irq_status = readl_relaxed(msm_host->core_mem + CORE_PWRCTL_STATUS);
irq_status &= INT_MASK;
 
writel_relaxed(irq_status, msm_host->core_mem + CORE_PWRCTL_CLEAR);
 
+   /*
+* There is a rare HW scenario where the first clear pulse could be
+* lost when actual reset and clear/read of status register is
+* happening at a time. Hence, retry for at least 10 times to make
+* sure status register is cleared. Otherwise, this will result in
+* a spurious power IRQ resulting in system instability.
+*/
+   while (irq_status & readl_relaxed(msm_host->core_mem +
+   CORE_PWRCTL_STATUS)) {
+   if (retry == 0) {
+   pr_err("%s: Timedout clearing (0x%x) pwrctl status 
register\n",
+   mmc_hostname(host->mmc), irq_status);
+   sdhci_msm_dump_pwr_ctrl_regs(host);
+   WARN_ON(1);
+   break;
+   }
+   writel_relaxed(irq_status,
+   msm_host->core_mem + CORE_PWRCTL_CLEAR);
+   retry--;
+   udelay(10);
+   }
+
if (irq_status & (CORE_PWRCTL_BUS_ON | CORE_PWRCTL_BUS_OFF))
irq_ack |= CORE_PWRCTL_BUS_SUCCESS;
if (irq_status & (CORE_PWRCTL_IO_LOW | CORE_PWRCTL_IO_HIGH))
@@ -1017,13 +1052,17 @@ static void sdhci_msm_voltage_switch(struct sdhci_host 
*host)
 * switches are handled by the sdhci core, so just report success.
 */
writel_relaxed(irq_ack, msm_host->core_mem + CORE_PWRCTL_CTL);
+
+   pr_debug("%s: %s: Handled IRQ(%d), irq_status=0x%x, ack=0x%x\n",
+   mmc_hostname(msm_host->mmc), __func__, irq, irq_status,
+   irq_ack);
 }
 
 static irqreturn_t sdhci_msm_pwr_irq(int irq, void *data)
 {
struct sdhci_host *host = (struct sdhci_host *)data;
 
-   sdhci_msm_voltage_switch(host);
+   sdhci_msm_handle_pwr_irq(host, irq);
 
return IRQ_HANDLED;
 }
@@ -1106,7 +1145,6 @@ static void sdhci_msm_set_clock(struct sdhci_host *host, 
unsigned int clock)
.get_max_clock = sdhci_msm_get_max_clock,
.set_bus_width = sdhci_set_bus_width,
.set_uhs_signaling = sdhci_msm_set_uhs_signaling,
-   .voltage_switch = sdhci_msm_voltage_switch,
 };
 
 static const struct sdhci_pltfm_data sdhci_msm_pdata = {
@@ -1257,7 +1295,7 @@ static int sdhci_msm_probe(struct platform_device *pdev)
 * acknowledged. Otherwise power irq interrupt handler would be
 * fired prematurely.
 */
-   sdhci_msm_voltage_switch(host);
+   sdhci_msm_handle_pwr_irq(host, 0);
 
/*
 * Ensure that above writes are propogated before interrupt enablement
-- 
 Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc. 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



[PATCH v2 2/4] mmc: sdhci-msm: Fix HW issue with power IRQ handling during reset

2017-09-26 Thread Vijay Viswanath
From: Sahitya Tummala 

There is a rare scenario in HW, where the first clear pulse could
be lost when the actual reset and clear/read of status register
are happening at the same time. Fix this by retrying upto 10 times
to ensure the status register gets cleared. Otherwise, this will
lead to a spurious power IRQ which results in system instability.

Signed-off-by: Sahitya Tummala 
Signed-off-by: Vijay Viswanath 
---
 drivers/mmc/host/sdhci-msm.c | 46 
 1 file changed, 42 insertions(+), 4 deletions(-)

diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index d636251..42a65ab 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -995,17 +995,52 @@ static void sdhci_msm_set_uhs_signaling(struct sdhci_host 
*host,
sdhci_msm_hs400(host, >ios);
 }
 
-static void sdhci_msm_voltage_switch(struct sdhci_host *host)
+static void sdhci_msm_dump_pwr_ctrl_regs(struct sdhci_host *host)
+{
+   struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+   struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
+
+   pr_err("%s: PWRCTL_STATUS: 0x%08x | PWRCTL_MASK: 0x%08x | PWRCTL_CTL: 
0x%08x\n",
+   mmc_hostname(host->mmc),
+   readl_relaxed(msm_host->core_mem + CORE_PWRCTL_STATUS),
+   readl_relaxed(msm_host->core_mem + CORE_PWRCTL_MASK),
+   readl_relaxed(msm_host->core_mem + CORE_PWRCTL_CTL));
+}
+
+static void sdhci_msm_handle_pwr_irq(struct sdhci_host *host, int irq)
 {
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
u32 irq_status, irq_ack = 0;
+   int retry = 10;
 
irq_status = readl_relaxed(msm_host->core_mem + CORE_PWRCTL_STATUS);
irq_status &= INT_MASK;
 
writel_relaxed(irq_status, msm_host->core_mem + CORE_PWRCTL_CLEAR);
 
+   /*
+* There is a rare HW scenario where the first clear pulse could be
+* lost when actual reset and clear/read of status register is
+* happening at a time. Hence, retry for at least 10 times to make
+* sure status register is cleared. Otherwise, this will result in
+* a spurious power IRQ resulting in system instability.
+*/
+   while (irq_status & readl_relaxed(msm_host->core_mem +
+   CORE_PWRCTL_STATUS)) {
+   if (retry == 0) {
+   pr_err("%s: Timedout clearing (0x%x) pwrctl status 
register\n",
+   mmc_hostname(host->mmc), irq_status);
+   sdhci_msm_dump_pwr_ctrl_regs(host);
+   WARN_ON(1);
+   break;
+   }
+   writel_relaxed(irq_status,
+   msm_host->core_mem + CORE_PWRCTL_CLEAR);
+   retry--;
+   udelay(10);
+   }
+
if (irq_status & (CORE_PWRCTL_BUS_ON | CORE_PWRCTL_BUS_OFF))
irq_ack |= CORE_PWRCTL_BUS_SUCCESS;
if (irq_status & (CORE_PWRCTL_IO_LOW | CORE_PWRCTL_IO_HIGH))
@@ -1017,13 +1052,17 @@ static void sdhci_msm_voltage_switch(struct sdhci_host 
*host)
 * switches are handled by the sdhci core, so just report success.
 */
writel_relaxed(irq_ack, msm_host->core_mem + CORE_PWRCTL_CTL);
+
+   pr_debug("%s: %s: Handled IRQ(%d), irq_status=0x%x, ack=0x%x\n",
+   mmc_hostname(msm_host->mmc), __func__, irq, irq_status,
+   irq_ack);
 }
 
 static irqreturn_t sdhci_msm_pwr_irq(int irq, void *data)
 {
struct sdhci_host *host = (struct sdhci_host *)data;
 
-   sdhci_msm_voltage_switch(host);
+   sdhci_msm_handle_pwr_irq(host, irq);
 
return IRQ_HANDLED;
 }
@@ -1106,7 +1145,6 @@ static void sdhci_msm_set_clock(struct sdhci_host *host, 
unsigned int clock)
.get_max_clock = sdhci_msm_get_max_clock,
.set_bus_width = sdhci_set_bus_width,
.set_uhs_signaling = sdhci_msm_set_uhs_signaling,
-   .voltage_switch = sdhci_msm_voltage_switch,
 };
 
 static const struct sdhci_pltfm_data sdhci_msm_pdata = {
@@ -1257,7 +1295,7 @@ static int sdhci_msm_probe(struct platform_device *pdev)
 * acknowledged. Otherwise power irq interrupt handler would be
 * fired prematurely.
 */
-   sdhci_msm_voltage_switch(host);
+   sdhci_msm_handle_pwr_irq(host, 0);
 
/*
 * Ensure that above writes are propogated before interrupt enablement
-- 
 Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc. 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



[PATCH v2 4/4] mmc: sdhci-msm: Add sdhci msm register write APIs which wait for pwr irq

2017-09-26 Thread Vijay Viswanath
Register writes which change voltage of IO lines or turn the IO bus
on/off require controller to be ready before progressing further. When
the controller is ready, it will generate a power irq which needs to be
handled. The thread which initiated the register write should wait for
power irq to complete. This will be done through the new sdhc msm write
APIs which will check whether the particular write can trigger a power
irq and wait for it with a timeout if it is expected.
The SDHC core power control IRQ gets triggered when -
* There is a state change in power control bit (bit 0)
  of SDHCI_POWER_CONTROL register.
* There is a state change in 1.8V enable bit (bit 3) of
  SDHCI_HOST_CONTROL2 register.
* Bit 1 of SDHCI_SOFTWARE_RESET is set.

Also add support APIs which are used by sdhc msm write APIs to check
if power irq is expected to be generated and wait for the power irq
to come and complete if the irq is expected.

This patch requires CONFIG_MMC_SDHCI_IO_ACCESSORS to be enabled.

Signed-off-by: Sahitya Tummala 
Signed-off-by: Vijay Viswanath 
---
 drivers/mmc/host/sdhci-msm.c | 173 ++-
 1 file changed, 171 insertions(+), 2 deletions(-)

diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index 42a65ab..b72a487 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -123,6 +123,10 @@
 #define CMUX_SHIFT_PHASE_MASK  (7 << CMUX_SHIFT_PHASE_SHIFT)
 
 #define MSM_MMC_AUTOSUSPEND_DELAY_MS   50
+
+/* Timeout value to avoid infinite waiting for pwr_irq */
+#define MSM_PWR_IRQ_TIMEOUT_MS 5000
+
 struct sdhci_msm_host {
struct platform_device *pdev;
void __iomem *core_mem; /* MSM SDCC mapped address */
@@ -138,6 +142,10 @@ struct sdhci_msm_host {
bool calibration_done;
u8 saved_tuning_phase;
bool use_cdclp533;
+   u32 curr_pwr_state;
+   u32 curr_io_level;
+   wait_queue_head_t pwr_irq_wait;
+   bool pwr_irq_flag;
 };
 
 static unsigned int msm_get_clock_rate_for_bus_mode(struct sdhci_host *host,
@@ -995,6 +1003,73 @@ static void sdhci_msm_set_uhs_signaling(struct sdhci_host 
*host,
sdhci_msm_hs400(host, >ios);
 }
 
+static inline void sdhci_msm_init_pwr_irq_wait(struct sdhci_msm_host *msm_host)
+{
+   init_waitqueue_head(_host->pwr_irq_wait);
+}
+
+static inline void sdhci_msm_complete_pwr_irq_wait(
+   struct sdhci_msm_host *msm_host)
+{
+   wake_up(_host->pwr_irq_wait);
+}
+
+/*
+ * sdhci_msm_check_power_status API should be called when registers writes
+ * which can toggle sdhci IO bus ON/OFF or change IO lines HIGH/LOW happens.
+ * To what state the register writes will change the IO lines should be passed
+ * as the argument req_type. This API will check whether the IO line's state
+ * is already the expected state and will wait for power irq only if
+ * power irq is expected to be trigerred based on the current IO line state
+ * and expected IO line state.
+ */
+static void sdhci_msm_check_power_status(struct sdhci_host *host, u32 req_type)
+{
+   struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+   struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
+   bool done = false;
+
+   pr_debug("%s: %s: request %d curr_pwr_state %x curr_io_level %x\n",
+   mmc_hostname(host->mmc), __func__, req_type,
+   msm_host->curr_pwr_state, msm_host->curr_io_level);
+
+   /*
+* The IRQ for request type IO High/LOW will be generated when -
+* there is a state change in 1.8V enable bit (bit 3) of
+* SDHCI_HOST_CONTROL2 register. The reset state of that bit is 0
+* which indicates 3.3V IO voltage. So, when MMC core layer tries
+* to set it to 3.3V before card detection happens, the
+* IRQ doesn't get triggered as there is no state change in this bit.
+* The driver already handles this case by changing the IO voltage
+* level to high as part of controller power up sequence. Hence, check
+* for host->pwr to handle a case where IO voltage high request is
+* issued even before controller power up.
+*/
+   if ((req_type & REQ_IO_HIGH) && !host->pwr) {
+   pr_debug("%s: do not wait for power IRQ that never comes, 
req_type: %d\n",
+   mmc_hostname(host->mmc), req_type);
+   return;
+   }
+   if ((req_type & msm_host->curr_pwr_state) ||
+   (req_type & msm_host->curr_io_level))
+   done = true;
+   /*
+* This is needed here to handle cases where register writes will
+* not change the current bus state or io level of the controller.
+* In this case, no power irq will be triggerred and we should
+* not wait.
+*/
+   if (!done) {
+   if (!wait_event_timeout(msm_host->pwr_irq_wait,
+

[PATCH v2 3/4] mmc: Kconfig: Enable CONFIG_MMC_SDHCI_IO_ACCESSORS

2017-09-26 Thread Vijay Viswanath
Enable CONFIG_MMC_SDHCI_IO_ACCESSORS so that SDHC controller specific
register read and write APIs, if registered, can be used.

Signed-off-by: Vijay Viswanath 
---
 drivers/mmc/host/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 2db84dd..11bfb82 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -420,6 +420,7 @@ config MMC_SDHCI_MSM
tristate "Qualcomm SDHCI Controller Support"
depends on ARCH_QCOM || (ARM && COMPILE_TEST)
depends on MMC_SDHCI_PLTFM
+   select MMC_SDHCI_IO_ACCESSORS
help
  This selects the Secure Digital Host Controller Interface (SDHCI)
  support present in Qualcomm SOCs. The controller supports
-- 
 Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc. 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



[PATCH v2 1/4] mmc: sdhci-msm: fix issue with power irq

2017-09-26 Thread Vijay Viswanath
From: Subhash Jadavani 

SDCC controller reset (SW_RST) during probe may trigger power irq if
previous status of PWRCTL was either BUS_ON or IO_HIGH_V. So before we
enable the power irq interrupt in GIC (by registering the interrupt
handler), we need to ensure that any pending power irq interrupt status
is acknowledged otherwise power irq interrupt handler would be fired
prematurely.

Signed-off-by: Subhash Jadavani 
Signed-off-by: Vijay Viswanath 
---
 drivers/mmc/host/sdhci-msm.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index 9d601dc..d636251 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -1250,6 +1250,21 @@ static int sdhci_msm_probe(struct platform_device *pdev)
   CORE_VENDOR_SPEC_CAPABILITIES0);
}
 
+   /*
+* Power on reset state may trigger power irq if previous status of
+* PWRCTL was either BUS_ON or IO_HIGH_V. So before enabling pwr irq
+* interrupt in GIC, any pending power irq interrupt should be
+* acknowledged. Otherwise power irq interrupt handler would be
+* fired prematurely.
+*/
+   sdhci_msm_voltage_switch(host);
+
+   /*
+* Ensure that above writes are propogated before interrupt enablement
+* in GIC.
+*/
+   mb();
+
/* Setup IRQ for handling power/voltage tasks with PMIC */
msm_host->pwr_irq = platform_get_irq_byname(pdev, "pwr_irq");
if (msm_host->pwr_irq < 0) {
@@ -1259,6 +1274,9 @@ static int sdhci_msm_probe(struct platform_device *pdev)
goto clk_disable;
}
 
+   /* Enable pwr irq interrupts */
+   writel_relaxed(INT_MASK, msm_host->core_mem + CORE_PWRCTL_MASK);
+
ret = devm_request_threaded_irq(>dev, msm_host->pwr_irq, NULL,
sdhci_msm_pwr_irq, IRQF_ONESHOT,
dev_name(>dev), host);
-- 
 Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc. 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



[PATCH v2 4/4] mmc: sdhci-msm: Add sdhci msm register write APIs which wait for pwr irq

2017-09-26 Thread Vijay Viswanath
Register writes which change voltage of IO lines or turn the IO bus
on/off require controller to be ready before progressing further. When
the controller is ready, it will generate a power irq which needs to be
handled. The thread which initiated the register write should wait for
power irq to complete. This will be done through the new sdhc msm write
APIs which will check whether the particular write can trigger a power
irq and wait for it with a timeout if it is expected.
The SDHC core power control IRQ gets triggered when -
* There is a state change in power control bit (bit 0)
  of SDHCI_POWER_CONTROL register.
* There is a state change in 1.8V enable bit (bit 3) of
  SDHCI_HOST_CONTROL2 register.
* Bit 1 of SDHCI_SOFTWARE_RESET is set.

Also add support APIs which are used by sdhc msm write APIs to check
if power irq is expected to be generated and wait for the power irq
to come and complete if the irq is expected.

This patch requires CONFIG_MMC_SDHCI_IO_ACCESSORS to be enabled.

Signed-off-by: Sahitya Tummala 
Signed-off-by: Vijay Viswanath 
---
 drivers/mmc/host/sdhci-msm.c | 173 ++-
 1 file changed, 171 insertions(+), 2 deletions(-)

diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index 42a65ab..b72a487 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -123,6 +123,10 @@
 #define CMUX_SHIFT_PHASE_MASK  (7 << CMUX_SHIFT_PHASE_SHIFT)
 
 #define MSM_MMC_AUTOSUSPEND_DELAY_MS   50
+
+/* Timeout value to avoid infinite waiting for pwr_irq */
+#define MSM_PWR_IRQ_TIMEOUT_MS 5000
+
 struct sdhci_msm_host {
struct platform_device *pdev;
void __iomem *core_mem; /* MSM SDCC mapped address */
@@ -138,6 +142,10 @@ struct sdhci_msm_host {
bool calibration_done;
u8 saved_tuning_phase;
bool use_cdclp533;
+   u32 curr_pwr_state;
+   u32 curr_io_level;
+   wait_queue_head_t pwr_irq_wait;
+   bool pwr_irq_flag;
 };
 
 static unsigned int msm_get_clock_rate_for_bus_mode(struct sdhci_host *host,
@@ -995,6 +1003,73 @@ static void sdhci_msm_set_uhs_signaling(struct sdhci_host 
*host,
sdhci_msm_hs400(host, >ios);
 }
 
+static inline void sdhci_msm_init_pwr_irq_wait(struct sdhci_msm_host *msm_host)
+{
+   init_waitqueue_head(_host->pwr_irq_wait);
+}
+
+static inline void sdhci_msm_complete_pwr_irq_wait(
+   struct sdhci_msm_host *msm_host)
+{
+   wake_up(_host->pwr_irq_wait);
+}
+
+/*
+ * sdhci_msm_check_power_status API should be called when registers writes
+ * which can toggle sdhci IO bus ON/OFF or change IO lines HIGH/LOW happens.
+ * To what state the register writes will change the IO lines should be passed
+ * as the argument req_type. This API will check whether the IO line's state
+ * is already the expected state and will wait for power irq only if
+ * power irq is expected to be trigerred based on the current IO line state
+ * and expected IO line state.
+ */
+static void sdhci_msm_check_power_status(struct sdhci_host *host, u32 req_type)
+{
+   struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+   struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
+   bool done = false;
+
+   pr_debug("%s: %s: request %d curr_pwr_state %x curr_io_level %x\n",
+   mmc_hostname(host->mmc), __func__, req_type,
+   msm_host->curr_pwr_state, msm_host->curr_io_level);
+
+   /*
+* The IRQ for request type IO High/LOW will be generated when -
+* there is a state change in 1.8V enable bit (bit 3) of
+* SDHCI_HOST_CONTROL2 register. The reset state of that bit is 0
+* which indicates 3.3V IO voltage. So, when MMC core layer tries
+* to set it to 3.3V before card detection happens, the
+* IRQ doesn't get triggered as there is no state change in this bit.
+* The driver already handles this case by changing the IO voltage
+* level to high as part of controller power up sequence. Hence, check
+* for host->pwr to handle a case where IO voltage high request is
+* issued even before controller power up.
+*/
+   if ((req_type & REQ_IO_HIGH) && !host->pwr) {
+   pr_debug("%s: do not wait for power IRQ that never comes, 
req_type: %d\n",
+   mmc_hostname(host->mmc), req_type);
+   return;
+   }
+   if ((req_type & msm_host->curr_pwr_state) ||
+   (req_type & msm_host->curr_io_level))
+   done = true;
+   /*
+* This is needed here to handle cases where register writes will
+* not change the current bus state or io level of the controller.
+* In this case, no power irq will be triggerred and we should
+* not wait.
+*/
+   if (!done) {
+   if (!wait_event_timeout(msm_host->pwr_irq_wait,
+   msm_host->pwr_irq_flag,
+  

[PATCH v2 3/4] mmc: Kconfig: Enable CONFIG_MMC_SDHCI_IO_ACCESSORS

2017-09-26 Thread Vijay Viswanath
Enable CONFIG_MMC_SDHCI_IO_ACCESSORS so that SDHC controller specific
register read and write APIs, if registered, can be used.

Signed-off-by: Vijay Viswanath 
---
 drivers/mmc/host/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 2db84dd..11bfb82 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -420,6 +420,7 @@ config MMC_SDHCI_MSM
tristate "Qualcomm SDHCI Controller Support"
depends on ARCH_QCOM || (ARM && COMPILE_TEST)
depends on MMC_SDHCI_PLTFM
+   select MMC_SDHCI_IO_ACCESSORS
help
  This selects the Secure Digital Host Controller Interface (SDHCI)
  support present in Qualcomm SOCs. The controller supports
-- 
 Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc. 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



[PATCH v2 1/4] mmc: sdhci-msm: fix issue with power irq

2017-09-26 Thread Vijay Viswanath
From: Subhash Jadavani 

SDCC controller reset (SW_RST) during probe may trigger power irq if
previous status of PWRCTL was either BUS_ON or IO_HIGH_V. So before we
enable the power irq interrupt in GIC (by registering the interrupt
handler), we need to ensure that any pending power irq interrupt status
is acknowledged otherwise power irq interrupt handler would be fired
prematurely.

Signed-off-by: Subhash Jadavani 
Signed-off-by: Vijay Viswanath 
---
 drivers/mmc/host/sdhci-msm.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index 9d601dc..d636251 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -1250,6 +1250,21 @@ static int sdhci_msm_probe(struct platform_device *pdev)
   CORE_VENDOR_SPEC_CAPABILITIES0);
}
 
+   /*
+* Power on reset state may trigger power irq if previous status of
+* PWRCTL was either BUS_ON or IO_HIGH_V. So before enabling pwr irq
+* interrupt in GIC, any pending power irq interrupt should be
+* acknowledged. Otherwise power irq interrupt handler would be
+* fired prematurely.
+*/
+   sdhci_msm_voltage_switch(host);
+
+   /*
+* Ensure that above writes are propogated before interrupt enablement
+* in GIC.
+*/
+   mb();
+
/* Setup IRQ for handling power/voltage tasks with PMIC */
msm_host->pwr_irq = platform_get_irq_byname(pdev, "pwr_irq");
if (msm_host->pwr_irq < 0) {
@@ -1259,6 +1274,9 @@ static int sdhci_msm_probe(struct platform_device *pdev)
goto clk_disable;
}
 
+   /* Enable pwr irq interrupts */
+   writel_relaxed(INT_MASK, msm_host->core_mem + CORE_PWRCTL_MASK);
+
ret = devm_request_threaded_irq(>dev, msm_host->pwr_irq, NULL,
sdhci_msm_pwr_irq, IRQF_ONESHOT,
dev_name(>dev), host);
-- 
 Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc. 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



[PATCH v2 0/4] mmc: sdhci-msm: Corrections to implementation of power irq

2017-09-26 Thread Vijay Viswanath
Register writes which change voltage of IO lines or turn the IO bus on/off
require sdhc controller to be ready before progressing further. Once a
register write which affects IO lines is done, the driver should wait for
power irq from controller. Once the irq comes, the driver should acknowledge
the irq by writing to power control register. If the acknowledgement is not
given to controller, the controller may not complete the corresponding
register write action and this can mess up the controller if drivers proceeds
without power irq completing.

Changes since v1:
Patch enabling MMC_IO_ACCESSORS in Kconfig moved up the patch list.
Also corrected a mistake in the patch.
Removed all ifdef CONFIG_MMC_IO_ACCESSORS since the patches using it
will come after MMC_IO_ACCESSORS are enabled.
Merged the patches 3 & 4 of earlier series into 1 patch (patch 4 in
current series).
Corrected a mistake in a comment text in patch 3 of previous series. 

Changes since RFC:
wait_for_completion_timeout replaced with wait_event_timeout when
waiting for power irq.
Removed the spinlock within power irq handler and API which waits
for power irq.
Added comments to sdhci msm register write functions, warning that they
can sleep.
Sdhci msm register write functions will do a memory barrier before
writing to the register if the particular register can trigger
power irq.
Instead of enabling SDHCI IO ACCESSORS config in arm64/defconfig, it
will be selected in mmc/host/Kconfig if the platform is MMC_SDHCI_MSM.

Sahitya Tummala (1):
  mmc: sdhci-msm: Fix HW issue with power IRQ handling during reset

Subhash Jadavani (1):
  mmc: sdhci-msm: fix issue with power irq

Vijay Viswanath (2):
  mmc: Kconfig: Enable CONFIG_MMC_SDHCI_IO_ACCESSORS
  mmc: sdhci-msm: Add sdhci msm register write APIs which wait for pwr
irq

 drivers/mmc/host/Kconfig |   1 +
 drivers/mmc/host/sdhci-msm.c | 235 ++-
 2 files changed, 231 insertions(+), 5 deletions(-)

-- 
 Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc. 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



[PATCH v2 0/4] mmc: sdhci-msm: Corrections to implementation of power irq

2017-09-26 Thread Vijay Viswanath
Register writes which change voltage of IO lines or turn the IO bus on/off
require sdhc controller to be ready before progressing further. Once a
register write which affects IO lines is done, the driver should wait for
power irq from controller. Once the irq comes, the driver should acknowledge
the irq by writing to power control register. If the acknowledgement is not
given to controller, the controller may not complete the corresponding
register write action and this can mess up the controller if drivers proceeds
without power irq completing.

Changes since v1:
Patch enabling MMC_IO_ACCESSORS in Kconfig moved up the patch list.
Also corrected a mistake in the patch.
Removed all ifdef CONFIG_MMC_IO_ACCESSORS since the patches using it
will come after MMC_IO_ACCESSORS are enabled.
Merged the patches 3 & 4 of earlier series into 1 patch (patch 4 in
current series).
Corrected a mistake in a comment text in patch 3 of previous series. 

Changes since RFC:
wait_for_completion_timeout replaced with wait_event_timeout when
waiting for power irq.
Removed the spinlock within power irq handler and API which waits
for power irq.
Added comments to sdhci msm register write functions, warning that they
can sleep.
Sdhci msm register write functions will do a memory barrier before
writing to the register if the particular register can trigger
power irq.
Instead of enabling SDHCI IO ACCESSORS config in arm64/defconfig, it
will be selected in mmc/host/Kconfig if the platform is MMC_SDHCI_MSM.

Sahitya Tummala (1):
  mmc: sdhci-msm: Fix HW issue with power IRQ handling during reset

Subhash Jadavani (1):
  mmc: sdhci-msm: fix issue with power irq

Vijay Viswanath (2):
  mmc: Kconfig: Enable CONFIG_MMC_SDHCI_IO_ACCESSORS
  mmc: sdhci-msm: Add sdhci msm register write APIs which wait for pwr
irq

 drivers/mmc/host/Kconfig |   1 +
 drivers/mmc/host/sdhci-msm.c | 235 ++-
 2 files changed, 231 insertions(+), 5 deletions(-)

-- 
 Qualcomm India Private Limited, on behalf of Qualcomm Innovation Center, Inc. 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project.



Re: [PATCH] net: stmmac: Meet alignment requirements for DMA

2017-09-26 Thread Paul Burton
Hi David,

On Tuesday, 26 September 2017 21:53:21 PDT David Miller wrote:
> From: Paul Burton 
> Date: Tue, 26 Sep 2017 21:30:56 -0700
> 
> > Nobody said that you are required to do anything, I suggested that
> > it would be beneficial if you were to suggest a change to the
> > documented DMA API such that it allows your usage where it currently
> > does not.
> 
> Documentation is often wrong and it is here.  What 200+ drivers
> actually do and depend upon trumps a simple text document.

Agreed - but if the documented API is wrong then it should change.

> The requirement is that the memory remains quiescent on the cpu side
> while the device messes with it.  And that this quiescence requirement
> may or may not be on a cache line basis.
> 
> There is absolutely no requirement that the buffers themselves are
> cache line aligned.
> 
> In fact, receive buffers for networking are intentionally 2-byte
> aligned in order for the ipv4 headers to be naturally 32-bit aligned.
> 
> Cache line aligning receive buffers will actually make some
> architectures trap because of the bad alignment.
> 
> So see, this cache line alignment requirement is pure madness from
> just about any perspective whatsoever.

Thank you - that's more constructive.

I understand that the network code doesn't suffer from a problem with using 
non-cacheline-aligned buffers, because you guarantee that the CPU will not be 
writing to anything on either side of the memory mapped for DMA up to at least 
a cache line boundary. That is all well & good (though still, I think that 
ought to be documented somewhere even if just by a comment somewhere in linux/
sk_buff.h).

There is still a problem though in other cases which do not provide such a 
guarantee - for example the MMC issue I pointed out previously - which it 
would be useful to be able to catch rather than allowing silent memory 
corruption which can be difficult to track down. Whilst the particular case of 
mapping a struct sk_buff's data for DMA might not trigger this issue the issue 
does still exist in other cases for which aligning data to a cache line 
boundary is not always "pure madness".

So whilst it sounds like you'd happily just change or remove the paragraph 
about cache-line alignment in Documentation/DMA-API.txt, and I agree that 
would be a good start, I wonder whether we could do something better. One 
option might be for the warning in the MIPS DMA code to be predicated on one 
of the cache lines only partially covered by a DMA mapping actually being 
dirty - though this would probably be a more expensive check than we'd want in 
the general case so might have to be conditional upon some new debug entry in 
Kconfig. I'll give this some thought.

Thanks,
Paul

signature.asc
Description: This is a digitally signed message part.


Re: [PATCH] net: stmmac: Meet alignment requirements for DMA

2017-09-26 Thread Paul Burton
Hi David,

On Tuesday, 26 September 2017 21:53:21 PDT David Miller wrote:
> From: Paul Burton 
> Date: Tue, 26 Sep 2017 21:30:56 -0700
> 
> > Nobody said that you are required to do anything, I suggested that
> > it would be beneficial if you were to suggest a change to the
> > documented DMA API such that it allows your usage where it currently
> > does not.
> 
> Documentation is often wrong and it is here.  What 200+ drivers
> actually do and depend upon trumps a simple text document.

Agreed - but if the documented API is wrong then it should change.

> The requirement is that the memory remains quiescent on the cpu side
> while the device messes with it.  And that this quiescence requirement
> may or may not be on a cache line basis.
> 
> There is absolutely no requirement that the buffers themselves are
> cache line aligned.
> 
> In fact, receive buffers for networking are intentionally 2-byte
> aligned in order for the ipv4 headers to be naturally 32-bit aligned.
> 
> Cache line aligning receive buffers will actually make some
> architectures trap because of the bad alignment.
> 
> So see, this cache line alignment requirement is pure madness from
> just about any perspective whatsoever.

Thank you - that's more constructive.

I understand that the network code doesn't suffer from a problem with using 
non-cacheline-aligned buffers, because you guarantee that the CPU will not be 
writing to anything on either side of the memory mapped for DMA up to at least 
a cache line boundary. That is all well & good (though still, I think that 
ought to be documented somewhere even if just by a comment somewhere in linux/
sk_buff.h).

There is still a problem though in other cases which do not provide such a 
guarantee - for example the MMC issue I pointed out previously - which it 
would be useful to be able to catch rather than allowing silent memory 
corruption which can be difficult to track down. Whilst the particular case of 
mapping a struct sk_buff's data for DMA might not trigger this issue the issue 
does still exist in other cases for which aligning data to a cache line 
boundary is not always "pure madness".

So whilst it sounds like you'd happily just change or remove the paragraph 
about cache-line alignment in Documentation/DMA-API.txt, and I agree that 
would be a good start, I wonder whether we could do something better. One 
option might be for the warning in the MIPS DMA code to be predicated on one 
of the cache lines only partially covered by a DMA mapping actually being 
dirty - though this would probably be a more expensive check than we'd want in 
the general case so might have to be conditional upon some new debug entry in 
Kconfig. I'll give this some thought.

Thanks,
Paul

signature.asc
Description: This is a digitally signed message part.


Re: [PATCH v2 5/5] media: atmel-isc: Rework the format list

2017-09-26 Thread Yang, Wenyou

Hi Hans,

Thank  you very much for your review.

On 2017/9/25 21:24, Hans Verkuil wrote:

Hi Wenyou,

On 18/09/17 08:39, Wenyou Yang wrote:

To improve the readability of code, split the format array into two,
one for the format description, other for the register configuration.
Meanwhile, add the flag member to indicate the format can be achieved
from the sensor or be produced by the controller, and rename members
related to the register configuration.

Also add more formats support: GREY, ARGB444, ARGB555 and ARGB32.

Signed-off-by: Wenyou Yang 

This looks better. Just a few comments, see below.


---

Changes in v2:
  - Add the new patch to remove the unnecessary member from
isc_subdev_entity struct.
  - Rebase on the patch set,
 [PATCH 0/6] [media] Atmel: Adjustments for seven function 
implementations
 https://www.mail-archive.com/linux-media@vger.kernel.org/msg118342.html

  drivers/media/platform/atmel/atmel-isc.c | 524 ---
  1 file changed, 405 insertions(+), 119 deletions(-)

diff --git a/drivers/media/platform/atmel/atmel-isc.c 
b/drivers/media/platform/atmel/atmel-isc.c
index 2d876903da71..90bd0b28a975 100644
--- a/drivers/media/platform/atmel/atmel-isc.c
+++ b/drivers/media/platform/atmel/atmel-isc.c
@@ -89,34 +89,56 @@ struct isc_subdev_entity {
struct list_head list;
  };
  
+#define FMT_FLAG_FROM_SENSOR		BIT(0)

+#define FMT_FLAG_FROM_CONTROLLER   BIT(1)

Document the meaning of these flags.

Will add it in next version.



+
  /*
   * struct isc_format - ISC media bus format information
   * @fourcc:   Fourcc code for this format
   * @mbus_code:V4L2 media bus format code.
+ * flags:  Indicate format from sensor or converted by controller
   * @bpp:  Bits per pixel (when stored in memory)
- * @reg_bps:   reg value for bits per sample
   *(when transferred over a bus)
- * @pipeline:  pipeline switch
   * @sd_support:   Subdev supports this format
   * @isc_support:  ISC can convert raw format to this format
   */
+
  struct isc_format {
u32 fourcc;
u32 mbus_code;
+   u32 flags;
u8  bpp;
  
-	u32	reg_bps;

-   u32 reg_bay_cfg;
-   u32 reg_rlp_mode;
-   u32 reg_dcfg_imode;
-   u32 reg_dctrl_dview;
-
-   u32 pipeline;
-
boolsd_support;
boolisc_support;
  };
  
+/* Pipeline bitmap */

+#define WB_ENABLE  BIT(0)
+#define CFA_ENABLE BIT(1)
+#define CC_ENABLE  BIT(2)
+#define GAM_ENABLE BIT(3)
+#define GAM_BENABLEBIT(4)
+#define GAM_GENABLEBIT(5)
+#define GAM_RENABLEBIT(6)
+#define CSC_ENABLE BIT(7)
+#define CBC_ENABLE BIT(8)
+#define SUB422_ENABLE  BIT(9)
+#define SUB420_ENABLE  BIT(10)
+
+#define GAM_ENABLES(GAM_RENABLE | GAM_GENABLE | GAM_BENABLE | GAM_ENABLE)
+
+struct fmt_config {
+   u32 fourcc;
+
+   u32 pfe_cfg0_bps;
+   u32 cfa_baycfg;
+   u32 rlp_cfg_mode;
+   u32 dcfg_imode;
+   u32 dctrl_dview;
+
+   u32 bits_pipeline;
+};
  
  #define HIST_ENTRIES		512

  #define HIST_BAYER(ISC_HIS_CFG_MODE_B + 1)
@@ -181,80 +203,321 @@ struct isc_device {
struct list_headsubdev_entities;
  };
  
-#define RAW_FMT_IND_START0

-#define RAW_FMT_IND_END  11
-#define ISC_FMT_IND_START12
-#define ISC_FMT_IND_END  14
-
-static struct isc_format isc_formats[] = {
-   { V4L2_PIX_FMT_SBGGR8, MEDIA_BUS_FMT_SBGGR8_1X8, 8,
- ISC_PFE_CFG0_BPS_EIGHT, ISC_BAY_CFG_BGBG, ISC_RLP_CFG_MODE_DAT8,
- ISC_DCFG_IMODE_PACKED8, ISC_DCTRL_DVIEW_PACKED, 0x0,
- false, false },
-   { V4L2_PIX_FMT_SGBRG8, MEDIA_BUS_FMT_SGBRG8_1X8, 8,
- ISC_PFE_CFG0_BPS_EIGHT, ISC_BAY_CFG_GBGB, ISC_RLP_CFG_MODE_DAT8,
- ISC_DCFG_IMODE_PACKED8, ISC_DCTRL_DVIEW_PACKED, 0x0,
- false, false },
-   { V4L2_PIX_FMT_SGRBG8, MEDIA_BUS_FMT_SGRBG8_1X8, 8,
- ISC_PFE_CFG0_BPS_EIGHT, ISC_BAY_CFG_GRGR, ISC_RLP_CFG_MODE_DAT8,
- ISC_DCFG_IMODE_PACKED8, ISC_DCTRL_DVIEW_PACKED, 0x0,
- false, false },
-   { V4L2_PIX_FMT_SRGGB8, MEDIA_BUS_FMT_SRGGB8_1X8, 8,
- ISC_PFE_CFG0_BPS_EIGHT, ISC_BAY_CFG_RGRG, ISC_RLP_CFG_MODE_DAT8,
- ISC_DCFG_IMODE_PACKED8, ISC_DCTRL_DVIEW_PACKED, 0x0,
- false, false },
-
-   { V4L2_PIX_FMT_SBGGR10, MEDIA_BUS_FMT_SBGGR10_1X10, 16,
- ISC_PFG_CFG0_BPS_TEN, ISC_BAY_CFG_BGBG, ISC_RLP_CFG_MODE_DAT10,
- ISC_DCFG_IMODE_PACKED16, ISC_DCTRL_DVIEW_PACKED, 0x0,
- false, false },
-   { V4L2_PIX_FMT_SGBRG10, MEDIA_BUS_FMT_SGBRG10_1X10, 16,
- ISC_PFG_CFG0_BPS_TEN, ISC_BAY_CFG_GBGB, ISC_RLP_CFG_MODE_DAT10,
- ISC_DCFG_IMODE_PACKED16, ISC_DCTRL_DVIEW_PACKED, 0x0,
- false, false },
-   { V4L2_PIX_FMT_SGRBG10, MEDIA_BUS_FMT_SGRBG10_1X10, 16,
- 

Re: [PATCH v2 5/5] media: atmel-isc: Rework the format list

2017-09-26 Thread Yang, Wenyou

Hi Hans,

Thank  you very much for your review.

On 2017/9/25 21:24, Hans Verkuil wrote:

Hi Wenyou,

On 18/09/17 08:39, Wenyou Yang wrote:

To improve the readability of code, split the format array into two,
one for the format description, other for the register configuration.
Meanwhile, add the flag member to indicate the format can be achieved
from the sensor or be produced by the controller, and rename members
related to the register configuration.

Also add more formats support: GREY, ARGB444, ARGB555 and ARGB32.

Signed-off-by: Wenyou Yang 

This looks better. Just a few comments, see below.


---

Changes in v2:
  - Add the new patch to remove the unnecessary member from
isc_subdev_entity struct.
  - Rebase on the patch set,
 [PATCH 0/6] [media] Atmel: Adjustments for seven function 
implementations
 https://www.mail-archive.com/linux-media@vger.kernel.org/msg118342.html

  drivers/media/platform/atmel/atmel-isc.c | 524 ---
  1 file changed, 405 insertions(+), 119 deletions(-)

diff --git a/drivers/media/platform/atmel/atmel-isc.c 
b/drivers/media/platform/atmel/atmel-isc.c
index 2d876903da71..90bd0b28a975 100644
--- a/drivers/media/platform/atmel/atmel-isc.c
+++ b/drivers/media/platform/atmel/atmel-isc.c
@@ -89,34 +89,56 @@ struct isc_subdev_entity {
struct list_head list;
  };
  
+#define FMT_FLAG_FROM_SENSOR		BIT(0)

+#define FMT_FLAG_FROM_CONTROLLER   BIT(1)

Document the meaning of these flags.

Will add it in next version.



+
  /*
   * struct isc_format - ISC media bus format information
   * @fourcc:   Fourcc code for this format
   * @mbus_code:V4L2 media bus format code.
+ * flags:  Indicate format from sensor or converted by controller
   * @bpp:  Bits per pixel (when stored in memory)
- * @reg_bps:   reg value for bits per sample
   *(when transferred over a bus)
- * @pipeline:  pipeline switch
   * @sd_support:   Subdev supports this format
   * @isc_support:  ISC can convert raw format to this format
   */
+
  struct isc_format {
u32 fourcc;
u32 mbus_code;
+   u32 flags;
u8  bpp;
  
-	u32	reg_bps;

-   u32 reg_bay_cfg;
-   u32 reg_rlp_mode;
-   u32 reg_dcfg_imode;
-   u32 reg_dctrl_dview;
-
-   u32 pipeline;
-
boolsd_support;
boolisc_support;
  };
  
+/* Pipeline bitmap */

+#define WB_ENABLE  BIT(0)
+#define CFA_ENABLE BIT(1)
+#define CC_ENABLE  BIT(2)
+#define GAM_ENABLE BIT(3)
+#define GAM_BENABLEBIT(4)
+#define GAM_GENABLEBIT(5)
+#define GAM_RENABLEBIT(6)
+#define CSC_ENABLE BIT(7)
+#define CBC_ENABLE BIT(8)
+#define SUB422_ENABLE  BIT(9)
+#define SUB420_ENABLE  BIT(10)
+
+#define GAM_ENABLES(GAM_RENABLE | GAM_GENABLE | GAM_BENABLE | GAM_ENABLE)
+
+struct fmt_config {
+   u32 fourcc;
+
+   u32 pfe_cfg0_bps;
+   u32 cfa_baycfg;
+   u32 rlp_cfg_mode;
+   u32 dcfg_imode;
+   u32 dctrl_dview;
+
+   u32 bits_pipeline;
+};
  
  #define HIST_ENTRIES		512

  #define HIST_BAYER(ISC_HIS_CFG_MODE_B + 1)
@@ -181,80 +203,321 @@ struct isc_device {
struct list_headsubdev_entities;
  };
  
-#define RAW_FMT_IND_START0

-#define RAW_FMT_IND_END  11
-#define ISC_FMT_IND_START12
-#define ISC_FMT_IND_END  14
-
-static struct isc_format isc_formats[] = {
-   { V4L2_PIX_FMT_SBGGR8, MEDIA_BUS_FMT_SBGGR8_1X8, 8,
- ISC_PFE_CFG0_BPS_EIGHT, ISC_BAY_CFG_BGBG, ISC_RLP_CFG_MODE_DAT8,
- ISC_DCFG_IMODE_PACKED8, ISC_DCTRL_DVIEW_PACKED, 0x0,
- false, false },
-   { V4L2_PIX_FMT_SGBRG8, MEDIA_BUS_FMT_SGBRG8_1X8, 8,
- ISC_PFE_CFG0_BPS_EIGHT, ISC_BAY_CFG_GBGB, ISC_RLP_CFG_MODE_DAT8,
- ISC_DCFG_IMODE_PACKED8, ISC_DCTRL_DVIEW_PACKED, 0x0,
- false, false },
-   { V4L2_PIX_FMT_SGRBG8, MEDIA_BUS_FMT_SGRBG8_1X8, 8,
- ISC_PFE_CFG0_BPS_EIGHT, ISC_BAY_CFG_GRGR, ISC_RLP_CFG_MODE_DAT8,
- ISC_DCFG_IMODE_PACKED8, ISC_DCTRL_DVIEW_PACKED, 0x0,
- false, false },
-   { V4L2_PIX_FMT_SRGGB8, MEDIA_BUS_FMT_SRGGB8_1X8, 8,
- ISC_PFE_CFG0_BPS_EIGHT, ISC_BAY_CFG_RGRG, ISC_RLP_CFG_MODE_DAT8,
- ISC_DCFG_IMODE_PACKED8, ISC_DCTRL_DVIEW_PACKED, 0x0,
- false, false },
-
-   { V4L2_PIX_FMT_SBGGR10, MEDIA_BUS_FMT_SBGGR10_1X10, 16,
- ISC_PFG_CFG0_BPS_TEN, ISC_BAY_CFG_BGBG, ISC_RLP_CFG_MODE_DAT10,
- ISC_DCFG_IMODE_PACKED16, ISC_DCTRL_DVIEW_PACKED, 0x0,
- false, false },
-   { V4L2_PIX_FMT_SGBRG10, MEDIA_BUS_FMT_SGBRG10_1X10, 16,
- ISC_PFG_CFG0_BPS_TEN, ISC_BAY_CFG_GBGB, ISC_RLP_CFG_MODE_DAT10,
- ISC_DCFG_IMODE_PACKED16, ISC_DCTRL_DVIEW_PACKED, 0x0,
- false, false },
-   { V4L2_PIX_FMT_SGRBG10, MEDIA_BUS_FMT_SGRBG10_1X10, 16,
- ISC_PFG_CFG0_BPS_TEN, 

[PATCH v2 2/2] pid: Remove pidhash

2017-09-26 Thread Gargi Sharma
pidhash is no longer required as all the information
can be looked up from idr tree. nr_hashed represented
the number of pids that had been hashed. Since, nr_hashed and
PIDNS_HASH_ADDING are no longer relevant, it has been renamed
to pid_allocated and PIDNS_ADDING respectively.

Signed-off-by: Gargi Sharma 
Reviewed-by: Rik van Riel 
---
 include/linux/init_task.h |  1 -
 include/linux/pid.h   |  2 --
 include/linux/pid_namespace.h |  4 ++--
 init/main.c   |  1 -
 kernel/fork.c |  2 +-
 kernel/pid.c  | 52 ---
 kernel/pid_namespace.c|  7 +++---
 7 files changed, 16 insertions(+), 53 deletions(-)

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 3c07ace..cc45798 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -104,7 +104,6 @@ extern struct group_info init_groups;
.numbers= { {   \
.nr = 0,\
.ns = _pid_ns, \
-   .pid_chain  = { .next = NULL, .pprev = NULL },  \
}, }\
 }
 
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 7195827..3915664 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -50,10 +50,8 @@ enum pid_type
  */
 
 struct upid {
-   /* Try to keep pid_chain in the same cacheline as nr for find_vpid */
int nr;
struct pid_namespace *ns;
-   struct hlist_node pid_chain;
 };
 
 struct pid
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index f4db4a7..7911b58 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -24,7 +24,7 @@ struct pid_namespace {
struct kref kref;
struct idr idr;
struct rcu_head rcu;
-   unsigned int nr_hashed;
+   unsigned int pid_allocated;
struct task_struct *child_reaper;
struct kmem_cache *pid_cachep;
unsigned int level;
@@ -48,7 +48,7 @@ struct pid_namespace {
 
 extern struct pid_namespace init_pid_ns;
 
-#define PIDNS_HASH_ADDING (1U << 31)
+#define PIDNS_ADDING (1U << 31)
 
 #ifdef CONFIG_PID_NS
 static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
diff --git a/init/main.c b/init/main.c
index 9f4db20..c17a21b 100644
--- a/init/main.c
+++ b/init/main.c
@@ -562,7 +562,6 @@ asmlinkage __visible void __init start_kernel(void)
 * kmem_cache_init()
 */
setup_log_buf(0);
-   pidhash_init();
vfs_caches_init_early();
sort_main_extable();
trap_init();
diff --git a/kernel/fork.c b/kernel/fork.c
index 1064618..c3518b8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1853,7 +1853,7 @@ static __latent_entropy struct task_struct *copy_process(
retval = -ERESTARTNOINTR;
goto bad_fork_cancel_cgroup;
}
-   if (unlikely(!(ns_of_pid(pid)->nr_hashed & PIDNS_HASH_ADDING))) {
+   if (unlikely(!(ns_of_pid(pid)->pid_allocated & PIDNS_ADDING))) {
retval = -ENOMEM;
goto bad_fork_cancel_cgroup;
}
diff --git a/kernel/pid.c b/kernel/pid.c
index 207c49a..ae31f7d1 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -41,10 +41,6 @@
 #include 
 #include 
 
-#define pid_hashfn(nr, ns) \
-   hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
-static struct hlist_head *pid_hash;
-static unsigned int pidhash_shift = 4;
 struct pid init_struct_pid = INIT_STRUCT_PID;
 
 int pid_max = PID_MAX_DEFAULT;
@@ -54,9 +50,6 @@ int pid_max = PID_MAX_DEFAULT;
 int pid_max_min = RESERVED_PIDS + 1;
 int pid_max_max = PID_MAX_LIMIT;
 
-#define find_next_offset(map, off) \
-   find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
-
 /*
  * PID-map pages start out as NULL, they get allocated upon
  * first use and are never deallocated. This way a low pid_max
@@ -66,7 +59,7 @@ int pid_max_max = PID_MAX_LIMIT;
 struct pid_namespace init_pid_ns = {
.kref = KREF_INIT(2),
.idr = IDR_INIT,
-   .nr_hashed = PIDNS_HASH_ADDING,
+   .pid_allocated = PIDNS_ADDING,
.level = 0,
.child_reaper = _task,
.user_ns = _user_ns,
@@ -125,8 +118,7 @@ void free_pid(struct pid *pid)
for (i = 0; i <= pid->level; i++) {
struct upid *upid = pid->numbers + i;
struct pid_namespace *ns = upid->ns;
-   hlist_del_rcu(>pid_chain);
-   switch(--ns->nr_hashed) {
+   switch(--ns->pid_allocated) {
case 2:
case 1:
/* When all that is left in the pid namespace
@@ -135,10 +127,10 @@ void free_pid(struct pid *pid)
 */

[PATCH v2 2/2] pid: Remove pidhash

2017-09-26 Thread Gargi Sharma
pidhash is no longer required as all the information
can be looked up from idr tree. nr_hashed represented
the number of pids that had been hashed. Since, nr_hashed and
PIDNS_HASH_ADDING are no longer relevant, it has been renamed
to pid_allocated and PIDNS_ADDING respectively.

Signed-off-by: Gargi Sharma 
Reviewed-by: Rik van Riel 
---
 include/linux/init_task.h |  1 -
 include/linux/pid.h   |  2 --
 include/linux/pid_namespace.h |  4 ++--
 init/main.c   |  1 -
 kernel/fork.c |  2 +-
 kernel/pid.c  | 52 ---
 kernel/pid_namespace.c|  7 +++---
 7 files changed, 16 insertions(+), 53 deletions(-)

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 3c07ace..cc45798 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -104,7 +104,6 @@ extern struct group_info init_groups;
.numbers= { {   \
.nr = 0,\
.ns = _pid_ns, \
-   .pid_chain  = { .next = NULL, .pprev = NULL },  \
}, }\
 }
 
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 7195827..3915664 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -50,10 +50,8 @@ enum pid_type
  */
 
 struct upid {
-   /* Try to keep pid_chain in the same cacheline as nr for find_vpid */
int nr;
struct pid_namespace *ns;
-   struct hlist_node pid_chain;
 };
 
 struct pid
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index f4db4a7..7911b58 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -24,7 +24,7 @@ struct pid_namespace {
struct kref kref;
struct idr idr;
struct rcu_head rcu;
-   unsigned int nr_hashed;
+   unsigned int pid_allocated;
struct task_struct *child_reaper;
struct kmem_cache *pid_cachep;
unsigned int level;
@@ -48,7 +48,7 @@ struct pid_namespace {
 
 extern struct pid_namespace init_pid_ns;
 
-#define PIDNS_HASH_ADDING (1U << 31)
+#define PIDNS_ADDING (1U << 31)
 
 #ifdef CONFIG_PID_NS
 static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
diff --git a/init/main.c b/init/main.c
index 9f4db20..c17a21b 100644
--- a/init/main.c
+++ b/init/main.c
@@ -562,7 +562,6 @@ asmlinkage __visible void __init start_kernel(void)
 * kmem_cache_init()
 */
setup_log_buf(0);
-   pidhash_init();
vfs_caches_init_early();
sort_main_extable();
trap_init();
diff --git a/kernel/fork.c b/kernel/fork.c
index 1064618..c3518b8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1853,7 +1853,7 @@ static __latent_entropy struct task_struct *copy_process(
retval = -ERESTARTNOINTR;
goto bad_fork_cancel_cgroup;
}
-   if (unlikely(!(ns_of_pid(pid)->nr_hashed & PIDNS_HASH_ADDING))) {
+   if (unlikely(!(ns_of_pid(pid)->pid_allocated & PIDNS_ADDING))) {
retval = -ENOMEM;
goto bad_fork_cancel_cgroup;
}
diff --git a/kernel/pid.c b/kernel/pid.c
index 207c49a..ae31f7d1 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -41,10 +41,6 @@
 #include 
 #include 
 
-#define pid_hashfn(nr, ns) \
-   hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
-static struct hlist_head *pid_hash;
-static unsigned int pidhash_shift = 4;
 struct pid init_struct_pid = INIT_STRUCT_PID;
 
 int pid_max = PID_MAX_DEFAULT;
@@ -54,9 +50,6 @@ int pid_max = PID_MAX_DEFAULT;
 int pid_max_min = RESERVED_PIDS + 1;
 int pid_max_max = PID_MAX_LIMIT;
 
-#define find_next_offset(map, off) \
-   find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
-
 /*
  * PID-map pages start out as NULL, they get allocated upon
  * first use and are never deallocated. This way a low pid_max
@@ -66,7 +59,7 @@ int pid_max_max = PID_MAX_LIMIT;
 struct pid_namespace init_pid_ns = {
.kref = KREF_INIT(2),
.idr = IDR_INIT,
-   .nr_hashed = PIDNS_HASH_ADDING,
+   .pid_allocated = PIDNS_ADDING,
.level = 0,
.child_reaper = _task,
.user_ns = _user_ns,
@@ -125,8 +118,7 @@ void free_pid(struct pid *pid)
for (i = 0; i <= pid->level; i++) {
struct upid *upid = pid->numbers + i;
struct pid_namespace *ns = upid->ns;
-   hlist_del_rcu(>pid_chain);
-   switch(--ns->nr_hashed) {
+   switch(--ns->pid_allocated) {
case 2:
case 1:
/* When all that is left in the pid namespace
@@ -135,10 +127,10 @@ void free_pid(struct pid *pid)
 */
wake_up_process(ns->child_reaper);
  

[PATCH v2 1/2] pid: Replace pid bitmap implementation with IDR API

2017-09-26 Thread Gargi Sharma
This patch replaces the current bitmap implemetation for
Process ID allocation. Functions that are no longer required,
for example, free_pidmap(), alloc_pidmap(), etc. are removed.
The rest of the functions are modified to use the IDR API.
The change was made to make the PID allocation less complex by
replacing custom code with calls to generic API.

Signed-off-by: Gargi Sharma 
---
 arch/powerpc/platforms/cell/spufs/sched.c |   2 +-
 fs/proc/loadavg.c |   2 +-
 include/linux/pid_namespace.h |  14 +--
 init/main.c   |   2 +-
 kernel/pid.c  | 193 +-
 kernel/pid_namespace.c|  47 +++-
 6 files changed, 55 insertions(+), 205 deletions(-)

diff --git a/arch/powerpc/platforms/cell/spufs/sched.c 
b/arch/powerpc/platforms/cell/spufs/sched.c
index 1fbb5da..d285aef 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -1093,7 +1093,7 @@ static int show_spu_loadavg(struct seq_file *s, void 
*private)
LOAD_INT(c), LOAD_FRAC(c),
count_active_contexts(),
atomic_read(_spu_contexts),
-   task_active_pid_ns(current)->last_pid);
+   task_active_pid_ns(current)->idr.idr_next-1);
return 0;
 }
 
diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c
index 983fce5..9355f4d 100644
--- a/fs/proc/loadavg.c
+++ b/fs/proc/loadavg.c
@@ -23,7 +23,7 @@ static int loadavg_proc_show(struct seq_file *m, void *v)
LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]),
nr_running(), nr_threads,
-   task_active_pid_ns(current)->last_pid);
+   task_active_pid_ns(current)->idr.idr_next-1);
return 0;
 }
 
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index b09136f..f4db4a7 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -9,15 +9,8 @@
 #include 
 #include 
 #include 
+#include 
 
-struct pidmap {
-   atomic_t nr_free;
-   void *page;
-};
-
-#define BITS_PER_PAGE  (PAGE_SIZE * 8)
-#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
-#define PIDMAP_ENTRIES ((PID_MAX_LIMIT+BITS_PER_PAGE-1)/BITS_PER_PAGE)
 
 struct fs_pin;
 
@@ -29,9 +22,8 @@ enum { /* definitions for pid_namespace's hide_pid field */
 
 struct pid_namespace {
struct kref kref;
-   struct pidmap pidmap[PIDMAP_ENTRIES];
+   struct idr idr;
struct rcu_head rcu;
-   int last_pid;
unsigned int nr_hashed;
struct task_struct *child_reaper;
struct kmem_cache *pid_cachep;
@@ -105,6 +97,6 @@ static inline int reboot_pid_ns(struct pid_namespace 
*pid_ns, int cmd)
 
 extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk);
 void pidhash_init(void);
-void pidmap_init(void);
+void pid_idr_init(void);
 
 #endif /* _LINUX_PID_NS_H */
diff --git a/init/main.c b/init/main.c
index 0ee9c686..9f4db20 100644
--- a/init/main.c
+++ b/init/main.c
@@ -667,7 +667,7 @@ asmlinkage __visible void __init start_kernel(void)
if (late_time_init)
late_time_init();
calibrate_delay();
-   pidmap_init();
+   pid_idr_init();
anon_vma_init();
acpi_early_init();
 #ifdef CONFIG_X86
diff --git a/kernel/pid.c b/kernel/pid.c
index 020dedb..207c49a 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -39,6 +39,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define pid_hashfn(nr, ns) \
hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
@@ -53,12 +54,6 @@ int pid_max = PID_MAX_DEFAULT;
 int pid_max_min = RESERVED_PIDS + 1;
 int pid_max_max = PID_MAX_LIMIT;
 
-static inline int mk_pid(struct pid_namespace *pid_ns,
-   struct pidmap *map, int off)
-{
-   return (map - pid_ns->pidmap)*BITS_PER_PAGE + off;
-}
-
 #define find_next_offset(map, off) \
find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
 
@@ -70,10 +65,7 @@ static inline int mk_pid(struct pid_namespace *pid_ns,
  */
 struct pid_namespace init_pid_ns = {
.kref = KREF_INIT(2),
-   .pidmap = {
-   [ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
-   },
-   .last_pid = 0,
+   .idr = IDR_INIT,
.nr_hashed = PIDNS_HASH_ADDING,
.level = 0,
.child_reaper = _task,
@@ -101,138 +93,6 @@ EXPORT_SYMBOL_GPL(init_pid_ns);
 
 static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
 
-static void free_pidmap(struct upid *upid)
-{
-   int nr = upid->nr;
-   struct pidmap *map = upid->ns->pidmap + nr / BITS_PER_PAGE;
-   int offset = nr & BITS_PER_PAGE_MASK;
-
-   clear_bit(offset, map->page);
-   atomic_inc(>nr_free);
-}
-
-/*
- * If we started walking pids at 'base', is 'a' seen before 'b'?
- 

[PATCH v2 1/2] pid: Replace pid bitmap implementation with IDR API

2017-09-26 Thread Gargi Sharma
This patch replaces the current bitmap implemetation for
Process ID allocation. Functions that are no longer required,
for example, free_pidmap(), alloc_pidmap(), etc. are removed.
The rest of the functions are modified to use the IDR API.
The change was made to make the PID allocation less complex by
replacing custom code with calls to generic API.

Signed-off-by: Gargi Sharma 
---
 arch/powerpc/platforms/cell/spufs/sched.c |   2 +-
 fs/proc/loadavg.c |   2 +-
 include/linux/pid_namespace.h |  14 +--
 init/main.c   |   2 +-
 kernel/pid.c  | 193 +-
 kernel/pid_namespace.c|  47 +++-
 6 files changed, 55 insertions(+), 205 deletions(-)

diff --git a/arch/powerpc/platforms/cell/spufs/sched.c 
b/arch/powerpc/platforms/cell/spufs/sched.c
index 1fbb5da..d285aef 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -1093,7 +1093,7 @@ static int show_spu_loadavg(struct seq_file *s, void 
*private)
LOAD_INT(c), LOAD_FRAC(c),
count_active_contexts(),
atomic_read(_spu_contexts),
-   task_active_pid_ns(current)->last_pid);
+   task_active_pid_ns(current)->idr.idr_next-1);
return 0;
 }
 
diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c
index 983fce5..9355f4d 100644
--- a/fs/proc/loadavg.c
+++ b/fs/proc/loadavg.c
@@ -23,7 +23,7 @@ static int loadavg_proc_show(struct seq_file *m, void *v)
LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]),
nr_running(), nr_threads,
-   task_active_pid_ns(current)->last_pid);
+   task_active_pid_ns(current)->idr.idr_next-1);
return 0;
 }
 
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index b09136f..f4db4a7 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -9,15 +9,8 @@
 #include 
 #include 
 #include 
+#include 
 
-struct pidmap {
-   atomic_t nr_free;
-   void *page;
-};
-
-#define BITS_PER_PAGE  (PAGE_SIZE * 8)
-#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
-#define PIDMAP_ENTRIES ((PID_MAX_LIMIT+BITS_PER_PAGE-1)/BITS_PER_PAGE)
 
 struct fs_pin;
 
@@ -29,9 +22,8 @@ enum { /* definitions for pid_namespace's hide_pid field */
 
 struct pid_namespace {
struct kref kref;
-   struct pidmap pidmap[PIDMAP_ENTRIES];
+   struct idr idr;
struct rcu_head rcu;
-   int last_pid;
unsigned int nr_hashed;
struct task_struct *child_reaper;
struct kmem_cache *pid_cachep;
@@ -105,6 +97,6 @@ static inline int reboot_pid_ns(struct pid_namespace 
*pid_ns, int cmd)
 
 extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk);
 void pidhash_init(void);
-void pidmap_init(void);
+void pid_idr_init(void);
 
 #endif /* _LINUX_PID_NS_H */
diff --git a/init/main.c b/init/main.c
index 0ee9c686..9f4db20 100644
--- a/init/main.c
+++ b/init/main.c
@@ -667,7 +667,7 @@ asmlinkage __visible void __init start_kernel(void)
if (late_time_init)
late_time_init();
calibrate_delay();
-   pidmap_init();
+   pid_idr_init();
anon_vma_init();
acpi_early_init();
 #ifdef CONFIG_X86
diff --git a/kernel/pid.c b/kernel/pid.c
index 020dedb..207c49a 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -39,6 +39,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define pid_hashfn(nr, ns) \
hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
@@ -53,12 +54,6 @@ int pid_max = PID_MAX_DEFAULT;
 int pid_max_min = RESERVED_PIDS + 1;
 int pid_max_max = PID_MAX_LIMIT;
 
-static inline int mk_pid(struct pid_namespace *pid_ns,
-   struct pidmap *map, int off)
-{
-   return (map - pid_ns->pidmap)*BITS_PER_PAGE + off;
-}
-
 #define find_next_offset(map, off) \
find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
 
@@ -70,10 +65,7 @@ static inline int mk_pid(struct pid_namespace *pid_ns,
  */
 struct pid_namespace init_pid_ns = {
.kref = KREF_INIT(2),
-   .pidmap = {
-   [ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
-   },
-   .last_pid = 0,
+   .idr = IDR_INIT,
.nr_hashed = PIDNS_HASH_ADDING,
.level = 0,
.child_reaper = _task,
@@ -101,138 +93,6 @@ EXPORT_SYMBOL_GPL(init_pid_ns);
 
 static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
 
-static void free_pidmap(struct upid *upid)
-{
-   int nr = upid->nr;
-   struct pidmap *map = upid->ns->pidmap + nr / BITS_PER_PAGE;
-   int offset = nr & BITS_PER_PAGE_MASK;
-
-   clear_bit(offset, map->page);
-   atomic_inc(>nr_free);
-}
-
-/*
- * If we started walking pids at 'base', is 'a' seen before 'b'?
- */
-static int 

[PATCH v2 0/2] Replace PID bitmap allocation with IDR API

2017-09-26 Thread Gargi Sharma
This patch series replaces kernel bitmap implementation of PID allocation
with IDR API. These patches are written to simplify the kernel by replacing 
custom code with calls to generic code.

The following are the stats for pid and pid_namespace object files
before and after the replacement. There is a noteworthy change between
the IDR and bitmap implementation.

Before
text   databssdechexfilename
   8447   3894 64  12405   3075kernel/pid.o
After
text   databssdechexfilename
   3301304  0   3605e15kernel/pid.o

Before
 text   databssdechexfilename
   5692   1842192   7726   1e2ekernel/pid_namespace.o
After
text   databssdechexfilename
   2870216 16   3102c1ekernel/pid_namespace.o

There wasn't a considerable difference between the time required for
allocation of PIDs to the processes.

---
Changes in v2:
- Removed redundant  IDR function that was introduced
  in the previous patchset.
- Renamed PIDNS_HASH_ADDING
- Used idr_for_each_entry_continue()
- Used idr_find() to lookup pids

Gargi Sharma (2):
  pid: Replace pid bitmap implementation with IDR API
  pid: Remove pidhash

 arch/powerpc/platforms/cell/spufs/sched.c |   2 +-
 fs/proc/loadavg.c |   2 +-
 include/linux/init_task.h |   1 -
 include/linux/pid.h   |   2 -
 include/linux/pid_namespace.h |  18 +--
 init/main.c   |   3 +-
 kernel/fork.c |   2 +-
 kernel/pid.c  | 239 +-
 kernel/pid_namespace.c|  54 +++
 9 files changed, 68 insertions(+), 255 deletions(-)

-- 
2.7.4



[PATCH v2 0/2] Replace PID bitmap allocation with IDR API

2017-09-26 Thread Gargi Sharma
This patch series replaces kernel bitmap implementation of PID allocation
with IDR API. These patches are written to simplify the kernel by replacing 
custom code with calls to generic code.

The following are the stats for pid and pid_namespace object files
before and after the replacement. There is a noteworthy change between
the IDR and bitmap implementation.

Before
text   databssdechexfilename
   8447   3894 64  12405   3075kernel/pid.o
After
text   databssdechexfilename
   3301304  0   3605e15kernel/pid.o

Before
 text   databssdechexfilename
   5692   1842192   7726   1e2ekernel/pid_namespace.o
After
text   databssdechexfilename
   2870216 16   3102c1ekernel/pid_namespace.o

There wasn't a considerable difference between the time required for
allocation of PIDs to the processes.

---
Changes in v2:
- Removed redundant  IDR function that was introduced
  in the previous patchset.
- Renamed PIDNS_HASH_ADDING
- Used idr_for_each_entry_continue()
- Used idr_find() to lookup pids

Gargi Sharma (2):
  pid: Replace pid bitmap implementation with IDR API
  pid: Remove pidhash

 arch/powerpc/platforms/cell/spufs/sched.c |   2 +-
 fs/proc/loadavg.c |   2 +-
 include/linux/init_task.h |   1 -
 include/linux/pid.h   |   2 -
 include/linux/pid_namespace.h |  18 +--
 init/main.c   |   3 +-
 kernel/fork.c |   2 +-
 kernel/pid.c  | 239 +-
 kernel/pid_namespace.c|  54 +++
 9 files changed, 68 insertions(+), 255 deletions(-)

-- 
2.7.4



Re: [PATCH] mm, swap: Make VMA based swap readahead configurable

2017-09-26 Thread Minchan Kim
On Tue, Sep 26, 2017 at 03:21:29PM +0200, Michal Hocko wrote:
> On Thu 21-09-17 09:33:10, Huang, Ying wrote:
> > From: Huang Ying 
> > 
> > This patch adds a new Kconfig option VMA_SWAP_READAHEAD and wraps VMA
> > based swap readahead code inside #ifdef CONFIG_VMA_SWAP_READAHEAD/#endif.
> > This is more friendly for tiny kernels.
> 
> How (much)?
> 
> > And as pointed to by Minchan
> > Kim, give people who want to disable the swap readahead an opportunity
> > to notice the changes to the swap readahead algorithm and the
> > corresponding knobs.
> 
> Why would anyone want that?
> 
> Please note that adding new config options make the already complicated
> config space even more problematic so there should be a good reason to
> add one. Please make sure your justification is clear on why this is
> worth the future maintenance and configurability burden.

The problem is users have disabled swap readahead by echo 0 > /proc/sys/
vm/page-cluster are regressed by this new interface /sys/kernel/mm/swap/
vma_ra_max_order. Because for disabling readahead completely, they should
disable vma_ra_max_order as well as page-cluster from now on.

So, goal of new config to notice new feature to admins so they can be aware
of new konb vma_ra_max_order as well as page-cluster.
I canont think other better idea to preventing such regression.

http://lkml.kernel.org/r/%3C20170913014019.GB29422@bbox%3E


Re: [PATCH] mm, swap: Make VMA based swap readahead configurable

2017-09-26 Thread Minchan Kim
On Tue, Sep 26, 2017 at 03:21:29PM +0200, Michal Hocko wrote:
> On Thu 21-09-17 09:33:10, Huang, Ying wrote:
> > From: Huang Ying 
> > 
> > This patch adds a new Kconfig option VMA_SWAP_READAHEAD and wraps VMA
> > based swap readahead code inside #ifdef CONFIG_VMA_SWAP_READAHEAD/#endif.
> > This is more friendly for tiny kernels.
> 
> How (much)?
> 
> > And as pointed to by Minchan
> > Kim, give people who want to disable the swap readahead an opportunity
> > to notice the changes to the swap readahead algorithm and the
> > corresponding knobs.
> 
> Why would anyone want that?
> 
> Please note that adding new config options make the already complicated
> config space even more problematic so there should be a good reason to
> add one. Please make sure your justification is clear on why this is
> worth the future maintenance and configurability burden.

The problem is users have disabled swap readahead by echo 0 > /proc/sys/
vm/page-cluster are regressed by this new interface /sys/kernel/mm/swap/
vma_ra_max_order. Because for disabling readahead completely, they should
disable vma_ra_max_order as well as page-cluster from now on.

So, goal of new config to notice new feature to admins so they can be aware
of new konb vma_ra_max_order as well as page-cluster.
I canont think other better idea to preventing such regression.

http://lkml.kernel.org/r/%3C20170913014019.GB29422@bbox%3E


Re: [RFC][PATCH v2 0/7] printk/ia64/ppc64/parisc64: let's deprecate %pF/%pf printk specifiers

2017-09-26 Thread Michael Ellerman
Sergey Senozhatsky  writes:

> On (09/22/17 16:48), Luck, Tony wrote:
> [..]
>> Tested patch series on ia64 successfully.
>> 
>> Tested-by: Tony Luck 
>
> thanks!
>
>> After this goes upstream, you should submit a patch to get rid of
>> all uses of %pF (70 instances in 35 files) and %pf (63 in 34)
>> 
>> Perhaps break the patch by top-level directory (e.g. get all the %pF
>> and %pF in the 17 files under drivers/ in one patch).
>
> frankly, I was going to have some sort of a lazy deprecation process:
> didn't plan to send out a patch set that would hunt down all pf/pF-s.
> hm...

That never works though, we have lots of cruft left over from times when
that's happened and the conversion never quite got finished.

At least if you send out the patches to do the removal they might
eventually get merged.

> speaking of upstream, any objections if this patch set will go through
> the printk tree, in one piece?

Do you mind putting it in a topic branch (based on rc2) and then merge
that into the printk tree? That way I can merge the topic branch iff
there are conflicts later down the line towards 4.15.

cheers


Re: [RFC][PATCH v2 0/7] printk/ia64/ppc64/parisc64: let's deprecate %pF/%pf printk specifiers

2017-09-26 Thread Michael Ellerman
Sergey Senozhatsky  writes:

> On (09/22/17 16:48), Luck, Tony wrote:
> [..]
>> Tested patch series on ia64 successfully.
>> 
>> Tested-by: Tony Luck 
>
> thanks!
>
>> After this goes upstream, you should submit a patch to get rid of
>> all uses of %pF (70 instances in 35 files) and %pf (63 in 34)
>> 
>> Perhaps break the patch by top-level directory (e.g. get all the %pF
>> and %pF in the 17 files under drivers/ in one patch).
>
> frankly, I was going to have some sort of a lazy deprecation process:
> didn't plan to send out a patch set that would hunt down all pf/pF-s.
> hm...

That never works though, we have lots of cruft left over from times when
that's happened and the conversion never quite got finished.

At least if you send out the patches to do the removal they might
eventually get merged.

> speaking of upstream, any objections if this patch set will go through
> the printk tree, in one piece?

Do you mind putting it in a topic branch (based on rc2) and then merge
that into the printk tree? That way I can merge the topic branch iff
there are conflicts later down the line towards 4.15.

cheers


Re: [PATCH] net: stmmac: Meet alignment requirements for DMA

2017-09-26 Thread David Miller
From: Paul Burton 
Date: Tue, 26 Sep 2017 21:30:56 -0700

> Nobody said that you are required to do anything, I suggested that
> it would be beneficial if you were to suggest a change to the
> documented DMA API such that it allows your usage where it currently
> does not.

Documentation is often wrong and it is here.  What 200+ drivers
actually do and depend upon trumps a simple text document.

The requirement is that the memory remains quiescent on the cpu side
while the device messes with it.  And that this quiescence requirement
may or may not be on a cache line basis.

There is absolutely no requirement that the buffers themselves are
cache line aligned.

In fact, receive buffers for networking are intentionally 2-byte
aligned in order for the ipv4 headers to be naturally 32-bit aligned.

Cache line aligning receive buffers will actually make some
architectures trap because of the bad alignment.

So see, this cache line alignment requirement is pure madness from
just about any perspective whatsoever.


Re: [PATCH] net: stmmac: Meet alignment requirements for DMA

2017-09-26 Thread David Miller
From: Paul Burton 
Date: Tue, 26 Sep 2017 21:30:56 -0700

> Nobody said that you are required to do anything, I suggested that
> it would be beneficial if you were to suggest a change to the
> documented DMA API such that it allows your usage where it currently
> does not.

Documentation is often wrong and it is here.  What 200+ drivers
actually do and depend upon trumps a simple text document.

The requirement is that the memory remains quiescent on the cpu side
while the device messes with it.  And that this quiescence requirement
may or may not be on a cache line basis.

There is absolutely no requirement that the buffers themselves are
cache line aligned.

In fact, receive buffers for networking are intentionally 2-byte
aligned in order for the ipv4 headers to be naturally 32-bit aligned.

Cache line aligning receive buffers will actually make some
architectures trap because of the bad alignment.

So see, this cache line alignment requirement is pure madness from
just about any perspective whatsoever.


Re: [PATCH v2 16/16] net: Add support for networking over Thunderbolt cable

2017-09-26 Thread David Miller
From: Mika Westerberg 
Date: Mon, 25 Sep 2017 14:07:38 +0300

> +struct thunderbolt_ip_header {
> + u32 route_hi;
> + u32 route_lo;
> + u32 length_sn;
> + uuid_t uuid;
> + uuid_t initiator_uuid;
> + uuid_t target_uuid;
> + u32 type;
> + u32 command_id;
> +} __packed;

Again, the __packed attribute should not be necessary and needs to be
removed.

> +static void tbnet_pull_tail(struct sk_buff *skb)
> +{
> + skb_frag_t *frag = _shinfo(skb)->frags[0];
> + unsigned int pull_len;
> + void *hdr;
> +
> + hdr = skb_frag_address(frag);
> + pull_len = eth_get_headlen(hdr, TBNET_RX_HDR_SIZE);
> +
> + /* Align pull length to size of long to optimize memcpy performance */
> + skb_copy_to_linear_data(skb, hdr, ALIGN(pull_len, sizeof(long)));

You do not need to copy here, instead you can build SKB's where the
skb->data points directly at the head of your first frag page memory.

See build_skb().

> + skb = net->skb;
> + if (!skb) {
> + skb = netdev_alloc_skb_ip_align(net->dev,
> + TBNET_RX_HDR_SIZE);
> + net->skb = skb;
> + }
> + if (!skb)
> + break;
> +
> + /* Single small buffer we can copy directly to the
> +  * header part of the skb.
> +  */
> + if (hdr->frame_count == 1 && frame_size <= TBNET_RX_HDR_SIZE) {

Here you would use build_skb() instead of netdev_alloc_skb*() for the first
frag, and keep the existing code tacking on subsequent frags using
skb_add_Rx_frag().

> + ret = register_netdev(dev);
> + if (ret) {
> + free_netdev(dev);
> + return ret;
> + }
> +
> + net->handler.uuid = _svc_uuid;
> + net->handler.callback = tbnet_handle_packet,
> + net->handler.data = net;
> + tb_register_protocol_handler(>handler);
> +
> + tb_service_set_drvdata(svc, net);

There could be races here.

At the exact moment you call register_netdev(), your device can be
brought UP, packets transmitted, etc.  You entire set of driver code
paths can be executed.

The rest of those initializations after register_netdev() probably
are needed by the rest of the driver to function properly, so may
need to happen before register_netdev() publishes the device to the
entire world.


Re: [PATCH v2 16/16] net: Add support for networking over Thunderbolt cable

2017-09-26 Thread David Miller
From: Mika Westerberg 
Date: Mon, 25 Sep 2017 14:07:38 +0300

> +struct thunderbolt_ip_header {
> + u32 route_hi;
> + u32 route_lo;
> + u32 length_sn;
> + uuid_t uuid;
> + uuid_t initiator_uuid;
> + uuid_t target_uuid;
> + u32 type;
> + u32 command_id;
> +} __packed;

Again, the __packed attribute should not be necessary and needs to be
removed.

> +static void tbnet_pull_tail(struct sk_buff *skb)
> +{
> + skb_frag_t *frag = _shinfo(skb)->frags[0];
> + unsigned int pull_len;
> + void *hdr;
> +
> + hdr = skb_frag_address(frag);
> + pull_len = eth_get_headlen(hdr, TBNET_RX_HDR_SIZE);
> +
> + /* Align pull length to size of long to optimize memcpy performance */
> + skb_copy_to_linear_data(skb, hdr, ALIGN(pull_len, sizeof(long)));

You do not need to copy here, instead you can build SKB's where the
skb->data points directly at the head of your first frag page memory.

See build_skb().

> + skb = net->skb;
> + if (!skb) {
> + skb = netdev_alloc_skb_ip_align(net->dev,
> + TBNET_RX_HDR_SIZE);
> + net->skb = skb;
> + }
> + if (!skb)
> + break;
> +
> + /* Single small buffer we can copy directly to the
> +  * header part of the skb.
> +  */
> + if (hdr->frame_count == 1 && frame_size <= TBNET_RX_HDR_SIZE) {

Here you would use build_skb() instead of netdev_alloc_skb*() for the first
frag, and keep the existing code tacking on subsequent frags using
skb_add_Rx_frag().

> + ret = register_netdev(dev);
> + if (ret) {
> + free_netdev(dev);
> + return ret;
> + }
> +
> + net->handler.uuid = _svc_uuid;
> + net->handler.callback = tbnet_handle_packet,
> + net->handler.data = net;
> + tb_register_protocol_handler(>handler);
> +
> + tb_service_set_drvdata(svc, net);

There could be races here.

At the exact moment you call register_netdev(), your device can be
brought UP, packets transmitted, etc.  You entire set of driver code
paths can be executed.

The rest of those initializations after register_netdev() probably
are needed by the rest of the driver to function properly, so may
need to happen before register_netdev() publishes the device to the
entire world.


Re: [PATCH v2 02/16] thunderbolt: Add support for XDomain properties

2017-09-26 Thread David Miller
From: Mika Westerberg 
Date: Mon, 25 Sep 2017 14:07:24 +0300

> +struct tb_property_entry {
> + u32 key_hi;
> + u32 key_lo;
> + u16 length;
> + u8 reserved;
> + u8 type;
> + u32 value;
> +} __packed;
> +
> +struct tb_property_rootdir_entry {
> + u32 magic;
> + u32 length;
> + struct tb_property_entry entries[];
> +} __packed;
> +
> +struct tb_property_dir_entry {
> + u32 uuid[4];
> + struct tb_property_entry entries[];
> +} __packed;

There is no apparent need for __packed here, and __packed should be
avoided unless absolutely necessary as it pessimizes the code
significantly on some architectures.

Please remove __packed from these datastructures unless you can
prove it is absolutely needed and, in such case, please document
in a comment why that requirement exists.  Because from the layout
of these types, everything will be packed in just fine without
__packed.

Thank you.


Re: [PATCH v2 02/16] thunderbolt: Add support for XDomain properties

2017-09-26 Thread David Miller
From: Mika Westerberg 
Date: Mon, 25 Sep 2017 14:07:24 +0300

> +struct tb_property_entry {
> + u32 key_hi;
> + u32 key_lo;
> + u16 length;
> + u8 reserved;
> + u8 type;
> + u32 value;
> +} __packed;
> +
> +struct tb_property_rootdir_entry {
> + u32 magic;
> + u32 length;
> + struct tb_property_entry entries[];
> +} __packed;
> +
> +struct tb_property_dir_entry {
> + u32 uuid[4];
> + struct tb_property_entry entries[];
> +} __packed;

There is no apparent need for __packed here, and __packed should be
avoided unless absolutely necessary as it pessimizes the code
significantly on some architectures.

Please remove __packed from these datastructures unless you can
prove it is absolutely needed and, in such case, please document
in a comment why that requirement exists.  Because from the layout
of these types, everything will be packed in just fine without
__packed.

Thank you.


Re: [PATCH v2 06/16] thunderbolt: Add support for XDomain discovery protocol

2017-09-26 Thread David Miller
From: Mika Westerberg 
Date: Mon, 25 Sep 2017 14:07:28 +0300

> +struct icm_fr_event_xdomain_connected {
> + struct icm_pkg_header hdr;
> + u16 reserved;
> + u16 link_info;
> + uuid_t remote_uuid;
> + uuid_t local_uuid;
> + u32 local_route_hi;
> + u32 local_route_lo;
> + u32 remote_route_hi;
> + u32 remote_route_lo;
> +} __packed;
> +
> +struct icm_fr_event_xdomain_disconnected {
> + struct icm_pkg_header hdr;
> + u16 reserved;
> + u16 link_info;
> + uuid_t remote_uuid;
> +} __packed;
> +
>  struct icm_fr_pkg_add_device_key {
>   struct icm_pkg_header hdr;
>   uuid_t ep_uuid;

Again, __packed should be avoided unless absolutely necessary.

Thank you.


Re: [PATCH v2 06/16] thunderbolt: Add support for XDomain discovery protocol

2017-09-26 Thread David Miller
From: Mika Westerberg 
Date: Mon, 25 Sep 2017 14:07:28 +0300

> +struct icm_fr_event_xdomain_connected {
> + struct icm_pkg_header hdr;
> + u16 reserved;
> + u16 link_info;
> + uuid_t remote_uuid;
> + uuid_t local_uuid;
> + u32 local_route_hi;
> + u32 local_route_lo;
> + u32 remote_route_hi;
> + u32 remote_route_lo;
> +} __packed;
> +
> +struct icm_fr_event_xdomain_disconnected {
> + struct icm_pkg_header hdr;
> + u16 reserved;
> + u16 link_info;
> + uuid_t remote_uuid;
> +} __packed;
> +
>  struct icm_fr_pkg_add_device_key {
>   struct icm_pkg_header hdr;
>   uuid_t ep_uuid;

Again, __packed should be avoided unless absolutely necessary.

Thank you.


Re: [PATCH] net: stmmac: Meet alignment requirements for DMA

2017-09-26 Thread Paul Burton
Hi David,

On Tuesday, 26 September 2017 19:52:44 PDT David Miller wrote:
> From: Paul Burton 
> Date: Tue, 26 Sep 2017 14:30:33 -0700
> 
> > I'd suggest that at a minimum if you're unwilling to obey the API as
> > described in Documentation/DMA-API.txt then it would be beneficial
> > if you could propose a change to it such that it works for you, and
> > perhaps we can extend the API & its documentation to allow your
> > usage whilst also allowing us to catch broken uses.
> 
> The networking driver code works fine as is.
> 
> I also didn't write that ill-advised documentation in the DMA docs,
> nor the non-merged new MIPS assertion.
> 
> So I'm trying to figure out on what basis I am required to do
> anything.
> 
> Thank you.

Nobody said you wrote the documentation, but you do maintain code which 
disobeys the documented DMA API & now you're being an ass about it 
unnecessarily.

Nobody said that you are required to do anything, I suggested that it would be 
beneficial if you were to suggest a change to the documented DMA API such that 
it allows your usage where it currently does not. If you don't want to have 
any input into that, and you actually think that your current approach of 
ignoring the documented API is the best path forwards, then we're probably 
done here & I'll be making a note to avoid yourself & anything under net/ to 
whatever extent is possible...

Thanks,
Paul

signature.asc
Description: This is a digitally signed message part.


Re: [PATCH] net: stmmac: Meet alignment requirements for DMA

2017-09-26 Thread Paul Burton
Hi David,

On Tuesday, 26 September 2017 19:52:44 PDT David Miller wrote:
> From: Paul Burton 
> Date: Tue, 26 Sep 2017 14:30:33 -0700
> 
> > I'd suggest that at a minimum if you're unwilling to obey the API as
> > described in Documentation/DMA-API.txt then it would be beneficial
> > if you could propose a change to it such that it works for you, and
> > perhaps we can extend the API & its documentation to allow your
> > usage whilst also allowing us to catch broken uses.
> 
> The networking driver code works fine as is.
> 
> I also didn't write that ill-advised documentation in the DMA docs,
> nor the non-merged new MIPS assertion.
> 
> So I'm trying to figure out on what basis I am required to do
> anything.
> 
> Thank you.

Nobody said you wrote the documentation, but you do maintain code which 
disobeys the documented DMA API & now you're being an ass about it 
unnecessarily.

Nobody said that you are required to do anything, I suggested that it would be 
beneficial if you were to suggest a change to the documented DMA API such that 
it allows your usage where it currently does not. If you don't want to have 
any input into that, and you actually think that your current approach of 
ignoring the documented API is the best path forwards, then we're probably 
done here & I'll be making a note to avoid yourself & anything under net/ to 
whatever extent is possible...

Thanks,
Paul

signature.asc
Description: This is a digitally signed message part.


Re: [PATCH v8 12/28] x86/insn-eval: Add utility functions to get segment selector

2017-09-26 Thread Ricardo Neri
On Tue, 2017-09-26 at 12:43 +0200, Borislav Petkov wrote:
> Hi,
> 
> On Fri, Aug 18, 2017 at 05:27:53PM -0700, Ricardo Neri wrote:
> > 
> > When computing a linear address and segmentation is used, we need
> > to know
> > the base address of the segment involved in the computation. In
> > most of
> > the cases, the segment base address will be zero as in
> > USER_DS/USER32_DS.
> ...
> 
> > 
> >  arch/x86/include/asm/inat.h |  10 ++
> >  arch/x86/lib/insn-eval.c| 278
> > 
> >  2 files changed, 288 insertions(+)
> so I did a bunch of simplifications on top, see if you agree:
> 
> * we should always test for if (!insn) first because otherwise we
> can't talk
> about a segment at all.

This is true except when we don't have an insn at all (well, it may be
non-NULL but it will only contain garbage). The case to which I am
referring is when we begin decoding our instruction. The first step is
to copy_from_user the instruction and populate insn. For this we must
calculate the linear address from where we copy using CS and rIP.

Furthermore, in this only case we don't need to look at insn at all as
the only register involved is rIP no segment override prefixes are
allowed.

Please see my comment below.

> 
> * the nomenclature should be clear: if we return INAT_SEG_REG_* those
> are own
> defined indices and not registers or prefixes or whatever else, so
> everywhere we
> state that we're returning an *index*.

I agree.

> 
> * and then shorten local variables' names as reading "reg" every
> other line doesn't make it clearer :)

I agree.
> 
> * also some comments formatting for better readability.

Thanks!
> 
> * and prefixing register names with "r" in the comments means then
> all
> register widths, not only 32-bit. Dunno, is "(E)" SDM nomenclature
> for
> the different register widths?

A quick look at the section 3.1.1.3 of the Intel Software Development
Manual Vol2 reveals that r/m16 operands are referred as [ACDB]X,
[SB]P,and [SD]I. r/m32 operands are referred as E[ACDB]X, E[SB]P and
E[SD]I. r/m64 operands are referred as R[ACDB]X, R[SB]P, R[SD]I and
R[8-15].

Also, some instructions (e.g., string structions) do use the
nomenclature (E)[DI]I protected mode and (R|E)[DI]I for long mode.

I only used "(E)" (i.e., not the "(R|)" part) as these utility
functions will deal mostly with protected mode, unless FS or GS are
used in long mode.

> 
> ---
> diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
> index 86f58ce6c302..720529573d72 100644
> --- a/arch/x86/lib/insn-eval.c
> +++ b/arch/x86/lib/insn-eval.c
> @@ -44,50 +44,45 @@ static bool is_string_insn(struct insn *insn)
>  }
>  
>  /**
> - * get_overridden_seg_reg() - obtain segment register to use from
> prefixes
> - * @insn:Instruction structure with segment override
> prefixes
> - * @regs:Structure with register values as seen when
> entering kernel mode
> + * get_seg_reg_idx() - obtain segment register index to use from
> prefixes
> + * @insn:Instruction with segment override prefixes
> + * @regs:Register values as seen when entering kernel mode
>   * @regoff:  Operand offset, in pt_regs, used to deterimine
> segment register
>   *
> - * The segment register to which an effective address refers depends
> on
> - * a) whether running in long mode (in such a case semgment override
> prefixes
> - * are ignored. b) Whether segment override prefixes must be ignored
> for certain
> - * registers: always use CS when the register is (R|E)IP; always use
> ES when
> - * operand register is (E)DI with a string instruction as defined in
> the Intel
> - * documentation. c) If segment overrides prefixes are found in the
> instruction
> - * prefixes. d) Use the default segment register associated with the
> operand
> - * register.
> + * The segment register to which an effective address refers,
> depends on:
> + *
> + * a) whether running in long mode (in such a case segment override
> prefixes
> + * are ignored).
> + *
> + * b) Whether segment override prefixes must be ignored for certain
> + * registers: always use CS when the register is rIP; always use ES
> when
> + * operand register is rDI with a string instruction as defined in
> the Intel
> + * documentation.
>   *
> - * This function returns the overridden segment register to use, if
> any, as per
> - * the conditions described above. Please note that this function
> + * c) If segment overrides prefixes are found in the instruction
> prefixes.
> + *
> + * d) Use the default segment register associated with the operand
> register.
> + *
> + * This function returns the segment register override to use, if
> any,
> + * as per the conditions described above. Please note that this
> function
>   * does not return the value in the segment register (i.e., the
> segment
> - * selector). The segment selector needs to be obtained using
> - * get_segment_selector() and passing the segment register resolved
> by
> + * selector) but our defined 

Re: [PATCH v8 12/28] x86/insn-eval: Add utility functions to get segment selector

2017-09-26 Thread Ricardo Neri
On Tue, 2017-09-26 at 12:43 +0200, Borislav Petkov wrote:
> Hi,
> 
> On Fri, Aug 18, 2017 at 05:27:53PM -0700, Ricardo Neri wrote:
> > 
> > When computing a linear address and segmentation is used, we need
> > to know
> > the base address of the segment involved in the computation. In
> > most of
> > the cases, the segment base address will be zero as in
> > USER_DS/USER32_DS.
> ...
> 
> > 
> >  arch/x86/include/asm/inat.h |  10 ++
> >  arch/x86/lib/insn-eval.c| 278
> > 
> >  2 files changed, 288 insertions(+)
> so I did a bunch of simplifications on top, see if you agree:
> 
> * we should always test for if (!insn) first because otherwise we
> can't talk
> about a segment at all.

This is true except when we don't have an insn at all (well, it may be
non-NULL but it will only contain garbage). The case to which I am
referring is when we begin decoding our instruction. The first step is
to copy_from_user the instruction and populate insn. For this we must
calculate the linear address from where we copy using CS and rIP.

Furthermore, in this only case we don't need to look at insn at all as
the only register involved is rIP no segment override prefixes are
allowed.

Please see my comment below.

> 
> * the nomenclature should be clear: if we return INAT_SEG_REG_* those
> are own
> defined indices and not registers or prefixes or whatever else, so
> everywhere we
> state that we're returning an *index*.

I agree.

> 
> * and then shorten local variables' names as reading "reg" every
> other line doesn't make it clearer :)

I agree.
> 
> * also some comments formatting for better readability.

Thanks!
> 
> * and prefixing register names with "r" in the comments means then
> all
> register widths, not only 32-bit. Dunno, is "(E)" SDM nomenclature
> for
> the different register widths?

A quick look at the section 3.1.1.3 of the Intel Software Development
Manual Vol2 reveals that r/m16 operands are referred as [ACDB]X,
[SB]P,and [SD]I. r/m32 operands are referred as E[ACDB]X, E[SB]P and
E[SD]I. r/m64 operands are referred as R[ACDB]X, R[SB]P, R[SD]I and
R[8-15].

Also, some instructions (e.g., string structions) do use the
nomenclature (E)[DI]I protected mode and (R|E)[DI]I for long mode.

I only used "(E)" (i.e., not the "(R|)" part) as these utility
functions will deal mostly with protected mode, unless FS or GS are
used in long mode.

> 
> ---
> diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
> index 86f58ce6c302..720529573d72 100644
> --- a/arch/x86/lib/insn-eval.c
> +++ b/arch/x86/lib/insn-eval.c
> @@ -44,50 +44,45 @@ static bool is_string_insn(struct insn *insn)
>  }
>  
>  /**
> - * get_overridden_seg_reg() - obtain segment register to use from
> prefixes
> - * @insn:Instruction structure with segment override
> prefixes
> - * @regs:Structure with register values as seen when
> entering kernel mode
> + * get_seg_reg_idx() - obtain segment register index to use from
> prefixes
> + * @insn:Instruction with segment override prefixes
> + * @regs:Register values as seen when entering kernel mode
>   * @regoff:  Operand offset, in pt_regs, used to deterimine
> segment register
>   *
> - * The segment register to which an effective address refers depends
> on
> - * a) whether running in long mode (in such a case semgment override
> prefixes
> - * are ignored. b) Whether segment override prefixes must be ignored
> for certain
> - * registers: always use CS when the register is (R|E)IP; always use
> ES when
> - * operand register is (E)DI with a string instruction as defined in
> the Intel
> - * documentation. c) If segment overrides prefixes are found in the
> instruction
> - * prefixes. d) Use the default segment register associated with the
> operand
> - * register.
> + * The segment register to which an effective address refers,
> depends on:
> + *
> + * a) whether running in long mode (in such a case segment override
> prefixes
> + * are ignored).
> + *
> + * b) Whether segment override prefixes must be ignored for certain
> + * registers: always use CS when the register is rIP; always use ES
> when
> + * operand register is rDI with a string instruction as defined in
> the Intel
> + * documentation.
>   *
> - * This function returns the overridden segment register to use, if
> any, as per
> - * the conditions described above. Please note that this function
> + * c) If segment overrides prefixes are found in the instruction
> prefixes.
> + *
> + * d) Use the default segment register associated with the operand
> register.
> + *
> + * This function returns the segment register override to use, if
> any,
> + * as per the conditions described above. Please note that this
> function
>   * does not return the value in the segment register (i.e., the
> segment
> - * selector). The segment selector needs to be obtained using
> - * get_segment_selector() and passing the segment register resolved
> by
> + * selector) but our defined 

Re: [PATCH net] net/ncsi: Don't assume last available channel exists

2017-09-26 Thread Samuel Mendoza-Jonas
On Thu, 2017-09-21 at 18:11 -0700, David Miller wrote:
> From: Samuel Mendoza-Jonas 
> Date: Fri, 22 Sep 2017 11:00:00 +1000
> 
> > If we haven't configured a channel yet (or are in the process of doing
> > so) we won't have a hot_channel - does it make more sense to
> > - check against the hot_channel as currently done,
> > - only check the filter size at configure time for /each/ channel,
> > - only conditionally enable the .ndo_vlan_rx_add_vid net_device callback
> > once we've configured a channel (eg. for ftgmac100 in the
> > ftgmac100_ncsi_handler() callback?)
> 
> The last isn't so feasible.
> 
> The device shouldn't be marked attached until a channel is available,
> because it seems like communication cannot occur until one is.  Right?

Yes that's right.

> 
> You could experiment with netif_device_detach()/netif_device_attach().
> 
> When the device is in the detached state, callbacks such as
> ->ndo_vlan_rx_add_vid() will not be invoked.

This looked like the way at first, but _detach() ceases any tx/rx on the
interface right?
NCSI still needs the interface to be active since the 'channels' are on a
separate network controller that the interface is connected to, eg on the
machines I'm using:

BMC  'Host' network controller
--   
|ftgmac100 interface | < NCSI Link > | BCM5719 interface| --> 
external interface
--   

Looking at the NCSI init path I believe we're guaranteed to have an ndp
struct by the time ndo_vlan_rx_add_vid() is called, making some of those
checks overly cautious. It might be easiest to just track new vids as we
see them (up to the NCSI spec limit), and then deal with configured
channels on a case by case basis since their limits can be different.
I'll work on a V2 but hopefully I haven't misinterpreted
_detach()/_attach() :)

Sam


Re: [PATCH net] net/ncsi: Don't assume last available channel exists

2017-09-26 Thread Samuel Mendoza-Jonas
On Thu, 2017-09-21 at 18:11 -0700, David Miller wrote:
> From: Samuel Mendoza-Jonas 
> Date: Fri, 22 Sep 2017 11:00:00 +1000
> 
> > If we haven't configured a channel yet (or are in the process of doing
> > so) we won't have a hot_channel - does it make more sense to
> > - check against the hot_channel as currently done,
> > - only check the filter size at configure time for /each/ channel,
> > - only conditionally enable the .ndo_vlan_rx_add_vid net_device callback
> > once we've configured a channel (eg. for ftgmac100 in the
> > ftgmac100_ncsi_handler() callback?)
> 
> The last isn't so feasible.
> 
> The device shouldn't be marked attached until a channel is available,
> because it seems like communication cannot occur until one is.  Right?

Yes that's right.

> 
> You could experiment with netif_device_detach()/netif_device_attach().
> 
> When the device is in the detached state, callbacks such as
> ->ndo_vlan_rx_add_vid() will not be invoked.

This looked like the way at first, but _detach() ceases any tx/rx on the
interface right?
NCSI still needs the interface to be active since the 'channels' are on a
separate network controller that the interface is connected to, eg on the
machines I'm using:

BMC  'Host' network controller
--   
|ftgmac100 interface | < NCSI Link > | BCM5719 interface| --> 
external interface
--   

Looking at the NCSI init path I believe we're guaranteed to have an ndp
struct by the time ndo_vlan_rx_add_vid() is called, making some of those
checks overly cautious. It might be easiest to just track new vids as we
see them (up to the NCSI spec limit), and then deal with configured
channels on a case by case basis since their limits can be different.
I'll work on a V2 but hopefully I haven't misinterpreted
_detach()/_attach() :)

Sam


[PATCH v2] mm: update comments for struct page.mapping

2017-09-26 Thread changbin . du
From: Changbin Du 

The struct page.mapping can NULL or points to one object of type
address_space, anon_vma or KSM private structure.

Signed-off-by: Changbin Du 
---
v2: add back flag reference.

---
 include/linux/mm_types.h | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 46f4ecf5..6d4c51a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -47,8 +47,10 @@ struct page {
 * inode address_space, or NULL.
 * If page mapped as anonymous
 * memory, low bit is set, and
-* it points to anon_vma object:
-* see PAGE_MAPPING_ANON below.
+* it points to anon_vma object
+* or KSM private structure. See
+* PAGE_MAPPING_ANON and
+* PAGE_MAPPING_KSM.
 */
void *s_mem;/* slab first object */
atomic_t compound_mapcount; /* first tail page */
-- 
2.7.4



[PATCH v2] mm: update comments for struct page.mapping

2017-09-26 Thread changbin . du
From: Changbin Du 

The struct page.mapping can NULL or points to one object of type
address_space, anon_vma or KSM private structure.

Signed-off-by: Changbin Du 
---
v2: add back flag reference.

---
 include/linux/mm_types.h | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 46f4ecf5..6d4c51a 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -47,8 +47,10 @@ struct page {
 * inode address_space, or NULL.
 * If page mapped as anonymous
 * memory, low bit is set, and
-* it points to anon_vma object:
-* see PAGE_MAPPING_ANON below.
+* it points to anon_vma object
+* or KSM private structure. See
+* PAGE_MAPPING_ANON and
+* PAGE_MAPPING_KSM.
 */
void *s_mem;/* slab first object */
atomic_t compound_mapcount; /* first tail page */
-- 
2.7.4



Re: [PATCH] mm: update comments for struct page.mapping

2017-09-26 Thread Du, Changbin
On Tue, Sep 26, 2017 at 04:30:27PM -0700, Andrew Morton wrote:
> On Tue, 26 Sep 2017 15:14:17 +0800 changbin...@intel.com wrote:
> 
> > From: Changbin Du 
> > 
> > The struct page.mapping can NULL or points to one object of type
> > address_space, anon_vma or KSM private structure.
> > 
> > ...
> >
> > --- a/include/linux/mm_types.h
> > +++ b/include/linux/mm_types.h
> > @@ -47,8 +47,8 @@ struct page {
> >  * inode address_space, or NULL.
> >  * If page mapped as anonymous
> >  * memory, low bit is set, and
> > -* it points to anon_vma object:
> > -* see PAGE_MAPPING_ANON below.
> > +* it points to anon_vma object
> > +* or KSM private structure.
> >  */
> > void *s_mem;/* slab first object */
> > atomic_t compound_mapcount; /* first tail page */
> 
> Why did you remove the (useful) reference to PAGE_MAPPING_ANON?

There are two flags now, let me add them back. thanks.

-- 
Thanks,
Changbin Du


signature.asc
Description: PGP signature


Re: [PATCH] mm: update comments for struct page.mapping

2017-09-26 Thread Du, Changbin
On Tue, Sep 26, 2017 at 04:30:27PM -0700, Andrew Morton wrote:
> On Tue, 26 Sep 2017 15:14:17 +0800 changbin...@intel.com wrote:
> 
> > From: Changbin Du 
> > 
> > The struct page.mapping can NULL or points to one object of type
> > address_space, anon_vma or KSM private structure.
> > 
> > ...
> >
> > --- a/include/linux/mm_types.h
> > +++ b/include/linux/mm_types.h
> > @@ -47,8 +47,8 @@ struct page {
> >  * inode address_space, or NULL.
> >  * If page mapped as anonymous
> >  * memory, low bit is set, and
> > -* it points to anon_vma object:
> > -* see PAGE_MAPPING_ANON below.
> > +* it points to anon_vma object
> > +* or KSM private structure.
> >  */
> > void *s_mem;/* slab first object */
> > atomic_t compound_mapcount; /* first tail page */
> 
> Why did you remove the (useful) reference to PAGE_MAPPING_ANON?

There are two flags now, let me add them back. thanks.

-- 
Thanks,
Changbin Du


signature.asc
Description: PGP signature


Re: [PATCH v2 03/13] drm/sun4i: tcon: Add support for demuxing TCON output on A31

2017-09-26 Thread Chen-Yu Tsai
On Tue, Sep 26, 2017 at 5:56 PM, Maxime Ripard
 wrote:
> Hi,
>
> On Tue, Sep 26, 2017 at 06:59:09AM +, Chen-Yu Tsai wrote:
>> On systems with 2 TCONs such as the A31, it is possible to demux the
>> output of the TCONs to one encoder.
>>
>> Add support for this for the A31.
>>
>> Signed-off-by: Chen-Yu Tsai 
>> ---
>>  drivers/gpu/drm/sun4i/sun4i_tcon.c | 61 
>> ++
>>  1 file changed, 61 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c 
>> b/drivers/gpu/drm/sun4i/sun4i_tcon.c
>> index e853dfe51389..770b843a6fa9 100644
>> --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
>> +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
>> @@ -14,9 +14,12 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>  #include 
>>  #include 
>>
>> +#include 
>> +
>>  #include 
>>  #include 
>>  #include 
>> @@ -109,11 +112,69 @@ void sun4i_tcon_enable_vblank(struct sun4i_tcon *tcon, 
>> bool enable)
>>  }
>>  EXPORT_SYMBOL(sun4i_tcon_enable_vblank);
>>
>> +static struct sun4i_tcon *sun4i_get_first_tcon(struct drm_device *drm)
>> +{
>> + struct sun4i_drv *drv = drm->dev_private;
>> + struct sun4i_tcon *tcon;
>> +
>> + list_for_each_entry(tcon, >tcon_list, list)
>> + if (tcon->id == 0)
>> + return tcon;
>> +
>> + dev_warn(drm->dev,
>> +  "TCON0 not found, display output muxing may not work\n");
>> +
>> + return tcon;
>> +}
>> +
>> +static int _sun6i_tcon_set_mux(struct drm_encoder *encoder)
>> +{
>> + struct sun4i_tcon *tcon = sun4i_get_first_tcon(encoder->dev);
>> + int tcon_id = drm_crtc_to_sun4i_crtc(encoder->crtc)->tcon->id;
>> + u32 shift;
>> +
>> + DRM_DEBUG_DRIVER("Muxing encoder %s to CRTC %s (TCON %d)\n",
>> +  encoder->name, encoder->crtc->name, tcon_id);
>> +
>> + /* Only 2 TCONs */
>> + if (tcon_id >= 2)
>> + return -EINVAL;
>> +
>> + switch (encoder->encoder_type) {
>> + case DRM_MODE_ENCODER_TMDS:
>> + /* HDMI */
>> + shift = 8;
>> + break;
>> + case DRM_MODE_ENCODER_DSI:
>> + /* No MIPI DSI on A31s */
>> + if (of_device_is_compatible(tcon->dev->of_node,
>> + "allwinner,sun6i-a31s-tcon"))
>
> I'm not sure that test is needed.
>
> We won't end up in that case if we don't have a connected DSI block,
> which isn't going to be the case on the A31. And I guess we can tackle
> DSI later (when I'll send my patches...).

OK. I'll leave a comment instead.

>
>> + return -EINVAL;
>> + shift = 0;
>> + break;
>> + default:
>> + return -EINVAL;
>> + }
>> +
>> + regmap_update_bits(tcon->regs, SUN4I_TCON_MUX_CTRL_REG,
>> +0x3 << shift, tcon_id << shift);
>> +
>> + return 0;
>> +}
>> +
>>  void sun4i_tcon_set_mux(struct sun4i_tcon *tcon, int channel,
>>   struct drm_encoder *encoder)
>>  {
>> + /* Get the device node of the display engine */
>> + struct device_node *node = encoder->dev->dev->of_node;
>>   u32 val;
>>
>> + if (of_device_is_compatible(node, 
>> "allwinner,sun6i-a31-display-engine") ||
>> + of_device_is_compatible(node, 
>> "allwinner,sun6i-a31s-display-engine")) {
>> + _sun6i_tcon_set_mux(encoder);
>> + return;
>> + }
>> +
>
> I'd really like to avoid mix and matching the structure defined
> behaviour and those of_device_is_compatible calls spread out
> everywhere.
>
> You can either add a flag or a function pointer.

Function pointer it is!

ChenYu


Re: [PATCH v2 03/13] drm/sun4i: tcon: Add support for demuxing TCON output on A31

2017-09-26 Thread Chen-Yu Tsai
On Tue, Sep 26, 2017 at 5:56 PM, Maxime Ripard
 wrote:
> Hi,
>
> On Tue, Sep 26, 2017 at 06:59:09AM +, Chen-Yu Tsai wrote:
>> On systems with 2 TCONs such as the A31, it is possible to demux the
>> output of the TCONs to one encoder.
>>
>> Add support for this for the A31.
>>
>> Signed-off-by: Chen-Yu Tsai 
>> ---
>>  drivers/gpu/drm/sun4i/sun4i_tcon.c | 61 
>> ++
>>  1 file changed, 61 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c 
>> b/drivers/gpu/drm/sun4i/sun4i_tcon.c
>> index e853dfe51389..770b843a6fa9 100644
>> --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
>> +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
>> @@ -14,9 +14,12 @@
>>  #include 
>>  #include 
>>  #include 
>> +#include 
>>  #include 
>>  #include 
>>
>> +#include 
>> +
>>  #include 
>>  #include 
>>  #include 
>> @@ -109,11 +112,69 @@ void sun4i_tcon_enable_vblank(struct sun4i_tcon *tcon, 
>> bool enable)
>>  }
>>  EXPORT_SYMBOL(sun4i_tcon_enable_vblank);
>>
>> +static struct sun4i_tcon *sun4i_get_first_tcon(struct drm_device *drm)
>> +{
>> + struct sun4i_drv *drv = drm->dev_private;
>> + struct sun4i_tcon *tcon;
>> +
>> + list_for_each_entry(tcon, >tcon_list, list)
>> + if (tcon->id == 0)
>> + return tcon;
>> +
>> + dev_warn(drm->dev,
>> +  "TCON0 not found, display output muxing may not work\n");
>> +
>> + return tcon;
>> +}
>> +
>> +static int _sun6i_tcon_set_mux(struct drm_encoder *encoder)
>> +{
>> + struct sun4i_tcon *tcon = sun4i_get_first_tcon(encoder->dev);
>> + int tcon_id = drm_crtc_to_sun4i_crtc(encoder->crtc)->tcon->id;
>> + u32 shift;
>> +
>> + DRM_DEBUG_DRIVER("Muxing encoder %s to CRTC %s (TCON %d)\n",
>> +  encoder->name, encoder->crtc->name, tcon_id);
>> +
>> + /* Only 2 TCONs */
>> + if (tcon_id >= 2)
>> + return -EINVAL;
>> +
>> + switch (encoder->encoder_type) {
>> + case DRM_MODE_ENCODER_TMDS:
>> + /* HDMI */
>> + shift = 8;
>> + break;
>> + case DRM_MODE_ENCODER_DSI:
>> + /* No MIPI DSI on A31s */
>> + if (of_device_is_compatible(tcon->dev->of_node,
>> + "allwinner,sun6i-a31s-tcon"))
>
> I'm not sure that test is needed.
>
> We won't end up in that case if we don't have a connected DSI block,
> which isn't going to be the case on the A31. And I guess we can tackle
> DSI later (when I'll send my patches...).

OK. I'll leave a comment instead.

>
>> + return -EINVAL;
>> + shift = 0;
>> + break;
>> + default:
>> + return -EINVAL;
>> + }
>> +
>> + regmap_update_bits(tcon->regs, SUN4I_TCON_MUX_CTRL_REG,
>> +0x3 << shift, tcon_id << shift);
>> +
>> + return 0;
>> +}
>> +
>>  void sun4i_tcon_set_mux(struct sun4i_tcon *tcon, int channel,
>>   struct drm_encoder *encoder)
>>  {
>> + /* Get the device node of the display engine */
>> + struct device_node *node = encoder->dev->dev->of_node;
>>   u32 val;
>>
>> + if (of_device_is_compatible(node, 
>> "allwinner,sun6i-a31-display-engine") ||
>> + of_device_is_compatible(node, 
>> "allwinner,sun6i-a31s-display-engine")) {
>> + _sun6i_tcon_set_mux(encoder);
>> + return;
>> + }
>> +
>
> I'd really like to avoid mix and matching the structure defined
> behaviour and those of_device_is_compatible calls spread out
> everywhere.
>
> You can either add a flag or a function pointer.

Function pointer it is!

ChenYu


Re: [PATCH] mmc:host:sdhci-pci: Addition of Arasan PCI controller with integrated phy.

2017-09-26 Thread Shawn Lin


On 2017/9/27 2:59, Atul Garg wrote:

The Arasan controller is based on a FPGA platform and has integrated phy
with specific phy registers used during the initialization and
management of different modes. The phy and the controller are integrated
and registers are very specific to Arasan.

Arasan being an IP provider licenses these IPs to various companies for
integration of IP in custom SOCs. The custom SOCs define own register
map depending on how bits are tied inside the SOC for phy registers,
depending on SOC memory plan and hence will require own platform drivers.
If more details on phy registers are required, an interfacce document is
hosted at https://arasan.com/NF/eMMC5.1 PHY Programming in Linux.pdf.

Signed-off-by: Atul Garg 
---
  drivers/mmc/host/sdhci-pci-core.c | 372 ++


Fundamentally maybe you need a sdhci-pci-arasan.c

And the arasan PHY isn't new here as sdhci-of-arasan already added
arasan PHY support for eMMC 5.1, but just with different register
layout and content. So could you also use generic PHY framework?


  drivers/mmc/host/sdhci-pci.h  |   4 +
  2 files changed, 376 insertions(+)

diff --git a/drivers/mmc/host/sdhci-pci-core.c 
b/drivers/mmc/host/sdhci-pci-core.c
index 5f3f7b5..52efd63 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -1112,6 +1112,377 @@ static const struct sdhci_pci_fixes sdhci_rtsx = {
.probe_slot = rtsx_probe_slot,
  };
  
+/* Extra registers for Arasan SD Host Controller for eMMC5.1 PHY */

+#define HOST_PHY_ADDR_REG  0x300
+#define HOST_PHY_DATA_REG  0x304
+
+#define PHY_DLLCONTROL 0x00
+#define PHY_IOPADCONTROL1  0x01
+#define PHY_IOPADCONTROL2  0x02
+#define PHY_IOPADSTATUS0x03
+#define PHY_IOODCONTROL1   0x04
+#define PHY_IOODCONTROL2   0x05
+#define PHY_IORENCONTROL1  0x06
+#define PHY_IORENCONTROL2  0x07
+#define PHY_IOPUCONTROL1   0x08
+#define PHY_IOPUCONTROL2   0x09
+#define PHY_IOODRELCONTROL10x0A
+#define PHY_IOODRELCONTROL20x0B
+#define PHY_INPUTTAPDELAY  0x0C
+#define PHY_OUTPUTTAPDELAY 0x0D
+#define PHY_STROBESELECT   0x0E
+#define PHY_CLKBUFSELECT   0x0F
+#define PHY_MODECONTROL0x11
+#define PHY_DLLTRIM0x12
+#define PHY_LDOCONTROL 0x1F
+#define PHY_CMDCONTROL 0x20
+#define PHY_DATACONTROL0x21
+#define PHY_STROBECONTROL  0x22
+#define PHY_CLKCONTROL 0x23
+#define PHY_CONTROL0x24
+
+#define DLL_ENABLE BIT(3)
+#define RTRIM_ENABLE   BIT(1)
+#define PDB_ENABLE BIT(1)
+#define RETB_ENABLEBIT(6)
+#define ODEN_CMD   BIT(1)
+#define ODEN_DAT   0xFF
+#define REN_STRB   BIT(0)
+#define REN_CMDBIT(1)
+#define REN_DAT0xFF
+#define PU_CMD BIT(1)
+#define PU_DAT 0xFF
+#define ITAP_DLY_ENBIT(0)
+#define OTAP_DLY_ENBIT(0)
+#define OD_REL_CMD BIT(1)
+#define OD_REL_DAT 0xFF
+#define DLL_TRIM   0x8
+#define PDB_CMDBIT(0)
+#define PDB_DAT0xFF
+#define PDB_STRB   BIT(0)
+#define PDB_CLKBIT(0)
+#define LDO_RDYB   0xFE
+#define CALDONE_MASK   0x10
+
+int arasan_phy_write(struct sdhci_host *host, u8 data, u8 offset)
+{
+   u32 timeout;
+   u8 busy;
+
+   sdhci_writew(host, data, HOST_PHY_DATA_REG);
+   sdhci_writew(host, ((1<<8) | offset), HOST_PHY_ADDR_REG);
+   timeout = 20;
+   do {
+   busy = sdhci_readw(host, HOST_PHY_ADDR_REG) & (1<<9);
+   if (!busy)
+   break;
+   mdelay(1);
+   } while (timeout--);
+   if (!timeout)
+   return -ENODEV;
+   return 0;
+}
+
+static int arasan_phy_read(struct sdhci_host *host, u8 offset, u8 *data)
+{
+   u32 timeout;
+   u8 busy;
+
+   sdhci_writew(host, 0, HOST_PHY_DATA_REG);
+   *data = sdhci_readw(host, HOST_PHY_DATA_REG) & 0xFF;
+   sdhci_writew(host, ((0<<8) | offset), HOST_PHY_ADDR_REG);
+   timeout = 20;
+   do {
+   busy = sdhci_readw(host, HOST_PHY_ADDR_REG) & (1<<9);
+   if (!busy)
+   break;
+   mdelay(1);
+   } while (timeout--);
+   if (!timeout)
+   return -ENODEV;
+   *data = sdhci_readw(host, HOST_PHY_DATA_REG) & 0xFF;
+   return 0;
+}
+
+/* Initialize the Arasan PHY */
+int arasan_phy_init(struct sdhci_host *host)
+{
+   struct sdhci_pci_slot *slot = sdhci_priv(host);
+   struct pci_dev *pdev = slot->chip->pdev;
+   u8 val;
+
+   if (arasan_phy_write(host, 0, PHY_CONTROL))
+   return -ENODEV;
+   if (arasan_phy_read(host, PHY_IOPADCONTROL1, ))
+   return -ENODEV;
+   if (arasan_phy_write(host, val | RETB_ENABLE, PHY_IOPADCONTROL1))
+   return -ENODEV;
+   if (arasan_phy_read(host, PHY_IOPADCONTROL2, ))
+   return -ENODEV;
+   if (arasan_phy_write(host, val | 

Re: [PATCH] mmc:host:sdhci-pci: Addition of Arasan PCI controller with integrated phy.

2017-09-26 Thread Shawn Lin


On 2017/9/27 2:59, Atul Garg wrote:

The Arasan controller is based on a FPGA platform and has integrated phy
with specific phy registers used during the initialization and
management of different modes. The phy and the controller are integrated
and registers are very specific to Arasan.

Arasan being an IP provider licenses these IPs to various companies for
integration of IP in custom SOCs. The custom SOCs define own register
map depending on how bits are tied inside the SOC for phy registers,
depending on SOC memory plan and hence will require own platform drivers.
If more details on phy registers are required, an interfacce document is
hosted at https://arasan.com/NF/eMMC5.1 PHY Programming in Linux.pdf.

Signed-off-by: Atul Garg 
---
  drivers/mmc/host/sdhci-pci-core.c | 372 ++


Fundamentally maybe you need a sdhci-pci-arasan.c

And the arasan PHY isn't new here as sdhci-of-arasan already added
arasan PHY support for eMMC 5.1, but just with different register
layout and content. So could you also use generic PHY framework?


  drivers/mmc/host/sdhci-pci.h  |   4 +
  2 files changed, 376 insertions(+)

diff --git a/drivers/mmc/host/sdhci-pci-core.c 
b/drivers/mmc/host/sdhci-pci-core.c
index 5f3f7b5..52efd63 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -1112,6 +1112,377 @@ static const struct sdhci_pci_fixes sdhci_rtsx = {
.probe_slot = rtsx_probe_slot,
  };
  
+/* Extra registers for Arasan SD Host Controller for eMMC5.1 PHY */

+#define HOST_PHY_ADDR_REG  0x300
+#define HOST_PHY_DATA_REG  0x304
+
+#define PHY_DLLCONTROL 0x00
+#define PHY_IOPADCONTROL1  0x01
+#define PHY_IOPADCONTROL2  0x02
+#define PHY_IOPADSTATUS0x03
+#define PHY_IOODCONTROL1   0x04
+#define PHY_IOODCONTROL2   0x05
+#define PHY_IORENCONTROL1  0x06
+#define PHY_IORENCONTROL2  0x07
+#define PHY_IOPUCONTROL1   0x08
+#define PHY_IOPUCONTROL2   0x09
+#define PHY_IOODRELCONTROL10x0A
+#define PHY_IOODRELCONTROL20x0B
+#define PHY_INPUTTAPDELAY  0x0C
+#define PHY_OUTPUTTAPDELAY 0x0D
+#define PHY_STROBESELECT   0x0E
+#define PHY_CLKBUFSELECT   0x0F
+#define PHY_MODECONTROL0x11
+#define PHY_DLLTRIM0x12
+#define PHY_LDOCONTROL 0x1F
+#define PHY_CMDCONTROL 0x20
+#define PHY_DATACONTROL0x21
+#define PHY_STROBECONTROL  0x22
+#define PHY_CLKCONTROL 0x23
+#define PHY_CONTROL0x24
+
+#define DLL_ENABLE BIT(3)
+#define RTRIM_ENABLE   BIT(1)
+#define PDB_ENABLE BIT(1)
+#define RETB_ENABLEBIT(6)
+#define ODEN_CMD   BIT(1)
+#define ODEN_DAT   0xFF
+#define REN_STRB   BIT(0)
+#define REN_CMDBIT(1)
+#define REN_DAT0xFF
+#define PU_CMD BIT(1)
+#define PU_DAT 0xFF
+#define ITAP_DLY_ENBIT(0)
+#define OTAP_DLY_ENBIT(0)
+#define OD_REL_CMD BIT(1)
+#define OD_REL_DAT 0xFF
+#define DLL_TRIM   0x8
+#define PDB_CMDBIT(0)
+#define PDB_DAT0xFF
+#define PDB_STRB   BIT(0)
+#define PDB_CLKBIT(0)
+#define LDO_RDYB   0xFE
+#define CALDONE_MASK   0x10
+
+int arasan_phy_write(struct sdhci_host *host, u8 data, u8 offset)
+{
+   u32 timeout;
+   u8 busy;
+
+   sdhci_writew(host, data, HOST_PHY_DATA_REG);
+   sdhci_writew(host, ((1<<8) | offset), HOST_PHY_ADDR_REG);
+   timeout = 20;
+   do {
+   busy = sdhci_readw(host, HOST_PHY_ADDR_REG) & (1<<9);
+   if (!busy)
+   break;
+   mdelay(1);
+   } while (timeout--);
+   if (!timeout)
+   return -ENODEV;
+   return 0;
+}
+
+static int arasan_phy_read(struct sdhci_host *host, u8 offset, u8 *data)
+{
+   u32 timeout;
+   u8 busy;
+
+   sdhci_writew(host, 0, HOST_PHY_DATA_REG);
+   *data = sdhci_readw(host, HOST_PHY_DATA_REG) & 0xFF;
+   sdhci_writew(host, ((0<<8) | offset), HOST_PHY_ADDR_REG);
+   timeout = 20;
+   do {
+   busy = sdhci_readw(host, HOST_PHY_ADDR_REG) & (1<<9);
+   if (!busy)
+   break;
+   mdelay(1);
+   } while (timeout--);
+   if (!timeout)
+   return -ENODEV;
+   *data = sdhci_readw(host, HOST_PHY_DATA_REG) & 0xFF;
+   return 0;
+}
+
+/* Initialize the Arasan PHY */
+int arasan_phy_init(struct sdhci_host *host)
+{
+   struct sdhci_pci_slot *slot = sdhci_priv(host);
+   struct pci_dev *pdev = slot->chip->pdev;
+   u8 val;
+
+   if (arasan_phy_write(host, 0, PHY_CONTROL))
+   return -ENODEV;
+   if (arasan_phy_read(host, PHY_IOPADCONTROL1, ))
+   return -ENODEV;
+   if (arasan_phy_write(host, val | RETB_ENABLE, PHY_IOPADCONTROL1))
+   return -ENODEV;
+   if (arasan_phy_read(host, PHY_IOPADCONTROL2, ))
+   return -ENODEV;
+   if (arasan_phy_write(host, val | RTRIM_ENABLE, 

[PATCH v3] dma-debug: fix incorrect pfn calculation

2017-09-26 Thread miles.chen
From: Miles Chen 

dma-debug reports the following warning:

[name:panic&]WARNING: CPU: 3 PID: 298 at kernel-4.4/lib/dma-debug.c:604
debug _dma_assert_idle+0x1a8/0x230()
DMA-API: cpu touching an active dma mapped cacheline [cln=0x0882300]
CPU: 3 PID: 298 Comm: vold Tainted: GW  O4.4.22+ #1
Hardware name: MT6739 (DT)
Call trace:
[] dump_backtrace+0x0/0x1d4
[] show_stack+0x14/0x1c
[] dump_stack+0xa8/0xe0
[] warn_slowpath_common+0xf4/0x11c
[] warn_slowpath_fmt+0x60/0x80
[] debug_dma_assert_idle+0x1a8/0x230
[] wp_page_copy.isra.96+0x118/0x520
[] do_wp_page+0x4fc/0x534
[] handle_mm_fault+0xd4c/0x1310
[] do_page_fault+0x1c8/0x394
[] do_mem_abort+0x50/0xec

I found that debug_dma_alloc_coherent() and debug_dma_free_coherent()
assume that dma_alloc_coherent() always returns a linear address.
However it's possible that dma_alloc_coherent() returns a non-linear
address. In this case, page_to_pfn(virt_to_page(virt)) will return an
incorrect pfn. If the pfn is valid and mapped as a COW page,
we will hit the warning when doing wp_page_copy().

Fix this by calculating pfn for linear and non-linear addresses.

Signed-off-by: Miles Chen 
---
 lib/dma-debug.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index ea4cc3d..e5b4237 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -1497,7 +1497,8 @@ void debug_dma_alloc_coherent(struct device *dev, size_t 
size,
 
entry->type  = dma_debug_coherent;
entry->dev   = dev;
-   entry->pfn   = page_to_pfn(virt_to_page(virt));
+   entry->pfn   = is_vmalloc_addr(virt) ? vmalloc_to_pfn(virt) :
+   page_to_pfn(virt_to_page(virt));
entry->offset= offset_in_page(virt);
entry->size  = size;
entry->dev_addr  = dma_addr;
@@ -1513,7 +1514,8 @@ void debug_dma_free_coherent(struct device *dev, size_t 
size,
struct dma_debug_entry ref = {
.type   = dma_debug_coherent,
.dev= dev,
-   .pfn= page_to_pfn(virt_to_page(virt)),
+   .pfn= is_vmalloc_addr(virt) ? vmalloc_to_pfn(virt) :
+   page_to_pfn(virt_to_page(virt)),
.offset = offset_in_page(virt),
.dev_addr   = addr,
.size   = size,
-- 
1.9.1



[PATCH v3] dma-debug: fix incorrect pfn calculation

2017-09-26 Thread miles.chen
From: Miles Chen 

dma-debug reports the following warning:

[name:panic&]WARNING: CPU: 3 PID: 298 at kernel-4.4/lib/dma-debug.c:604
debug _dma_assert_idle+0x1a8/0x230()
DMA-API: cpu touching an active dma mapped cacheline [cln=0x0882300]
CPU: 3 PID: 298 Comm: vold Tainted: GW  O4.4.22+ #1
Hardware name: MT6739 (DT)
Call trace:
[] dump_backtrace+0x0/0x1d4
[] show_stack+0x14/0x1c
[] dump_stack+0xa8/0xe0
[] warn_slowpath_common+0xf4/0x11c
[] warn_slowpath_fmt+0x60/0x80
[] debug_dma_assert_idle+0x1a8/0x230
[] wp_page_copy.isra.96+0x118/0x520
[] do_wp_page+0x4fc/0x534
[] handle_mm_fault+0xd4c/0x1310
[] do_page_fault+0x1c8/0x394
[] do_mem_abort+0x50/0xec

I found that debug_dma_alloc_coherent() and debug_dma_free_coherent()
assume that dma_alloc_coherent() always returns a linear address.
However it's possible that dma_alloc_coherent() returns a non-linear
address. In this case, page_to_pfn(virt_to_page(virt)) will return an
incorrect pfn. If the pfn is valid and mapped as a COW page,
we will hit the warning when doing wp_page_copy().

Fix this by calculating pfn for linear and non-linear addresses.

Signed-off-by: Miles Chen 
---
 lib/dma-debug.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index ea4cc3d..e5b4237 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -1497,7 +1497,8 @@ void debug_dma_alloc_coherent(struct device *dev, size_t 
size,
 
entry->type  = dma_debug_coherent;
entry->dev   = dev;
-   entry->pfn   = page_to_pfn(virt_to_page(virt));
+   entry->pfn   = is_vmalloc_addr(virt) ? vmalloc_to_pfn(virt) :
+   page_to_pfn(virt_to_page(virt));
entry->offset= offset_in_page(virt);
entry->size  = size;
entry->dev_addr  = dma_addr;
@@ -1513,7 +1514,8 @@ void debug_dma_free_coherent(struct device *dev, size_t 
size,
struct dma_debug_entry ref = {
.type   = dma_debug_coherent,
.dev= dev,
-   .pfn= page_to_pfn(virt_to_page(virt)),
+   .pfn= is_vmalloc_addr(virt) ? vmalloc_to_pfn(virt) :
+   page_to_pfn(virt_to_page(virt)),
.offset = offset_in_page(virt),
.dev_addr   = addr,
.size   = size,
-- 
1.9.1



Re: [PATCH v2 02/13] clk: sunxi-ng: sun6i: Rename HDMI DDC clock to avoid name collision

2017-09-26 Thread Chen-Yu Tsai
On Tue, Sep 26, 2017 at 5:32 PM, Maxime Ripard
 wrote:
> On Tue, Sep 26, 2017 at 06:59:08AM +, Chen-Yu Tsai wrote:
>> The HDMI DDC clock found in the CCU is the parent of the actual DDC
>> clock within the HDMI controller. That clock is also named "hdmi-ddc".
>>
>> Rename the one in the CCU to "hdmi-ddc-parent". This makes more sense
>> than renaming the one in the HDMI controller to something else.
>>
>> Fixes: c6e6c96d8fa6 ("clk: sunxi-ng: Add A31/A31s clocks")
>> Signed-off-by: Chen-Yu Tsai 
>
> I'd rather stick to the datasheet names. What about "DDC" ?

Works for me.

Thanks
ChenYu


Re: [PATCH v2 02/13] clk: sunxi-ng: sun6i: Rename HDMI DDC clock to avoid name collision

2017-09-26 Thread Chen-Yu Tsai
On Tue, Sep 26, 2017 at 5:32 PM, Maxime Ripard
 wrote:
> On Tue, Sep 26, 2017 at 06:59:08AM +, Chen-Yu Tsai wrote:
>> The HDMI DDC clock found in the CCU is the parent of the actual DDC
>> clock within the HDMI controller. That clock is also named "hdmi-ddc".
>>
>> Rename the one in the CCU to "hdmi-ddc-parent". This makes more sense
>> than renaming the one in the HDMI controller to something else.
>>
>> Fixes: c6e6c96d8fa6 ("clk: sunxi-ng: Add A31/A31s clocks")
>> Signed-off-by: Chen-Yu Tsai 
>
> I'd rather stick to the datasheet names. What about "DDC" ?

Works for me.

Thanks
ChenYu


linux-next: build warning after merge of the tip tree

2017-09-26 Thread Stephen Rothwell
Hi all,

After merging the tip tree, today's linux-next build (x86_64 allnoconfig)
produced this warning:

kernel/printk/printk.c:1983:12: warning: 'printk_time' defined but not used 
[-Wunused-variable]
 static int printk_time;
^

Introduced by commit

  310b454a8653 ("printk: Add monotonic, boottime, and realtime timestamps")

-- 
Cheers,
Stephen Rothwell


linux-next: build warning after merge of the tip tree

2017-09-26 Thread Stephen Rothwell
Hi all,

After merging the tip tree, today's linux-next build (x86_64 allnoconfig)
produced this warning:

kernel/printk/printk.c:1983:12: warning: 'printk_time' defined but not used 
[-Wunused-variable]
 static int printk_time;
^

Introduced by commit

  310b454a8653 ("printk: Add monotonic, boottime, and realtime timestamps")

-- 
Cheers,
Stephen Rothwell


Re: [v8 0/4] cgroup-aware OOM killer

2017-09-26 Thread Tim Hockin
I'm excited to see this being discussed again - it's been years since
the last attempt.  I've tried to stay out of the conversation, but I
feel obligated say something and then go back to lurking.

On Tue, Sep 26, 2017 at 10:26 AM, Johannes Weiner  wrote:
> On Tue, Sep 26, 2017 at 03:30:40PM +0200, Michal Hocko wrote:
>> On Tue 26-09-17 13:13:00, Roman Gushchin wrote:
>> > On Tue, Sep 26, 2017 at 01:21:34PM +0200, Michal Hocko wrote:
>> > > On Tue 26-09-17 11:59:25, Roman Gushchin wrote:
>> > > > On Mon, Sep 25, 2017 at 10:25:21PM +0200, Michal Hocko wrote:
>> > > > > On Mon 25-09-17 19:15:33, Roman Gushchin wrote:
>> > > > > [...]
>> > > > > > I'm not against this model, as I've said before. It feels logical,
>> > > > > > and will work fine in most cases.
>> > > > > >
>> > > > > > In this case we can drop any mount/boot options, because it 
>> > > > > > preserves
>> > > > > > the existing behavior in the default configuration. A big 
>> > > > > > advantage.
>> > > > >
>> > > > > I am not sure about this. We still need an opt-in, ragardless, 
>> > > > > because
>> > > > > selecting the largest process from the largest memcg != selecting the
>> > > > > largest task (just consider memcgs with many processes example).
>> > > >
>> > > > As I understand Johannes, he suggested to compare individual processes 
>> > > > with
>> > > > group_oom mem cgroups. In other words, always select a killable entity 
>> > > > with
>> > > > the biggest memory footprint.
>> > > >
>> > > > This is slightly different from my v8 approach, where I treat leaf 
>> > > > memcgs
>> > > > as indivisible memory consumers independent on group_oom setting, so
>> > > > by default I'm selecting the biggest task in the biggest memcg.
>> > >
>> > > My reading is that he is actually proposing the same thing I've been
>> > > mentioning. Simply select the biggest killable entity (leaf memcg or
>> > > group_oom hierarchy) and either kill the largest task in that entity
>> > > (for !group_oom) or the whole memcg/hierarchy otherwise.
>> >
>> > He wrote the following:
>> > "So I'm leaning toward the second model: compare all oomgroups and
>> > standalone tasks in the system with each other, independent of the
>> > failed hierarchical control structure. Then kill the biggest of them."
>>
>> I will let Johannes to comment but I believe this is just a
>> misunderstanding. If we compared only the biggest task from each memcg
>> then we are basically losing our fairness objective, aren't we?
>
> Sorry about the confusion.
>
> Yeah I was making the case for what Michal proposed, to kill the
> biggest terminal consumer, which is either a task or an oomgroup.
>
> You'd basically iterate through all the tasks and cgroups in the
> system and pick the biggest task that isn't in an oom group or the
> biggest oom group and then kill that.
>
> Yeah, you'd have to compare the memory footprints of tasks with the
> memory footprints of cgroups. These aren't defined identically, and
> tasks don't get attributed every type of allocation that a cgroup
> would. But it should get us in the ballpark, and I cannot picture a
> scenario where this would lead to a completely undesirable outcome.

That last sentence:

> I cannot picture a scenario where this would lead to a completely undesirable 
> outcome.

I feel like David has offered examples here, and many of us at Google
have offered examples as long ago as 2013 (if I recall) of cases where
the proposed heuristic is EXACTLY WRONG.  We need OOM behavior to kill
in a deterministic order configured by policy.  Sometimes, I would
literally prefer to kill every other cgroup before killing "the big
one".  The policy is *all* that matters for shared clusters of varying
users and priorities.

We did this in Borg, and it works REALLY well.  Has for years.  Now
that the world is adopting Kubernetes we need it again, only it's much
harder to carry a kernel patch in this case.


Re: [v8 0/4] cgroup-aware OOM killer

2017-09-26 Thread Tim Hockin
I'm excited to see this being discussed again - it's been years since
the last attempt.  I've tried to stay out of the conversation, but I
feel obligated say something and then go back to lurking.

On Tue, Sep 26, 2017 at 10:26 AM, Johannes Weiner  wrote:
> On Tue, Sep 26, 2017 at 03:30:40PM +0200, Michal Hocko wrote:
>> On Tue 26-09-17 13:13:00, Roman Gushchin wrote:
>> > On Tue, Sep 26, 2017 at 01:21:34PM +0200, Michal Hocko wrote:
>> > > On Tue 26-09-17 11:59:25, Roman Gushchin wrote:
>> > > > On Mon, Sep 25, 2017 at 10:25:21PM +0200, Michal Hocko wrote:
>> > > > > On Mon 25-09-17 19:15:33, Roman Gushchin wrote:
>> > > > > [...]
>> > > > > > I'm not against this model, as I've said before. It feels logical,
>> > > > > > and will work fine in most cases.
>> > > > > >
>> > > > > > In this case we can drop any mount/boot options, because it 
>> > > > > > preserves
>> > > > > > the existing behavior in the default configuration. A big 
>> > > > > > advantage.
>> > > > >
>> > > > > I am not sure about this. We still need an opt-in, ragardless, 
>> > > > > because
>> > > > > selecting the largest process from the largest memcg != selecting the
>> > > > > largest task (just consider memcgs with many processes example).
>> > > >
>> > > > As I understand Johannes, he suggested to compare individual processes 
>> > > > with
>> > > > group_oom mem cgroups. In other words, always select a killable entity 
>> > > > with
>> > > > the biggest memory footprint.
>> > > >
>> > > > This is slightly different from my v8 approach, where I treat leaf 
>> > > > memcgs
>> > > > as indivisible memory consumers independent on group_oom setting, so
>> > > > by default I'm selecting the biggest task in the biggest memcg.
>> > >
>> > > My reading is that he is actually proposing the same thing I've been
>> > > mentioning. Simply select the biggest killable entity (leaf memcg or
>> > > group_oom hierarchy) and either kill the largest task in that entity
>> > > (for !group_oom) or the whole memcg/hierarchy otherwise.
>> >
>> > He wrote the following:
>> > "So I'm leaning toward the second model: compare all oomgroups and
>> > standalone tasks in the system with each other, independent of the
>> > failed hierarchical control structure. Then kill the biggest of them."
>>
>> I will let Johannes to comment but I believe this is just a
>> misunderstanding. If we compared only the biggest task from each memcg
>> then we are basically losing our fairness objective, aren't we?
>
> Sorry about the confusion.
>
> Yeah I was making the case for what Michal proposed, to kill the
> biggest terminal consumer, which is either a task or an oomgroup.
>
> You'd basically iterate through all the tasks and cgroups in the
> system and pick the biggest task that isn't in an oom group or the
> biggest oom group and then kill that.
>
> Yeah, you'd have to compare the memory footprints of tasks with the
> memory footprints of cgroups. These aren't defined identically, and
> tasks don't get attributed every type of allocation that a cgroup
> would. But it should get us in the ballpark, and I cannot picture a
> scenario where this would lead to a completely undesirable outcome.

That last sentence:

> I cannot picture a scenario where this would lead to a completely undesirable 
> outcome.

I feel like David has offered examples here, and many of us at Google
have offered examples as long ago as 2013 (if I recall) of cases where
the proposed heuristic is EXACTLY WRONG.  We need OOM behavior to kill
in a deterministic order configured by policy.  Sometimes, I would
literally prefer to kill every other cgroup before killing "the big
one".  The policy is *all* that matters for shared clusters of varying
users and priorities.

We did this in Borg, and it works REALLY well.  Has for years.  Now
that the world is adopting Kubernetes we need it again, only it's much
harder to carry a kernel patch in this case.


[PATCH v5 1/4] ipv4: Namespaceify tcp_fastopen knob

2017-09-26 Thread Haishuang Yan
Different namespace application might require enable TCP Fast Open
feature independently of the host.

This patch series continues making more of the TCP Fast Open related
sysctl knobs be per net-namespace.

Reported-by: Luca BRUNO 
Signed-off-by: Haishuang Yan 

---
Changes since v5:
  * Splite patch #2 into two changes
  * Fix potential leak issue
  * Fix typo error
  * Split the patches to be per-sysctl
  * Remove unrelated change by mistake
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h  |  1 -
 net/ipv4/af_inet.c |  7 ---
 net/ipv4/sysctl_net_ipv4.c | 14 +++---
 net/ipv4/tcp.c |  4 ++--
 net/ipv4/tcp_fastopen.c| 11 +--
 net/ipv4/tcp_ipv4.c|  2 ++
 7 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 20d061c..ce6dde0 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -127,6 +127,7 @@ struct netns_ipv4 {
int sysctl_tcp_timestamps;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
+   int sysctl_tcp_fastopen;
 
 #ifdef CONFIG_NET_L3_MASTER_DEV
int sysctl_udp_l3mdev_accept;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index b510f28..f628967 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -240,7 +240,6 @@
 
 
 /* sysctl variables for tcp */
-extern int sysctl_tcp_fastopen;
 extern int sysctl_tcp_retrans_collapse;
 extern int sysctl_tcp_stdurg;
 extern int sysctl_tcp_rfc1337;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e31108e..ddd126d 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -195,7 +195,7 @@ int inet_listen(struct socket *sock, int backlog)
 {
struct sock *sk = sock->sk;
unsigned char old_state;
-   int err;
+   int err, tcp_fastopen;
 
lock_sock(sk);
 
@@ -217,8 +217,9 @@ int inet_listen(struct socket *sock, int backlog)
 * because the socket was in TCP_LISTEN state previously but
 * was shutdown() rather than close().
 */
-   if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
-   (sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
+   tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+   if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
+   (tcp_fastopen & TFO_SERVER_ENABLE) &&
!inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
fastopen_queue_tune(sk, backlog);
tcp_fastopen_init_key_once(true);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 0d3c038..e31e853c 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -401,13 +401,6 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
.proc_handler   = proc_dointvec
},
{
-   .procname   = "tcp_fastopen",
-   .data   = _tcp_fastopen,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec,
-   },
-   {
.procname   = "tcp_fastopen_key",
.mode   = 0600,
.maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
@@ -1085,6 +1078,13 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
.mode   = 0644,
.proc_handler   = proc_dointvec
},
+   {
+   .procname   = "tcp_fastopen",
+   .data   = _net.ipv4.sysctl_tcp_fastopen,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec,
+   },
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
{
.procname   = "fib_multipath_use_neigh",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5091402..dac56c4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1126,7 +1126,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct 
msghdr *msg,
struct sockaddr *uaddr = msg->msg_name;
int err, flags;
 
-   if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
+   if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
(uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
 uaddr->sa_family == AF_UNSPEC))
return -EOPNOTSUPP;
@@ -2759,7 +2759,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_FASTOPEN_CONNECT:
if (val > 1 || val < 0) {
err = -EINVAL;
-   } else if (sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
+   } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
if (sk->sk_state == TCP_CLOSE)
tp->fastopen_connect = val;
  

[PATCH v5 1/4] ipv4: Namespaceify tcp_fastopen knob

2017-09-26 Thread Haishuang Yan
Different namespace application might require enable TCP Fast Open
feature independently of the host.

This patch series continues making more of the TCP Fast Open related
sysctl knobs be per net-namespace.

Reported-by: Luca BRUNO 
Signed-off-by: Haishuang Yan 

---
Changes since v5:
  * Splite patch #2 into two changes
  * Fix potential leak issue
  * Fix typo error
  * Split the patches to be per-sysctl
  * Remove unrelated change by mistake
---
 include/net/netns/ipv4.h   |  1 +
 include/net/tcp.h  |  1 -
 net/ipv4/af_inet.c |  7 ---
 net/ipv4/sysctl_net_ipv4.c | 14 +++---
 net/ipv4/tcp.c |  4 ++--
 net/ipv4/tcp_fastopen.c| 11 +--
 net/ipv4/tcp_ipv4.c|  2 ++
 7 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 20d061c..ce6dde0 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -127,6 +127,7 @@ struct netns_ipv4 {
int sysctl_tcp_timestamps;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
+   int sysctl_tcp_fastopen;
 
 #ifdef CONFIG_NET_L3_MASTER_DEV
int sysctl_udp_l3mdev_accept;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index b510f28..f628967 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -240,7 +240,6 @@
 
 
 /* sysctl variables for tcp */
-extern int sysctl_tcp_fastopen;
 extern int sysctl_tcp_retrans_collapse;
 extern int sysctl_tcp_stdurg;
 extern int sysctl_tcp_rfc1337;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e31108e..ddd126d 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -195,7 +195,7 @@ int inet_listen(struct socket *sock, int backlog)
 {
struct sock *sk = sock->sk;
unsigned char old_state;
-   int err;
+   int err, tcp_fastopen;
 
lock_sock(sk);
 
@@ -217,8 +217,9 @@ int inet_listen(struct socket *sock, int backlog)
 * because the socket was in TCP_LISTEN state previously but
 * was shutdown() rather than close().
 */
-   if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
-   (sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
+   tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+   if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
+   (tcp_fastopen & TFO_SERVER_ENABLE) &&
!inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
fastopen_queue_tune(sk, backlog);
tcp_fastopen_init_key_once(true);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 0d3c038..e31e853c 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -401,13 +401,6 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
.proc_handler   = proc_dointvec
},
{
-   .procname   = "tcp_fastopen",
-   .data   = _tcp_fastopen,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_dointvec,
-   },
-   {
.procname   = "tcp_fastopen_key",
.mode   = 0600,
.maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
@@ -1085,6 +1078,13 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
.mode   = 0644,
.proc_handler   = proc_dointvec
},
+   {
+   .procname   = "tcp_fastopen",
+   .data   = _net.ipv4.sysctl_tcp_fastopen,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec,
+   },
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
{
.procname   = "fib_multipath_use_neigh",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5091402..dac56c4 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1126,7 +1126,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct 
msghdr *msg,
struct sockaddr *uaddr = msg->msg_name;
int err, flags;
 
-   if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
+   if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
(uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
 uaddr->sa_family == AF_UNSPEC))
return -EOPNOTSUPP;
@@ -2759,7 +2759,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_FASTOPEN_CONNECT:
if (val > 1 || val < 0) {
err = -EINVAL;
-   } else if (sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
+   } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
if (sk->sk_state == TCP_CLOSE)
tp->fastopen_connect = val;
else
diff --git 

[PATCH v5 2/4] ipv4: Remove the 'publish' logic in tcp_fastopen_init_key_once

2017-09-26 Thread Haishuang Yan
The 'publish' logic is not necessary after commit dfea2aa65424 ("tcp:
Do not call tcp_fastopen_reset_cipher from interrupt context"), because
in tcp_fastopen_cookie_gen,it wouldn't call tcp_fastopen_init_key_once.

Signed-off-by: Haishuang Yan 
---
 include/net/tcp.h  | 2 +-
 net/ipv4/af_inet.c | 2 +-
 net/ipv4/sysctl_net_ipv4.c | 5 -
 net/ipv4/tcp.c | 2 +-
 net/ipv4/tcp_fastopen.c| 4 ++--
 5 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index f628967..061c128 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1562,7 +1562,7 @@ struct tcp_fastopen_request {
 struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
  struct request_sock *req,
  struct tcp_fastopen_cookie *foc);
-void tcp_fastopen_init_key_once(bool publish);
+void tcp_fastopen_init_key_once(void);
 bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
 struct tcp_fastopen_cookie *cookie);
 bool tcp_fastopen_defer_connect(struct sock *sk, int *err);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index ddd126d..e73ce79 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -222,7 +222,7 @@ int inet_listen(struct socket *sock, int backlog)
(tcp_fastopen & TFO_SERVER_ENABLE) &&
!inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
fastopen_queue_tune(sk, backlog);
-   tcp_fastopen_init_key_once(true);
+   tcp_fastopen_init_key_once();
}
 
err = inet_csk_listen_start(sk, backlog);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index e31e853c..f6324ea 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -282,11 +282,6 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, 
int write,
ret = -EINVAL;
goto bad_key;
}
-   /* Generate a dummy secret but don't publish it. This
-* is needed so we don't regenerate a new key on the
-* first invocation of tcp_fastopen_cookie_gen
-*/
-   tcp_fastopen_init_key_once(false);
tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
}
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index dac56c4..4e39545 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2749,7 +2749,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_FASTOPEN:
if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
TCPF_LISTEN))) {
-   tcp_fastopen_init_key_once(true);
+   tcp_fastopen_init_key_once();
 
fastopen_queue_tune(sk, val);
} else {
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 31b08ec..8c8f0f0 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -13,7 +13,7 @@
 
 static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock);
 
-void tcp_fastopen_init_key_once(bool publish)
+void tcp_fastopen_init_key_once(void)
 {
static u8 key[TCP_FASTOPEN_KEY_LENGTH];
 
@@ -23,7 +23,7 @@ void tcp_fastopen_init_key_once(bool publish)
 * All call sites of tcp_fastopen_cookie_gen also check
 * for a valid cookie, so this is an acceptable risk.
 */
-   if (net_get_random_once(key, sizeof(key)) && publish)
+   if (net_get_random_once(key, sizeof(key)))
tcp_fastopen_reset_cipher(key, sizeof(key));
 }
 
-- 
1.8.3.1





[PATCH v5 2/4] ipv4: Remove the 'publish' logic in tcp_fastopen_init_key_once

2017-09-26 Thread Haishuang Yan
The 'publish' logic is not necessary after commit dfea2aa65424 ("tcp:
Do not call tcp_fastopen_reset_cipher from interrupt context"), because
in tcp_fastopen_cookie_gen,it wouldn't call tcp_fastopen_init_key_once.

Signed-off-by: Haishuang Yan 
---
 include/net/tcp.h  | 2 +-
 net/ipv4/af_inet.c | 2 +-
 net/ipv4/sysctl_net_ipv4.c | 5 -
 net/ipv4/tcp.c | 2 +-
 net/ipv4/tcp_fastopen.c| 4 ++--
 5 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index f628967..061c128 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1562,7 +1562,7 @@ struct tcp_fastopen_request {
 struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
  struct request_sock *req,
  struct tcp_fastopen_cookie *foc);
-void tcp_fastopen_init_key_once(bool publish);
+void tcp_fastopen_init_key_once(void);
 bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
 struct tcp_fastopen_cookie *cookie);
 bool tcp_fastopen_defer_connect(struct sock *sk, int *err);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index ddd126d..e73ce79 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -222,7 +222,7 @@ int inet_listen(struct socket *sock, int backlog)
(tcp_fastopen & TFO_SERVER_ENABLE) &&
!inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
fastopen_queue_tune(sk, backlog);
-   tcp_fastopen_init_key_once(true);
+   tcp_fastopen_init_key_once();
}
 
err = inet_csk_listen_start(sk, backlog);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index e31e853c..f6324ea 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -282,11 +282,6 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, 
int write,
ret = -EINVAL;
goto bad_key;
}
-   /* Generate a dummy secret but don't publish it. This
-* is needed so we don't regenerate a new key on the
-* first invocation of tcp_fastopen_cookie_gen
-*/
-   tcp_fastopen_init_key_once(false);
tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
}
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index dac56c4..4e39545 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2749,7 +2749,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_FASTOPEN:
if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
TCPF_LISTEN))) {
-   tcp_fastopen_init_key_once(true);
+   tcp_fastopen_init_key_once();
 
fastopen_queue_tune(sk, val);
} else {
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 31b08ec..8c8f0f0 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -13,7 +13,7 @@
 
 static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock);
 
-void tcp_fastopen_init_key_once(bool publish)
+void tcp_fastopen_init_key_once(void)
 {
static u8 key[TCP_FASTOPEN_KEY_LENGTH];
 
@@ -23,7 +23,7 @@ void tcp_fastopen_init_key_once(bool publish)
 * All call sites of tcp_fastopen_cookie_gen also check
 * for a valid cookie, so this is an acceptable risk.
 */
-   if (net_get_random_once(key, sizeof(key)) && publish)
+   if (net_get_random_once(key, sizeof(key)))
tcp_fastopen_reset_cipher(key, sizeof(key));
 }
 
-- 
1.8.3.1





[PATCH v5 4/4] ipv4: Namespaceify tcp_fastopen_blackhole_timeout knob

2017-09-26 Thread Haishuang Yan
Different namespace application might require different time period in
second to disable Fastopen on active TCP sockets.

Tested:
Simulate following similar situation that the server's data gets dropped
after 3WHS.
C  syn-data ---> S
C <--- syn/ack - S
C  ack > S
S (accept & write)
C?  X <- data -- S
[retry and timeout]

And then print netstat of TCPFastOpenBlackhole, the counter increased as
expected when the firewall blackhole issue is detected and active TFO is
disabled.
# cat /proc/net/netstat | awk '{print $91}'
TCPFastOpenBlackhole
1

Signed-off-by: Haishuang Yan 
---
 include/net/netns/ipv4.h   |  3 +++
 net/ipv4/sysctl_net_ipv4.c | 20 +++-
 net/ipv4/tcp_fastopen.c| 30 +++---
 net/ipv4/tcp_ipv4.c|  2 ++
 4 files changed, 27 insertions(+), 28 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 66b8335..d76edde 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -132,6 +132,9 @@ struct netns_ipv4 {
int sysctl_tcp_fastopen;
struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
spinlock_t tcp_fastopen_ctx_lock;
+   unsigned int sysctl_tcp_fastopen_blackhole_timeout;
+   atomic_t tfo_active_disable_times;
+   unsigned long tfo_active_disable_stamp;
 
 #ifdef CONFIG_NET_L3_MASTER_DEV
int sysctl_udp_l3mdev_accept;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 20e19fe..cac8dd3 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -355,11 +355,13 @@ static int proc_tfo_blackhole_detect_timeout(struct 
ctl_table *table,
 void __user *buffer,
 size_t *lenp, loff_t *ppos)
 {
+   struct net *net = container_of(table->data, struct net,
+   ipv4.sysctl_tcp_fastopen_blackhole_timeout);
int ret;
 
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (write && ret == 0)
-   tcp_fastopen_active_timeout_reset();
+   atomic_set(>ipv4.tfo_active_disable_times, 0);
 
return ret;
 }
@@ -398,14 +400,6 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
.proc_handler   = proc_dointvec
},
{
-   .procname   = "tcp_fastopen_blackhole_timeout_sec",
-   .data   = _tcp_fastopen_blackhole_timeout,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_tfo_blackhole_detect_timeout,
-   .extra1 = ,
-   },
-   {
.procname   = "tcp_abort_on_overflow",
.data   = _tcp_abort_on_overflow,
.maxlen = sizeof(int),
@@ -1083,6 +1077,14 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
.maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
.proc_handler   = proc_tcp_fastopen_key,
},
+   {
+   .procname   = "tcp_fastopen_blackhole_timeout_sec",
+   .data   = 
_net.ipv4.sysctl_tcp_fastopen_blackhole_timeout,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_tfo_blackhole_detect_timeout,
+   .extra1 = ,
+   },
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
{
.procname   = "fib_multipath_use_neigh",
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 4eae44a..de470e7 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -422,25 +422,16 @@ bool tcp_fastopen_defer_connect(struct sock *sk, int *err)
  * TFO connection with data exchanges.
  */
 
-/* Default to 1hr */
-unsigned int sysctl_tcp_fastopen_blackhole_timeout __read_mostly = 60 * 60;
-static atomic_t tfo_active_disable_times __read_mostly = ATOMIC_INIT(0);
-static unsigned long tfo_active_disable_stamp __read_mostly;
-
 /* Disable active TFO and record current jiffies and
  * tfo_active_disable_times
  */
 void tcp_fastopen_active_disable(struct sock *sk)
 {
-   atomic_inc(_active_disable_times);
-   tfo_active_disable_stamp = jiffies;
-   NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENBLACKHOLE);
-}
+   struct net *net = sock_net(sk);
 
-/* Reset tfo_active_disable_times to 0 */
-void tcp_fastopen_active_timeout_reset(void)
-{
-   atomic_set(_active_disable_times, 0);
+   atomic_inc(>ipv4.tfo_active_disable_times);
+   net->ipv4.tfo_active_disable_stamp = jiffies;
+   NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE);
 }
 
 /* Calculate timeout for tfo active disable
@@ -449,17 +440,18 @@ void tcp_fastopen_active_timeout_reset(void)
  */
 bool tcp_fastopen_active_should_disable(struct sock *sk)
 {
-   int tfo_da_times = atomic_read(_active_disable_times);

[PATCH v5 3/4] ipv4: Namespaceify tcp_fastopen_key knob

2017-09-26 Thread Haishuang Yan
Different namespace application might require different tcp_fastopen_key
independently of the host.

David Miller pointed out there is a leak without releasing the context
of tcp_fastopen_key during netns teardown. So add the release action in
exit_batch path.

Tested:
1. Container namespace:
# cat /proc/sys/net/ipv4/tcp_fastopen_key:
2817fff2-f803cf97-eadfd1f3-78c0992b

cookie key in tcp syn packets:
Fast Open Cookie
Kind: TCP Fast Open Cookie (34)
Length: 10
Fast Open Cookie: 1e5dd82a8c492ca9

2. Host:
# cat /proc/sys/net/ipv4/tcp_fastopen_key:
107d7c5f-68eb2ac7-02fb06e6-ed341702

cookie key in tcp syn packets:
Fast Open Cookie
Kind: TCP Fast Open Cookie (34)
Length: 10
Fast Open Cookie: e213c02bf0afbc8a

Signed-off-by: Haishuang Yan 
---
 include/net/netns/ipv4.h   |  4 +++
 include/net/tcp.h  |  6 ++---
 net/ipv4/af_inet.c |  2 +-
 net/ipv4/sysctl_net_ipv4.c | 21 ---
 net/ipv4/tcp.c |  2 +-
 net/ipv4/tcp_fastopen.c| 64 +++---
 net/ipv4/tcp_ipv4.c|  6 +
 7 files changed, 70 insertions(+), 35 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index ce6dde0..66b8335 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -36,6 +36,8 @@ struct inet_timewait_death_row {
int sysctl_max_tw_buckets;
 };
 
+struct tcp_fastopen_context;
+
 struct netns_ipv4 {
 #ifdef CONFIG_SYSCTL
struct ctl_table_header *forw_hdr;
@@ -128,6 +130,8 @@ struct netns_ipv4 {
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
+   struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
+   spinlock_t tcp_fastopen_ctx_lock;
 
 #ifdef CONFIG_NET_L3_MASTER_DEV
int sysctl_udp_l3mdev_accept;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 061c128..e27bd18 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1556,13 +1556,13 @@ struct tcp_fastopen_request {
 };
 void tcp_free_fastopen_req(struct tcp_sock *tp);
 
-extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
-int tcp_fastopen_reset_cipher(void *key, unsigned int len);
+void tcp_fastopen_ctx_destroy(struct net *net);
+int tcp_fastopen_reset_cipher(struct net *net, void *key, unsigned int len);
 void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb);
 struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
  struct request_sock *req,
  struct tcp_fastopen_cookie *foc);
-void tcp_fastopen_init_key_once(void);
+void tcp_fastopen_init_key_once(struct net *net);
 bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
 struct tcp_fastopen_cookie *cookie);
 bool tcp_fastopen_defer_connect(struct sock *sk, int *err);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e73ce79..43a1bbe 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -222,7 +222,7 @@ int inet_listen(struct socket *sock, int backlog)
(tcp_fastopen & TFO_SERVER_ENABLE) &&
!inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
fastopen_queue_tune(sk, backlog);
-   tcp_fastopen_init_key_once();
+   tcp_fastopen_init_key_once(sock_net(sk));
}
 
err = inet_csk_listen_start(sk, backlog);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index f6324ea..20e19fe 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -251,10 +251,12 @@ static int proc_allowed_congestion_control(struct 
ctl_table *ctl,
return ret;
 }
 
-static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
+static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
 void __user *buffer, size_t *lenp,
 loff_t *ppos)
 {
+   struct net *net = container_of(table->data, struct net,
+   ipv4.sysctl_tcp_fastopen);
struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
struct tcp_fastopen_context *ctxt;
int ret;
@@ -265,7 +267,7 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int 
write,
return -ENOMEM;
 
rcu_read_lock();
-   ctxt = rcu_dereference(tcp_fastopen_ctx);
+   ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
if (ctxt)
memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
else
@@ -282,7 +284,7 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int 
write,
ret = -EINVAL;
goto bad_key;
}
-   tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
+   tcp_fastopen_reset_cipher(net, user_key, 
TCP_FASTOPEN_KEY_LENGTH);

[PATCH v5 4/4] ipv4: Namespaceify tcp_fastopen_blackhole_timeout knob

2017-09-26 Thread Haishuang Yan
Different namespace application might require different time period in
second to disable Fastopen on active TCP sockets.

Tested:
Simulate following similar situation that the server's data gets dropped
after 3WHS.
C  syn-data ---> S
C <--- syn/ack - S
C  ack > S
S (accept & write)
C?  X <- data -- S
[retry and timeout]

And then print netstat of TCPFastOpenBlackhole, the counter increased as
expected when the firewall blackhole issue is detected and active TFO is
disabled.
# cat /proc/net/netstat | awk '{print $91}'
TCPFastOpenBlackhole
1

Signed-off-by: Haishuang Yan 
---
 include/net/netns/ipv4.h   |  3 +++
 net/ipv4/sysctl_net_ipv4.c | 20 +++-
 net/ipv4/tcp_fastopen.c| 30 +++---
 net/ipv4/tcp_ipv4.c|  2 ++
 4 files changed, 27 insertions(+), 28 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 66b8335..d76edde 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -132,6 +132,9 @@ struct netns_ipv4 {
int sysctl_tcp_fastopen;
struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
spinlock_t tcp_fastopen_ctx_lock;
+   unsigned int sysctl_tcp_fastopen_blackhole_timeout;
+   atomic_t tfo_active_disable_times;
+   unsigned long tfo_active_disable_stamp;
 
 #ifdef CONFIG_NET_L3_MASTER_DEV
int sysctl_udp_l3mdev_accept;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 20e19fe..cac8dd3 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -355,11 +355,13 @@ static int proc_tfo_blackhole_detect_timeout(struct 
ctl_table *table,
 void __user *buffer,
 size_t *lenp, loff_t *ppos)
 {
+   struct net *net = container_of(table->data, struct net,
+   ipv4.sysctl_tcp_fastopen_blackhole_timeout);
int ret;
 
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (write && ret == 0)
-   tcp_fastopen_active_timeout_reset();
+   atomic_set(>ipv4.tfo_active_disable_times, 0);
 
return ret;
 }
@@ -398,14 +400,6 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
.proc_handler   = proc_dointvec
},
{
-   .procname   = "tcp_fastopen_blackhole_timeout_sec",
-   .data   = _tcp_fastopen_blackhole_timeout,
-   .maxlen = sizeof(int),
-   .mode   = 0644,
-   .proc_handler   = proc_tfo_blackhole_detect_timeout,
-   .extra1 = ,
-   },
-   {
.procname   = "tcp_abort_on_overflow",
.data   = _tcp_abort_on_overflow,
.maxlen = sizeof(int),
@@ -1083,6 +1077,14 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
.maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
.proc_handler   = proc_tcp_fastopen_key,
},
+   {
+   .procname   = "tcp_fastopen_blackhole_timeout_sec",
+   .data   = 
_net.ipv4.sysctl_tcp_fastopen_blackhole_timeout,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_tfo_blackhole_detect_timeout,
+   .extra1 = ,
+   },
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
{
.procname   = "fib_multipath_use_neigh",
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 4eae44a..de470e7 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -422,25 +422,16 @@ bool tcp_fastopen_defer_connect(struct sock *sk, int *err)
  * TFO connection with data exchanges.
  */
 
-/* Default to 1hr */
-unsigned int sysctl_tcp_fastopen_blackhole_timeout __read_mostly = 60 * 60;
-static atomic_t tfo_active_disable_times __read_mostly = ATOMIC_INIT(0);
-static unsigned long tfo_active_disable_stamp __read_mostly;
-
 /* Disable active TFO and record current jiffies and
  * tfo_active_disable_times
  */
 void tcp_fastopen_active_disable(struct sock *sk)
 {
-   atomic_inc(_active_disable_times);
-   tfo_active_disable_stamp = jiffies;
-   NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENBLACKHOLE);
-}
+   struct net *net = sock_net(sk);
 
-/* Reset tfo_active_disable_times to 0 */
-void tcp_fastopen_active_timeout_reset(void)
-{
-   atomic_set(_active_disable_times, 0);
+   atomic_inc(>ipv4.tfo_active_disable_times);
+   net->ipv4.tfo_active_disable_stamp = jiffies;
+   NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE);
 }
 
 /* Calculate timeout for tfo active disable
@@ -449,17 +440,18 @@ void tcp_fastopen_active_timeout_reset(void)
  */
 bool tcp_fastopen_active_should_disable(struct sock *sk)
 {
-   int tfo_da_times = atomic_read(_active_disable_times);
-   int multiplier;
+   

[PATCH v5 3/4] ipv4: Namespaceify tcp_fastopen_key knob

2017-09-26 Thread Haishuang Yan
Different namespace application might require different tcp_fastopen_key
independently of the host.

David Miller pointed out there is a leak without releasing the context
of tcp_fastopen_key during netns teardown. So add the release action in
exit_batch path.

Tested:
1. Container namespace:
# cat /proc/sys/net/ipv4/tcp_fastopen_key:
2817fff2-f803cf97-eadfd1f3-78c0992b

cookie key in tcp syn packets:
Fast Open Cookie
Kind: TCP Fast Open Cookie (34)
Length: 10
Fast Open Cookie: 1e5dd82a8c492ca9

2. Host:
# cat /proc/sys/net/ipv4/tcp_fastopen_key:
107d7c5f-68eb2ac7-02fb06e6-ed341702

cookie key in tcp syn packets:
Fast Open Cookie
Kind: TCP Fast Open Cookie (34)
Length: 10
Fast Open Cookie: e213c02bf0afbc8a

Signed-off-by: Haishuang Yan 
---
 include/net/netns/ipv4.h   |  4 +++
 include/net/tcp.h  |  6 ++---
 net/ipv4/af_inet.c |  2 +-
 net/ipv4/sysctl_net_ipv4.c | 21 ---
 net/ipv4/tcp.c |  2 +-
 net/ipv4/tcp_fastopen.c| 64 +++---
 net/ipv4/tcp_ipv4.c|  6 +
 7 files changed, 70 insertions(+), 35 deletions(-)

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index ce6dde0..66b8335 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -36,6 +36,8 @@ struct inet_timewait_death_row {
int sysctl_max_tw_buckets;
 };
 
+struct tcp_fastopen_context;
+
 struct netns_ipv4 {
 #ifdef CONFIG_SYSCTL
struct ctl_table_header *forw_hdr;
@@ -128,6 +130,8 @@ struct netns_ipv4 {
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
int sysctl_tcp_fastopen;
+   struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
+   spinlock_t tcp_fastopen_ctx_lock;
 
 #ifdef CONFIG_NET_L3_MASTER_DEV
int sysctl_udp_l3mdev_accept;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 061c128..e27bd18 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1556,13 +1556,13 @@ struct tcp_fastopen_request {
 };
 void tcp_free_fastopen_req(struct tcp_sock *tp);
 
-extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
-int tcp_fastopen_reset_cipher(void *key, unsigned int len);
+void tcp_fastopen_ctx_destroy(struct net *net);
+int tcp_fastopen_reset_cipher(struct net *net, void *key, unsigned int len);
 void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb);
 struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
  struct request_sock *req,
  struct tcp_fastopen_cookie *foc);
-void tcp_fastopen_init_key_once(void);
+void tcp_fastopen_init_key_once(struct net *net);
 bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
 struct tcp_fastopen_cookie *cookie);
 bool tcp_fastopen_defer_connect(struct sock *sk, int *err);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e73ce79..43a1bbe 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -222,7 +222,7 @@ int inet_listen(struct socket *sock, int backlog)
(tcp_fastopen & TFO_SERVER_ENABLE) &&
!inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
fastopen_queue_tune(sk, backlog);
-   tcp_fastopen_init_key_once();
+   tcp_fastopen_init_key_once(sock_net(sk));
}
 
err = inet_csk_listen_start(sk, backlog);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index f6324ea..20e19fe 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -251,10 +251,12 @@ static int proc_allowed_congestion_control(struct 
ctl_table *ctl,
return ret;
 }
 
-static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
+static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
 void __user *buffer, size_t *lenp,
 loff_t *ppos)
 {
+   struct net *net = container_of(table->data, struct net,
+   ipv4.sysctl_tcp_fastopen);
struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
struct tcp_fastopen_context *ctxt;
int ret;
@@ -265,7 +267,7 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int 
write,
return -ENOMEM;
 
rcu_read_lock();
-   ctxt = rcu_dereference(tcp_fastopen_ctx);
+   ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
if (ctxt)
memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
else
@@ -282,7 +284,7 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int 
write,
ret = -EINVAL;
goto bad_key;
}
-   tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
+   tcp_fastopen_reset_cipher(net, user_key, 
TCP_FASTOPEN_KEY_LENGTH);
}
 
 bad_key:
@@ -396,12 

Re: linux-next: build warning after merge of the scsi-mkp tree

2017-09-26 Thread Stephen Rothwell
Hi James,

On Tue, 26 Sep 2017 14:09:47 +1000 Stephen Rothwell  
wrote:
>
> After merging the scsi-mkp tree, today's linux-next build (x86_64
> allmodconfig) produced this warning:
> 
> drivers/scsi/hisi_sas/hisi_sas_main.c: In function 
> 'hisi_sas_controller_reset':
> drivers/scsi/hisi_sas/hisi_sas_main.c:1049:24: warning: unused variable 
> 'sas_ha' [-Wunused-variable]
>   struct sas_ha_struct *sas_ha = _hba->sha;
> ^
> 
> Introduced by commit
> 
>   042ebd293b86 ("scsi: libsas: kill useless ha_event and do some cleanup")

This warning now exists in the scsi tree.
-- 
Cheers,
Stephen Rothwell


Re: linux-next: build warning after merge of the scsi-mkp tree

2017-09-26 Thread Stephen Rothwell
Hi James,

On Tue, 26 Sep 2017 14:09:47 +1000 Stephen Rothwell  
wrote:
>
> After merging the scsi-mkp tree, today's linux-next build (x86_64
> allmodconfig) produced this warning:
> 
> drivers/scsi/hisi_sas/hisi_sas_main.c: In function 
> 'hisi_sas_controller_reset':
> drivers/scsi/hisi_sas/hisi_sas_main.c:1049:24: warning: unused variable 
> 'sas_ha' [-Wunused-variable]
>   struct sas_ha_struct *sas_ha = _hba->sha;
> ^
> 
> Introduced by commit
> 
>   042ebd293b86 ("scsi: libsas: kill useless ha_event and do some cleanup")

This warning now exists in the scsi tree.
-- 
Cheers,
Stephen Rothwell


Re: [PATCH v2 05/10] arm: dts: mt7623: update pio, usb and crypto nodes

2017-09-26 Thread Ryder Lee
On Tue, 2017-09-26 at 18:17 +0800, Yingjoe Chen wrote:
> On Tue, 2017-09-26 at 10:02 +0800, Ryder Lee wrote:
> > This patch updates pio, usb and crypto nodes to make them be consistent
> > with the binding documents.
> > 
> > Signed-off-by: Ryder Lee 
> > ---
> >  arch/arm/boot/dts/mt7623.dtsi | 52 
> > ++-
> >  1 file changed, 27 insertions(+), 25 deletions(-)
> > 
> > diff --git a/arch/arm/boot/dts/mt7623.dtsi b/arch/arm/boot/dts/mt7623.dtsi
> > index 381843e..9ec3767 100644
> > --- a/arch/arm/boot/dts/mt7623.dtsi
> > +++ b/arch/arm/boot/dts/mt7623.dtsi
> > @@ -226,21 +226,6 @@
> > #reset-cells = <1>;
> > };
> >  
> > -   pio: pinctrl@10005000 {
> > -   compatible = "mediatek,mt7623-pinctrl",
> > -"mediatek,mt2701-pinctrl";
> > -   reg = <0 0x1000b000 0 0x1000>;
> > -   mediatek,pctl-regmap = <_pctl_a>;
> > -   pins-are-numbered;
> > -   gpio-controller;
> > -   #gpio-cells = <2>;
> > -   interrupt-controller;
> > -   interrupt-parent = <>;
> > -   #interrupt-cells = <2>;
> > -   interrupts = ,
> > -;
> > -   };
> > -
> > syscfg_pctl_a: syscfg@10005000 {
> > compatible = "mediatek,mt7623-pctl-a-syscfg", "syscon";
> > reg = <0 0x10005000 0 0x1000>;
> 
> 
> Hi Ryder,
> 
> pio node is special, it have 2 registers space.
> The address 10005000 is for PIO, it was accesed through syscfg_pctl_a
> regmap. The other one, 0x1000b000, is for EINT.
> 
> So,I think using address @10005000 is correct.
> 
> Joe.C
> 

Thanks for explanation. I'll revert this part to a previous state.

> 




Re: [PATCH v2 05/10] arm: dts: mt7623: update pio, usb and crypto nodes

2017-09-26 Thread Ryder Lee
On Tue, 2017-09-26 at 18:17 +0800, Yingjoe Chen wrote:
> On Tue, 2017-09-26 at 10:02 +0800, Ryder Lee wrote:
> > This patch updates pio, usb and crypto nodes to make them be consistent
> > with the binding documents.
> > 
> > Signed-off-by: Ryder Lee 
> > ---
> >  arch/arm/boot/dts/mt7623.dtsi | 52 
> > ++-
> >  1 file changed, 27 insertions(+), 25 deletions(-)
> > 
> > diff --git a/arch/arm/boot/dts/mt7623.dtsi b/arch/arm/boot/dts/mt7623.dtsi
> > index 381843e..9ec3767 100644
> > --- a/arch/arm/boot/dts/mt7623.dtsi
> > +++ b/arch/arm/boot/dts/mt7623.dtsi
> > @@ -226,21 +226,6 @@
> > #reset-cells = <1>;
> > };
> >  
> > -   pio: pinctrl@10005000 {
> > -   compatible = "mediatek,mt7623-pinctrl",
> > -"mediatek,mt2701-pinctrl";
> > -   reg = <0 0x1000b000 0 0x1000>;
> > -   mediatek,pctl-regmap = <_pctl_a>;
> > -   pins-are-numbered;
> > -   gpio-controller;
> > -   #gpio-cells = <2>;
> > -   interrupt-controller;
> > -   interrupt-parent = <>;
> > -   #interrupt-cells = <2>;
> > -   interrupts = ,
> > -;
> > -   };
> > -
> > syscfg_pctl_a: syscfg@10005000 {
> > compatible = "mediatek,mt7623-pctl-a-syscfg", "syscon";
> > reg = <0 0x10005000 0 0x1000>;
> 
> 
> Hi Ryder,
> 
> pio node is special, it have 2 registers space.
> The address 10005000 is for PIO, it was accesed through syscfg_pctl_a
> regmap. The other one, 0x1000b000, is for EINT.
> 
> So,I think using address @10005000 is correct.
> 
> Joe.C
> 

Thanks for explanation. I'll revert this part to a previous state.

> 




Re: pull-request: wireless-drivers 2017-09-25

2017-09-26 Thread David Miller
From: Kalle Valo 
Date: Mon, 25 Sep 2017 11:55:22 +0300

> here a pull request to net for 4.14, more info in the signed tag below.
> Please let me know if there are any problems.

Pulled, thanks Kalle.


Re: pull-request: wireless-drivers 2017-09-25

2017-09-26 Thread David Miller
From: Kalle Valo 
Date: Mon, 25 Sep 2017 11:55:22 +0300

> here a pull request to net for 4.14, more info in the signed tag below.
> Please let me know if there are any problems.

Pulled, thanks Kalle.


Re: [PATCH net-next 0/5] net: dsa: use generic slave phydev

2017-09-26 Thread David Miller
From: Vivien Didelot 
Date: Tue, 26 Sep 2017 17:15:30 -0400

> DSA currently stores a phy_device pointer in each slave private
> structure. This requires to implement our own ethtool ksettings
> accessors and such.
> 
> This patchset removes the private phy_device in favor of the one
> provided in the net_device structure, and thus allows us to use the
> generic phy_ethtool_* functions.

Series applied, thanks Vivien.


Re: [PATCH net-next 0/5] net: dsa: use generic slave phydev

2017-09-26 Thread David Miller
From: Vivien Didelot 
Date: Tue, 26 Sep 2017 17:15:30 -0400

> DSA currently stores a phy_device pointer in each slave private
> structure. This requires to implement our own ethtool ksettings
> accessors and such.
> 
> This patchset removes the private phy_device in favor of the one
> provided in the net_device structure, and thus allows us to use the
> generic phy_ethtool_* functions.

Series applied, thanks Vivien.


[PATCH] Call xen_cleanhighmap() with 4MB aligned for page tables mapping

2017-09-26 Thread Zhenzhong Duan
When bootup a PVM guest with large memory(Ex.240GB), XEN provided initial
mapping overlaps with kernel module virtual space. When mapping in this space
is cleared by xen_cleanhighmap(), in certain case there could be an 2MB mapping
left. This is due to XEN initialize 4MB aligned mapping but xen_cleanhighmap()
finish at 2MB boundary.

When module loading is just on top of the 2MB space, got below warning:

WARNING: at mm/vmalloc.c:106 vmap_pte_range+0x14e/0x190()
Call Trace:
 [] warn_alloc_failed+0xf3/0x160
 [] __vmalloc_area_node+0x182/0x1c0
 [] ? module_alloc_update_bounds+0x1e/0x80
 [] __vmalloc_node_range+0xa7/0x110
 [] ? module_alloc_update_bounds+0x1e/0x80
 [] module_alloc+0x64/0x70
 [] ? module_alloc_update_bounds+0x1e/0x80
 [] module_alloc_update_bounds+0x1e/0x80
 [] move_module+0x27/0x150
 [] layout_and_allocate+0x120/0x1b0
 [] load_module+0x78/0x640
 [] ? security_file_permission+0x8b/0x90
 [] sys_init_module+0x62/0x1e0
 [] system_call_fastpath+0x16/0x1b

Then the mapping of 2MB is cleared, finally oops when the page in that space is
accessed.

BUG: unable to handle kernel paging request at 88002260
IP: [] clear_page_c_e+0x7/0x10
PGD 1788067 PUD 178c067 PMD 22434067 PTE 0
Oops: 0002 [#1] SMP
Call Trace:
 [] ? prep_new_page+0x127/0x1c0
 [] get_page_from_freelist+0x1e2/0x550
 [] ? ii_iovec_copy_to_user+0x90/0x140
 [] __alloc_pages_nodemask+0x12d/0x230
 [] alloc_pages_vma+0xc6/0x1a0
 [] ? pte_mfn_to_pfn+0x7d/0x100
 [] do_anonymous_page+0x16b/0x350
 [] handle_pte_fault+0x1e4/0x200
 [] ? xen_pmd_val+0xe/0x10
 [] ? __raw_callee_save_xen_pmd_val+0x11/0x1e
 [] handle_mm_fault+0x15b/0x270
 [] do_page_fault+0x140/0x470
 [] page_fault+0x25/0x30

Call xen_cleanhighmap() with 4MB aligned for page tables mapping to fix it.
The unnecessory call of xen_cleanhighmap() in DEBUG mode is also removed.

References: https://lists.xen.org/archives/html/xen-devel/2012-07/msg01562.html
Signed-off-by: Zhenzhong Duan 
---
 arch/x86/xen/mmu_pv.c |   12 +++-
 1 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 7330cb3..3628d97 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1238,21 +1238,15 @@ static void __init xen_pagetable_cleanhighmap(void)
 * from _brk_limit way up to the max_pfn_mapped (which is the end of
 * the ramdisk). We continue on, erasing PMD entries that point to page
 * tables - do note that they are accessible at this stage via __va.
-* For good measure we also round up to the PMD - which means that if
+* For good measure we also round up to the PMD*2 - which means that if
 * anybody is using __ka address to the initial boot-stack - and try
 * to use it - they are going to crash. The xen_start_info has been
 * taken care of already in xen_setup_kernel_pagetable. */
addr = xen_start_info->pt_base;
-   size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE);
+   size = xen_start_info->nr_pt_frames * PAGE_SIZE;
 
-   xen_cleanhighmap(addr, addr + size);
+   xen_cleanhighmap(addr, roundup(addr + size, PMD_SIZE*2));
xen_start_info->pt_base = (unsigned 
long)__va(__pa(xen_start_info->pt_base));
-#ifdef DEBUG
-   /* This is superfluous and is not necessary, but you know what
-* lets do it. The MODULES_VADDR -> MODULES_END should be clear of
-* anything at this stage. */
-   xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
-#endif
 }
 #endif
 
-- 
1.7.3


[PATCH] Call xen_cleanhighmap() with 4MB aligned for page tables mapping

2017-09-26 Thread Zhenzhong Duan
When bootup a PVM guest with large memory(Ex.240GB), XEN provided initial
mapping overlaps with kernel module virtual space. When mapping in this space
is cleared by xen_cleanhighmap(), in certain case there could be an 2MB mapping
left. This is due to XEN initialize 4MB aligned mapping but xen_cleanhighmap()
finish at 2MB boundary.

When module loading is just on top of the 2MB space, got below warning:

WARNING: at mm/vmalloc.c:106 vmap_pte_range+0x14e/0x190()
Call Trace:
 [] warn_alloc_failed+0xf3/0x160
 [] __vmalloc_area_node+0x182/0x1c0
 [] ? module_alloc_update_bounds+0x1e/0x80
 [] __vmalloc_node_range+0xa7/0x110
 [] ? module_alloc_update_bounds+0x1e/0x80
 [] module_alloc+0x64/0x70
 [] ? module_alloc_update_bounds+0x1e/0x80
 [] module_alloc_update_bounds+0x1e/0x80
 [] move_module+0x27/0x150
 [] layout_and_allocate+0x120/0x1b0
 [] load_module+0x78/0x640
 [] ? security_file_permission+0x8b/0x90
 [] sys_init_module+0x62/0x1e0
 [] system_call_fastpath+0x16/0x1b

Then the mapping of 2MB is cleared, finally oops when the page in that space is
accessed.

BUG: unable to handle kernel paging request at 88002260
IP: [] clear_page_c_e+0x7/0x10
PGD 1788067 PUD 178c067 PMD 22434067 PTE 0
Oops: 0002 [#1] SMP
Call Trace:
 [] ? prep_new_page+0x127/0x1c0
 [] get_page_from_freelist+0x1e2/0x550
 [] ? ii_iovec_copy_to_user+0x90/0x140
 [] __alloc_pages_nodemask+0x12d/0x230
 [] alloc_pages_vma+0xc6/0x1a0
 [] ? pte_mfn_to_pfn+0x7d/0x100
 [] do_anonymous_page+0x16b/0x350
 [] handle_pte_fault+0x1e4/0x200
 [] ? xen_pmd_val+0xe/0x10
 [] ? __raw_callee_save_xen_pmd_val+0x11/0x1e
 [] handle_mm_fault+0x15b/0x270
 [] do_page_fault+0x140/0x470
 [] page_fault+0x25/0x30

Call xen_cleanhighmap() with 4MB aligned for page tables mapping to fix it.
The unnecessory call of xen_cleanhighmap() in DEBUG mode is also removed.

References: https://lists.xen.org/archives/html/xen-devel/2012-07/msg01562.html
Signed-off-by: Zhenzhong Duan 
---
 arch/x86/xen/mmu_pv.c |   12 +++-
 1 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 7330cb3..3628d97 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1238,21 +1238,15 @@ static void __init xen_pagetable_cleanhighmap(void)
 * from _brk_limit way up to the max_pfn_mapped (which is the end of
 * the ramdisk). We continue on, erasing PMD entries that point to page
 * tables - do note that they are accessible at this stage via __va.
-* For good measure we also round up to the PMD - which means that if
+* For good measure we also round up to the PMD*2 - which means that if
 * anybody is using __ka address to the initial boot-stack - and try
 * to use it - they are going to crash. The xen_start_info has been
 * taken care of already in xen_setup_kernel_pagetable. */
addr = xen_start_info->pt_base;
-   size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE);
+   size = xen_start_info->nr_pt_frames * PAGE_SIZE;
 
-   xen_cleanhighmap(addr, addr + size);
+   xen_cleanhighmap(addr, roundup(addr + size, PMD_SIZE*2));
xen_start_info->pt_base = (unsigned 
long)__va(__pa(xen_start_info->pt_base));
-#ifdef DEBUG
-   /* This is superfluous and is not necessary, but you know what
-* lets do it. The MODULES_VADDR -> MODULES_END should be clear of
-* anything at this stage. */
-   xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
-#endif
 }
 #endif
 
-- 
1.7.3


[PATCH] HID: hid-multitouch: support fine-grain orientation reporting

2017-09-26 Thread Wei-Ning Huang
The current hid-multitouch driver only allow the report of two
orientations, vertical and horizontal. We use the Azimuth orientation
usage 0x5b under the Digitizer usage page to report orientation directly
from the hid device. A new quirk MT_QUIRK_REPORT_ORIENTATION is added so
user can enable this only if their device supports it.

Signed-off-by: Wei-Ning Huang 
Reviewed-by: Dmitry Torokhov 
---
 drivers/hid/hid-multitouch.c | 38 --
 include/linux/hid.h  |  1 +
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index 440b999304a5..4663bf4b2892 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -84,11 +84,12 @@ MODULE_LICENSE("GPL");
 #define MT_IO_FLAGS_PENDING_SLOTS  2
 
 struct mt_slot {
-   __s32 x, y, cx, cy, p, w, h;
+   __s32 x, y, cx, cy, p, w, h, a;
__s32 contactid;/* the device ContactID assigned to this slot */
bool touch_state;   /* is the touch valid? */
bool inrange_state; /* is the finger in proximity of the sensor? */
bool confidence_state;  /* is the touch made by a finger? */
+   bool has_azimuth;   /* the contact reports azimuth */
 };
 
 struct mt_class {
@@ -591,6 +592,23 @@ static int mt_touch_input_mapping(struct hid_device *hdev, 
struct hid_input *hi,
td->cc_index = field->index;
td->cc_value_index = usage->usage_index;
return 1;
+   case HID_DG_AZIMUTH:
+   hid_map_usage(hi, usage, bit, max,
+   EV_ABS, ABS_MT_ORIENTATION);
+   /*
+* Azimuth has the range of [0, MAX) representing a full
+* revolution. Set ABS_MT_ORIENTATION to a quarter of
+* MAX according the definition of ABS_MT_ORIENTATION
+*/
+   input_set_abs_params(hi->input, ABS_MT_ORIENTATION,
+   0, field->logical_maximum / 4,
+   cls->sn_move ?
+   field->logical_maximum / cls->sn_move : 0, 0);
+   input_abs_set_res(hi->input, ABS_MT_ORIENTATION,
+   hidinput_calc_abs_res(field,
+   ABS_MT_ORIENTATION));
+   mt_store_field(usage, td, hi);
+   return 1;
case HID_DG_CONTACTMAX:
/* we don't set td->last_slot_field as contactcount and
 * contact max are global to the report */
@@ -683,6 +701,10 @@ static void mt_complete_slot(struct mt_device *td, struct 
input_dev *input)
int wide = (s->w > s->h);
int major = max(s->w, s->h);
int minor = min(s->w, s->h);
+   int orientation = wide;
+
+   if (s->has_azimuth)
+   orientation = s->a;
 
/*
 * divided by two to match visual scale of touch
@@ -699,7 +721,8 @@ static void mt_complete_slot(struct mt_device *td, struct 
input_dev *input)
input_event(input, EV_ABS, ABS_MT_TOOL_Y, s->cy);
input_event(input, EV_ABS, ABS_MT_DISTANCE,
!s->touch_state);
-   input_event(input, EV_ABS, ABS_MT_ORIENTATION, wide);
+   input_event(input, EV_ABS, ABS_MT_ORIENTATION,
+   orientation);
input_event(input, EV_ABS, ABS_MT_PRESSURE, s->p);
input_event(input, EV_ABS, ABS_MT_TOUCH_MAJOR, major);
input_event(input, EV_ABS, ABS_MT_TOUCH_MINOR, minor);
@@ -789,6 +812,17 @@ static void mt_process_mt_event(struct hid_device *hid, 
struct hid_field *field,
break;
case HID_DG_CONTACTCOUNT:
break;
+   case HID_DG_AZIMUTH:
+   /*
+* Azimuth is counter-clockwise and ranges from [0, MAX)
+* (a full revolution). Convert it to clockwise ranging
+* [-MAX/2, MAX/2].
+*/
+   if (value > field->logical_maximum / 2)
+   value -= field->logical_maximum;
+   td->curdata.a = -value;
+   td->curdata.has_azimuth = true;
+   break;
case HID_DG_TOUCH:
/* do nothing */
break;
diff --git a/include/linux/hid.h b/include/linux/hid.h
index ab05a86269dc..d81b9b6fd83a 100644
--- 

[PATCH] HID: hid-multitouch: support fine-grain orientation reporting

2017-09-26 Thread Wei-Ning Huang
The current hid-multitouch driver only allow the report of two
orientations, vertical and horizontal. We use the Azimuth orientation
usage 0x5b under the Digitizer usage page to report orientation directly
from the hid device. A new quirk MT_QUIRK_REPORT_ORIENTATION is added so
user can enable this only if their device supports it.

Signed-off-by: Wei-Ning Huang 
Reviewed-by: Dmitry Torokhov 
---
 drivers/hid/hid-multitouch.c | 38 --
 include/linux/hid.h  |  1 +
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index 440b999304a5..4663bf4b2892 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -84,11 +84,12 @@ MODULE_LICENSE("GPL");
 #define MT_IO_FLAGS_PENDING_SLOTS  2
 
 struct mt_slot {
-   __s32 x, y, cx, cy, p, w, h;
+   __s32 x, y, cx, cy, p, w, h, a;
__s32 contactid;/* the device ContactID assigned to this slot */
bool touch_state;   /* is the touch valid? */
bool inrange_state; /* is the finger in proximity of the sensor? */
bool confidence_state;  /* is the touch made by a finger? */
+   bool has_azimuth;   /* the contact reports azimuth */
 };
 
 struct mt_class {
@@ -591,6 +592,23 @@ static int mt_touch_input_mapping(struct hid_device *hdev, 
struct hid_input *hi,
td->cc_index = field->index;
td->cc_value_index = usage->usage_index;
return 1;
+   case HID_DG_AZIMUTH:
+   hid_map_usage(hi, usage, bit, max,
+   EV_ABS, ABS_MT_ORIENTATION);
+   /*
+* Azimuth has the range of [0, MAX) representing a full
+* revolution. Set ABS_MT_ORIENTATION to a quarter of
+* MAX according the definition of ABS_MT_ORIENTATION
+*/
+   input_set_abs_params(hi->input, ABS_MT_ORIENTATION,
+   0, field->logical_maximum / 4,
+   cls->sn_move ?
+   field->logical_maximum / cls->sn_move : 0, 0);
+   input_abs_set_res(hi->input, ABS_MT_ORIENTATION,
+   hidinput_calc_abs_res(field,
+   ABS_MT_ORIENTATION));
+   mt_store_field(usage, td, hi);
+   return 1;
case HID_DG_CONTACTMAX:
/* we don't set td->last_slot_field as contactcount and
 * contact max are global to the report */
@@ -683,6 +701,10 @@ static void mt_complete_slot(struct mt_device *td, struct 
input_dev *input)
int wide = (s->w > s->h);
int major = max(s->w, s->h);
int minor = min(s->w, s->h);
+   int orientation = wide;
+
+   if (s->has_azimuth)
+   orientation = s->a;
 
/*
 * divided by two to match visual scale of touch
@@ -699,7 +721,8 @@ static void mt_complete_slot(struct mt_device *td, struct 
input_dev *input)
input_event(input, EV_ABS, ABS_MT_TOOL_Y, s->cy);
input_event(input, EV_ABS, ABS_MT_DISTANCE,
!s->touch_state);
-   input_event(input, EV_ABS, ABS_MT_ORIENTATION, wide);
+   input_event(input, EV_ABS, ABS_MT_ORIENTATION,
+   orientation);
input_event(input, EV_ABS, ABS_MT_PRESSURE, s->p);
input_event(input, EV_ABS, ABS_MT_TOUCH_MAJOR, major);
input_event(input, EV_ABS, ABS_MT_TOUCH_MINOR, minor);
@@ -789,6 +812,17 @@ static void mt_process_mt_event(struct hid_device *hid, 
struct hid_field *field,
break;
case HID_DG_CONTACTCOUNT:
break;
+   case HID_DG_AZIMUTH:
+   /*
+* Azimuth is counter-clockwise and ranges from [0, MAX)
+* (a full revolution). Convert it to clockwise ranging
+* [-MAX/2, MAX/2].
+*/
+   if (value > field->logical_maximum / 2)
+   value -= field->logical_maximum;
+   td->curdata.a = -value;
+   td->curdata.has_azimuth = true;
+   break;
case HID_DG_TOUCH:
/* do nothing */
break;
diff --git a/include/linux/hid.h b/include/linux/hid.h
index ab05a86269dc..d81b9b6fd83a 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -281,6 

Re: [PATCH] net/ipv4: Update sk_for_each_entry_offset_rcu macro to utilize rcu methods hlist_next_rcu. This fixes the warnings thrown by sparse regarding net/ipv4/udp.c on line 1974.

2017-09-26 Thread David Miller
From: Tim Hansen 
Date: Tue, 26 Sep 2017 20:54:05 -0400

> Signed-off-by: Tim Hansen 

This is a poor patch submission on many levels.

But the main problem, is that there is no use of
sk_for_each_entry_offset_rcu() in any of my networking kernel trees.

Referencing code by line number never works, you have to mention
what version of the kernel, what tree, and where in what fucntion
the problem is occurring.

Secondly, sk_for_each_entry_offset_rcu() is not meant to be used
in _raw() contexts.  This is why it's not called
sk_for_each_entry_offset_rcu_raw().

The sparse warning is probably legitimate, and points to a bug.

But nobody can tell where becuase you haven't told us what tree
and where this happens.


Re: [PATCH] net/ipv4: Update sk_for_each_entry_offset_rcu macro to utilize rcu methods hlist_next_rcu. This fixes the warnings thrown by sparse regarding net/ipv4/udp.c on line 1974.

2017-09-26 Thread David Miller
From: Tim Hansen 
Date: Tue, 26 Sep 2017 20:54:05 -0400

> Signed-off-by: Tim Hansen 

This is a poor patch submission on many levels.

But the main problem, is that there is no use of
sk_for_each_entry_offset_rcu() in any of my networking kernel trees.

Referencing code by line number never works, you have to mention
what version of the kernel, what tree, and where in what fucntion
the problem is occurring.

Secondly, sk_for_each_entry_offset_rcu() is not meant to be used
in _raw() contexts.  This is why it's not called
sk_for_each_entry_offset_rcu_raw().

The sparse warning is probably legitimate, and points to a bug.

But nobody can tell where becuase you haven't told us what tree
and where this happens.


Re: [PATCH for-next 05/20] RDMA/hns: Add command queue support for hip08 RoCE driver

2017-09-26 Thread Wei Hu (Xavier)



On 2017/9/27 0:18, Doug Ledford wrote:

On 9/26/2017 9:13 AM, Wei Hu (Xavier) wrote:


On 2017/9/26 1:36, Doug Ledford wrote:

On Mon, 2017-09-25 at 20:18 +0300, Leon Romanovsky wrote:

On Mon, Sep 25, 2017 at 01:06:53PM -0400, Doug Ledford wrote:

On Wed, 2017-08-30 at 17:23 +0800, Wei Hu (Xavier) wrote:


+    /*
+ * If the command is sync, wait for the firmware to
write
back,
+ * if multi descriptors to be sent, use the first one to
check
+ */
+    if ((desc->flag) & HNS_ROCE_CMD_FLAG_NO_INTR) {
+    do {
+    if (hns_roce_cmq_csq_done(hr_dev))
+    break;
+    usleep_range(1000, 2000);
+    timeout++;
+    } while (timeout < priv->cmq.tx_timeout);
+    }

then we spin here for a maximum amount of time between 200 and
400ms,
so 1/4 to 1/2 a second.  All the time we are holding the bh lock on
this CPU.  That seems excessive to me.  If we are going to spin
that
long, can you find a way to allocate/reserve your resources, send
the
command, then drop the bh lock while you spin, and retake it before
you
complete once the spinning is done?

They don't allocate anything in this loop, but checking the pointers
are
the same, see hns_roce_cmq_csq_done.

I'm not sure I understand your intended implication of your comment.  I
wasn't concerned about them allocating anything, only that if the
hardware is hung, then this loop will hang out for 1/4 to 1/2 a second
and hold up all bottom half processing on this CPU in the meantime.
That's the sort of things that provides poor overall system behavior.

Now, since they are really only checking to see if the hardware has
gotten around to their particular command, and their command is part of
a ring structure, it's possible to record the original head command,
and our new head command, and then release the spin_lock_bh around the
entire do{ }while construct, and in hns_roce_cmd_csq_done() you could
check that head is not in the range old_head:new_head.  That would
protect you in case something in the bottom half processing queued up
some more commands and from one sleep to the next the head jumped from
something other than the new_head to something past new_head, so that
head == priv->cmq.csq.next_to_use ends up being perpetually false.
But, that's just from a quick read of the code, I could easily be
missing something here...

Hi, Doug
     Driver issues the cmds in cmq, and firmware gets and processes them.
     The firmware process only one cmd at the same time, and it will take
     about serveral to 200 us in one cmd currently, so the driver need
     not to use stream mode to issue cmd.

I'm not sure I understand your response here.

I get that the driver issues cmds in the cmq, and that the firmware gets
them and processes them.

I get that the firmware will only work on one command at a time and only
move to the next one once the current one is complete.

I get that commands take anywhere from a few usec to a couple hundred usec.

I also get that because you are sleeping for somewhere in between 1000
and 2000 usecs, that the driver could easily finish a whole slew of
commands.  It could do 10 slow commands, or 100 or more fast commands.
What this tells me is that the only reason your current implementation
of hns_roce_cmq_csq_done() works at all is because you keep the device
locked out from any other commands being put on the queue.  As far as I
can tell, that's the only way you can guarantee that at some point you
will wake up and the head pointer will be exactly at csq->next_to_use.
Otherwise, if you didn't block them out, then you could sleep with the
head pointer before csq->next_to_use and wake up the next time with it
already well past csq->next_to_use.  Am I right about that?  While you
are waiting on this command queue, any other commands are blocked from
being placed on the command queue?

Hi, Doug,
you are right.
And one "hns_x" ib device only has one command queue in hip08,
other commands will be blocked when waiting on the command queue.


I don't understand what you mean by "so the driver need not to use
stream mode to issue cmd".

Sorry, my expression error.
stream -> pipeline

And if you argee, after this patchset has been accepted we will send a 
following up patch :

    In hns_roce_cmq_send function, replace
        usleep_range(1000, 2000);
    with the following statement:
 udelay(1);
    And if so, we can avoid using usleep_range function in spin_lock_bh 
spin region,

    because it probally cause calltrace.

    Best regards
Wei Hu

     Regards
Wei Hu

+#define HNS_ROCE_CMQ_TX_TIMEOUT    200

or you could reduce the size of this define...

--
Doug Ledford 
  GPG KeyID: B826A3330E572FDD
  Key fingerprint = AE6B 1BDA 122B 23B4 265B  1274 B826 A333 0E57
2FDD

--
To unsubscribe from this list: send the line "unsubscribe linux-
rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  

Re: [PATCH for-next 05/20] RDMA/hns: Add command queue support for hip08 RoCE driver

2017-09-26 Thread Wei Hu (Xavier)



On 2017/9/27 0:18, Doug Ledford wrote:

On 9/26/2017 9:13 AM, Wei Hu (Xavier) wrote:


On 2017/9/26 1:36, Doug Ledford wrote:

On Mon, 2017-09-25 at 20:18 +0300, Leon Romanovsky wrote:

On Mon, Sep 25, 2017 at 01:06:53PM -0400, Doug Ledford wrote:

On Wed, 2017-08-30 at 17:23 +0800, Wei Hu (Xavier) wrote:


+    /*
+ * If the command is sync, wait for the firmware to
write
back,
+ * if multi descriptors to be sent, use the first one to
check
+ */
+    if ((desc->flag) & HNS_ROCE_CMD_FLAG_NO_INTR) {
+    do {
+    if (hns_roce_cmq_csq_done(hr_dev))
+    break;
+    usleep_range(1000, 2000);
+    timeout++;
+    } while (timeout < priv->cmq.tx_timeout);
+    }

then we spin here for a maximum amount of time between 200 and
400ms,
so 1/4 to 1/2 a second.  All the time we are holding the bh lock on
this CPU.  That seems excessive to me.  If we are going to spin
that
long, can you find a way to allocate/reserve your resources, send
the
command, then drop the bh lock while you spin, and retake it before
you
complete once the spinning is done?

They don't allocate anything in this loop, but checking the pointers
are
the same, see hns_roce_cmq_csq_done.

I'm not sure I understand your intended implication of your comment.  I
wasn't concerned about them allocating anything, only that if the
hardware is hung, then this loop will hang out for 1/4 to 1/2 a second
and hold up all bottom half processing on this CPU in the meantime.
That's the sort of things that provides poor overall system behavior.

Now, since they are really only checking to see if the hardware has
gotten around to their particular command, and their command is part of
a ring structure, it's possible to record the original head command,
and our new head command, and then release the spin_lock_bh around the
entire do{ }while construct, and in hns_roce_cmd_csq_done() you could
check that head is not in the range old_head:new_head.  That would
protect you in case something in the bottom half processing queued up
some more commands and from one sleep to the next the head jumped from
something other than the new_head to something past new_head, so that
head == priv->cmq.csq.next_to_use ends up being perpetually false.
But, that's just from a quick read of the code, I could easily be
missing something here...

Hi, Doug
     Driver issues the cmds in cmq, and firmware gets and processes them.
     The firmware process only one cmd at the same time, and it will take
     about serveral to 200 us in one cmd currently, so the driver need
     not to use stream mode to issue cmd.

I'm not sure I understand your response here.

I get that the driver issues cmds in the cmq, and that the firmware gets
them and processes them.

I get that the firmware will only work on one command at a time and only
move to the next one once the current one is complete.

I get that commands take anywhere from a few usec to a couple hundred usec.

I also get that because you are sleeping for somewhere in between 1000
and 2000 usecs, that the driver could easily finish a whole slew of
commands.  It could do 10 slow commands, or 100 or more fast commands.
What this tells me is that the only reason your current implementation
of hns_roce_cmq_csq_done() works at all is because you keep the device
locked out from any other commands being put on the queue.  As far as I
can tell, that's the only way you can guarantee that at some point you
will wake up and the head pointer will be exactly at csq->next_to_use.
Otherwise, if you didn't block them out, then you could sleep with the
head pointer before csq->next_to_use and wake up the next time with it
already well past csq->next_to_use.  Am I right about that?  While you
are waiting on this command queue, any other commands are blocked from
being placed on the command queue?

Hi, Doug,
you are right.
And one "hns_x" ib device only has one command queue in hip08,
other commands will be blocked when waiting on the command queue.


I don't understand what you mean by "so the driver need not to use
stream mode to issue cmd".

Sorry, my expression error.
stream -> pipeline

And if you argee, after this patchset has been accepted we will send a 
following up patch :

    In hns_roce_cmq_send function, replace
        usleep_range(1000, 2000);
    with the following statement:
 udelay(1);
    And if so, we can avoid using usleep_range function in spin_lock_bh 
spin region,

    because it probally cause calltrace.

    Best regards
Wei Hu

     Regards
Wei Hu

+#define HNS_ROCE_CMQ_TX_TIMEOUT    200

or you could reduce the size of this define...

--
Doug Ledford 
  GPG KeyID: B826A3330E572FDD
  Key fingerprint = AE6B 1BDA 122B 23B4 265B  1274 B826 A333 0E57
2FDD

--
To unsubscribe from this list: send the line "unsubscribe linux-
rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html








Re: [PATCH] net: stmmac: Meet alignment requirements for DMA

2017-09-26 Thread David Miller
From: Paul Burton 
Date: Tue, 26 Sep 2017 14:30:33 -0700

> I'd suggest that at a minimum if you're unwilling to obey the API as
> described in Documentation/DMA-API.txt then it would be beneficial
> if you could propose a change to it such that it works for you, and
> perhaps we can extend the API & its documentation to allow your
> usage whilst also allowing us to catch broken uses.

The networking driver code works fine as is.

I also didn't write that ill-advised documentation in the DMA docs,
nor the non-merged new MIPS assertion.

So I'm trying to figure out on what basis I am required to do
anything.

Thank you.



Re: [PATCH] net: stmmac: Meet alignment requirements for DMA

2017-09-26 Thread David Miller
From: Paul Burton 
Date: Tue, 26 Sep 2017 14:30:33 -0700

> I'd suggest that at a minimum if you're unwilling to obey the API as
> described in Documentation/DMA-API.txt then it would be beneficial
> if you could propose a change to it such that it works for you, and
> perhaps we can extend the API & its documentation to allow your
> usage whilst also allowing us to catch broken uses.

The networking driver code works fine as is.

I also didn't write that ill-advised documentation in the DMA docs,
nor the non-merged new MIPS assertion.

So I'm trying to figure out on what basis I am required to do
anything.

Thank you.



[PATCH v7 0/2] gpio: uniphier: UniPhier GPIO driver

2017-09-26 Thread Masahiro Yamada

This series adds UniPhier GPIO driver.

The interrupt controller part is implemented by using hierarchy irqdomain.

IMHO, the problem of the hierarchy irqdomain is that drivers must hard-code
the fwspec of the interrupt parent.  We will never know the DT binding of
the parent unless we parse #interrupt-cell, etc.

I asked about this:
https://lkml.org/lkml/2017/7/6/758

Apparently, the current kernel does not provide a systematic way to
describe it.

In v1-v3, I hard-coded the parent hwirq numbers in the driver because
irqchip drivers are forced to hard-code more or less about the parent.
This was not accepted by Linus Walleij.

In v4, I tried to use the new API irq_domain_push_irq().  I needed to
change the irqdomain framework to make it work for DT, but seemed
controversial in the irqdomain subsystem review.

In v5, I tried another solution.  At first I thought it worked, but
I found a dead-lock if an irq is disposed from the .alloc hook.

After I considered more, I thought "interrupts" property does not
make much sense here because the last cell (which usually specifies
the trigger type) is useless for the hierarchy irqdomain.

I decided to use a vendor-specific property.  This is what some drivers
actually do.


Changes in v7:
  - Rename uniphier_gpio_irq_get_hwirq()
(I just missed to "git commit" before "git send-email")

Changes in v6:
  - Add "socionext,interrupt-ranges"

Changes in v5:
  - Split into a separate patch for DT binding
  - Add a new patch to export of_phandle_args_to_fwspec
  - Split DT binding into a separate file
  - v4 depends on some patches that change irq_domain_push_irq(), but
they got negative feedback in the irqdomain subsystem review.
Yet another approach here.  Parse "interrupts" property in
.alloc() hook.  If the parent IRQ is already mapped, dispose it
and re-alloc in irqdomain manner.

Changes in v4:
  - Add COMPILE_TEST and select IRQ_DOMAIN_HIERARCHY
  - Reimplement irqchip part by using irq_domain_push_irq()

Changes in v3:
  - Add .irq_set_affinity() hook
  - Use irq_domain_create_hierarchy() instead of legacy
irq_domain_add_hierarchy()

Changes in v2:
  - Remove +32 offset for parent interrupts to follow the GIC
binding convention
  - Let uniphier_gpio_irq_alloc() fail if nr_irqs != 1
  - Allocate gpio_chip statically because just one instance is
supported
  - Fix suspend and resume hooks

Masahiro Yamada (2):
  dt-bindings: gpio: uniphier: add UniPhier GPIO binding
  gpio: uniphier: add UniPhier GPIO controller driver

 .../devicetree/bindings/gpio/gpio-uniphier.txt |  40 ++
 MAINTAINERS|   1 +
 drivers/gpio/Kconfig   |   8 +
 drivers/gpio/Makefile  |   1 +
 drivers/gpio/gpio-uniphier.c   | 507 +
 include/dt-bindings/gpio/uniphier-gpio.h   |  18 +
 6 files changed, 575 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/gpio/gpio-uniphier.txt
 create mode 100644 drivers/gpio/gpio-uniphier.c
 create mode 100644 include/dt-bindings/gpio/uniphier-gpio.h

-- 
2.7.4



[PATCH v7 0/2] gpio: uniphier: UniPhier GPIO driver

2017-09-26 Thread Masahiro Yamada

This series adds UniPhier GPIO driver.

The interrupt controller part is implemented by using hierarchy irqdomain.

IMHO, the problem of the hierarchy irqdomain is that drivers must hard-code
the fwspec of the interrupt parent.  We will never know the DT binding of
the parent unless we parse #interrupt-cell, etc.

I asked about this:
https://lkml.org/lkml/2017/7/6/758

Apparently, the current kernel does not provide a systematic way to
describe it.

In v1-v3, I hard-coded the parent hwirq numbers in the driver because
irqchip drivers are forced to hard-code more or less about the parent.
This was not accepted by Linus Walleij.

In v4, I tried to use the new API irq_domain_push_irq().  I needed to
change the irqdomain framework to make it work for DT, but seemed
controversial in the irqdomain subsystem review.

In v5, I tried another solution.  At first I thought it worked, but
I found a dead-lock if an irq is disposed from the .alloc hook.

After I considered more, I thought "interrupts" property does not
make much sense here because the last cell (which usually specifies
the trigger type) is useless for the hierarchy irqdomain.

I decided to use a vendor-specific property.  This is what some drivers
actually do.


Changes in v7:
  - Rename uniphier_gpio_irq_get_hwirq()
(I just missed to "git commit" before "git send-email")

Changes in v6:
  - Add "socionext,interrupt-ranges"

Changes in v5:
  - Split into a separate patch for DT binding
  - Add a new patch to export of_phandle_args_to_fwspec
  - Split DT binding into a separate file
  - v4 depends on some patches that change irq_domain_push_irq(), but
they got negative feedback in the irqdomain subsystem review.
Yet another approach here.  Parse "interrupts" property in
.alloc() hook.  If the parent IRQ is already mapped, dispose it
and re-alloc in irqdomain manner.

Changes in v4:
  - Add COMPILE_TEST and select IRQ_DOMAIN_HIERARCHY
  - Reimplement irqchip part by using irq_domain_push_irq()

Changes in v3:
  - Add .irq_set_affinity() hook
  - Use irq_domain_create_hierarchy() instead of legacy
irq_domain_add_hierarchy()

Changes in v2:
  - Remove +32 offset for parent interrupts to follow the GIC
binding convention
  - Let uniphier_gpio_irq_alloc() fail if nr_irqs != 1
  - Allocate gpio_chip statically because just one instance is
supported
  - Fix suspend and resume hooks

Masahiro Yamada (2):
  dt-bindings: gpio: uniphier: add UniPhier GPIO binding
  gpio: uniphier: add UniPhier GPIO controller driver

 .../devicetree/bindings/gpio/gpio-uniphier.txt |  40 ++
 MAINTAINERS|   1 +
 drivers/gpio/Kconfig   |   8 +
 drivers/gpio/Makefile  |   1 +
 drivers/gpio/gpio-uniphier.c   | 507 +
 include/dt-bindings/gpio/uniphier-gpio.h   |  18 +
 6 files changed, 575 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/gpio/gpio-uniphier.txt
 create mode 100644 drivers/gpio/gpio-uniphier.c
 create mode 100644 include/dt-bindings/gpio/uniphier-gpio.h

-- 
2.7.4



[PATCH v7 2/2] gpio: uniphier: add UniPhier GPIO controller driver

2017-09-26 Thread Masahiro Yamada
This GPIO controller is used on UniPhier SoC family.

It also serves as an interrupt controller, but interrupt signals are
just delivered to the parent irqchip without any latching or OR'ing.
This type of hardware can be well described with hierarchy IRQ domain.

One unfortunate thing for this device is that the interrupt mapping to
the interrupt parent is not contiguous.

I asked how DT can describe interrupt mapping between two irqchips [1],
but I could not find a good solution (at least in the framework level).
In fact, irqchip drivers using hierarchy domain generally hard-code the
DT binding of their parent.

After tackling on several approaches such as hard-code of hwirqs,
irq_domain_push_irq(), I ended up with a vendor specific property.
If we come up with a good idea to support this in the framework, we
can migrate over to it, but we can live with a driver-level solution
for now.

[1] https://lkml.org/lkml/2017/7/6/758

Signed-off-by: Masahiro Yamada 
---


 MAINTAINERS  |   1 +
 drivers/gpio/Kconfig |   8 +
 drivers/gpio/Makefile|   1 +
 drivers/gpio/gpio-uniphier.c | 507 +++
 include/dt-bindings/gpio/uniphier-gpio.h |  18 ++
 5 files changed, 535 insertions(+)
 create mode 100644 drivers/gpio/gpio-uniphier.c
 create mode 100644 include/dt-bindings/gpio/uniphier-gpio.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 6671f37..47cfca7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2036,6 +2036,7 @@ F:arch/arm/mm/cache-uniphier.c
 F: arch/arm64/boot/dts/socionext/
 F: drivers/bus/uniphier-system-bus.c
 F: drivers/clk/uniphier/
+F: drivers/gpio/gpio-uniphier.c
 F: drivers/i2c/busses/i2c-uniphier*
 F: drivers/irqchip/irq-uniphier-aidet.c
 F: drivers/pinctrl/uniphier/
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 3f80f16..25c0f308 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -475,6 +475,14 @@ config GPIO_TZ1090_PDC
help
  Say yes here to support Toumaz Xenif TZ1090 PDC GPIOs.
 
+config GPIO_UNIPHIER
+   tristate "UniPhier GPIO support"
+   depends on ARCH_UNIPHIER || COMPILE_TEST
+   depends on OF_GPIO
+   select IRQ_DOMAIN_HIERARCHY
+   help
+ Say yes here to support UniPhier GPIOs.
+
 config GPIO_VF610
def_bool y
depends on ARCH_MXC && SOC_VF610
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index aeb70e9d..472f675 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -131,6 +131,7 @@ obj-$(CONFIG_GPIO_TWL6040)  += gpio-twl6040.o
 obj-$(CONFIG_GPIO_TZ1090)  += gpio-tz1090.o
 obj-$(CONFIG_GPIO_TZ1090_PDC)  += gpio-tz1090-pdc.o
 obj-$(CONFIG_GPIO_UCB1400) += gpio-ucb1400.o
+obj-$(CONFIG_GPIO_UNIPHIER)+= gpio-uniphier.o
 obj-$(CONFIG_GPIO_VF610)   += gpio-vf610.o
 obj-$(CONFIG_GPIO_VIPERBOARD)  += gpio-viperboard.o
 obj-$(CONFIG_GPIO_VR41XX)  += gpio-vr41xx.o
diff --git a/drivers/gpio/gpio-uniphier.c b/drivers/gpio/gpio-uniphier.c
new file mode 100644
index 000..d62cea4
--- /dev/null
+++ b/drivers/gpio/gpio-uniphier.c
@@ -0,0 +1,507 @@
+/*
+ * Copyright (C) 2017 Socionext Inc.
+ *   Author: Masahiro Yamada 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define UNIPHIER_GPIO_BANK_MASK\
+   GENMASK((UNIPHIER_GPIO_LINES_PER_BANK) - 1, 0)
+
+#define UNIPHIER_GPIO_IRQ_MAX_NUM  24
+
+#define UNIPHIER_GPIO_PORT_DATA0x0 /* data */
+#define UNIPHIER_GPIO_PORT_DIR 0x4 /* direction (1:in, 0:out) */
+#define UNIPHIER_GPIO_IRQ_EN   0x90/* irq enable */
+#define UNIPHIER_GPIO_IRQ_MODE 0x94/* irq mode (1: both edge) */
+#define UNIPHIER_GPIO_IRQ_FLT_EN   0x98/* noise filter enable */
+#define UNIPHIER_GPIO_IRQ_FLT_CYC  0x9c/* noise filter clock cycle */
+
+struct uniphier_gpio_priv {
+   struct gpio_chip chip;
+   struct irq_chip irq_chip;
+   struct irq_domain *domain;
+   void __iomem *regs;
+   spinlock_t lock;
+   u32 saved_vals[0];
+};
+
+static unsigned int uniphier_gpio_bank_to_reg(unsigned int bank)
+{
+   unsigned int reg;
+
+   reg = (bank + 1) * 8;
+
+   /*
+* Unfortunately, the GPIO port registers are not contiguous because
+* offset 0x90-0x9f is used for IRQ.  Add 0x10 when 

[PATCH v7 2/2] gpio: uniphier: add UniPhier GPIO controller driver

2017-09-26 Thread Masahiro Yamada
This GPIO controller is used on UniPhier SoC family.

It also serves as an interrupt controller, but interrupt signals are
just delivered to the parent irqchip without any latching or OR'ing.
This type of hardware can be well described with hierarchy IRQ domain.

One unfortunate thing for this device is that the interrupt mapping to
the interrupt parent is not contiguous.

I asked how DT can describe interrupt mapping between two irqchips [1],
but I could not find a good solution (at least in the framework level).
In fact, irqchip drivers using hierarchy domain generally hard-code the
DT binding of their parent.

After tackling on several approaches such as hard-code of hwirqs,
irq_domain_push_irq(), I ended up with a vendor specific property.
If we come up with a good idea to support this in the framework, we
can migrate over to it, but we can live with a driver-level solution
for now.

[1] https://lkml.org/lkml/2017/7/6/758

Signed-off-by: Masahiro Yamada 
---


 MAINTAINERS  |   1 +
 drivers/gpio/Kconfig |   8 +
 drivers/gpio/Makefile|   1 +
 drivers/gpio/gpio-uniphier.c | 507 +++
 include/dt-bindings/gpio/uniphier-gpio.h |  18 ++
 5 files changed, 535 insertions(+)
 create mode 100644 drivers/gpio/gpio-uniphier.c
 create mode 100644 include/dt-bindings/gpio/uniphier-gpio.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 6671f37..47cfca7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2036,6 +2036,7 @@ F:arch/arm/mm/cache-uniphier.c
 F: arch/arm64/boot/dts/socionext/
 F: drivers/bus/uniphier-system-bus.c
 F: drivers/clk/uniphier/
+F: drivers/gpio/gpio-uniphier.c
 F: drivers/i2c/busses/i2c-uniphier*
 F: drivers/irqchip/irq-uniphier-aidet.c
 F: drivers/pinctrl/uniphier/
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 3f80f16..25c0f308 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -475,6 +475,14 @@ config GPIO_TZ1090_PDC
help
  Say yes here to support Toumaz Xenif TZ1090 PDC GPIOs.
 
+config GPIO_UNIPHIER
+   tristate "UniPhier GPIO support"
+   depends on ARCH_UNIPHIER || COMPILE_TEST
+   depends on OF_GPIO
+   select IRQ_DOMAIN_HIERARCHY
+   help
+ Say yes here to support UniPhier GPIOs.
+
 config GPIO_VF610
def_bool y
depends on ARCH_MXC && SOC_VF610
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index aeb70e9d..472f675 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -131,6 +131,7 @@ obj-$(CONFIG_GPIO_TWL6040)  += gpio-twl6040.o
 obj-$(CONFIG_GPIO_TZ1090)  += gpio-tz1090.o
 obj-$(CONFIG_GPIO_TZ1090_PDC)  += gpio-tz1090-pdc.o
 obj-$(CONFIG_GPIO_UCB1400) += gpio-ucb1400.o
+obj-$(CONFIG_GPIO_UNIPHIER)+= gpio-uniphier.o
 obj-$(CONFIG_GPIO_VF610)   += gpio-vf610.o
 obj-$(CONFIG_GPIO_VIPERBOARD)  += gpio-viperboard.o
 obj-$(CONFIG_GPIO_VR41XX)  += gpio-vr41xx.o
diff --git a/drivers/gpio/gpio-uniphier.c b/drivers/gpio/gpio-uniphier.c
new file mode 100644
index 000..d62cea4
--- /dev/null
+++ b/drivers/gpio/gpio-uniphier.c
@@ -0,0 +1,507 @@
+/*
+ * Copyright (C) 2017 Socionext Inc.
+ *   Author: Masahiro Yamada 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define UNIPHIER_GPIO_BANK_MASK\
+   GENMASK((UNIPHIER_GPIO_LINES_PER_BANK) - 1, 0)
+
+#define UNIPHIER_GPIO_IRQ_MAX_NUM  24
+
+#define UNIPHIER_GPIO_PORT_DATA0x0 /* data */
+#define UNIPHIER_GPIO_PORT_DIR 0x4 /* direction (1:in, 0:out) */
+#define UNIPHIER_GPIO_IRQ_EN   0x90/* irq enable */
+#define UNIPHIER_GPIO_IRQ_MODE 0x94/* irq mode (1: both edge) */
+#define UNIPHIER_GPIO_IRQ_FLT_EN   0x98/* noise filter enable */
+#define UNIPHIER_GPIO_IRQ_FLT_CYC  0x9c/* noise filter clock cycle */
+
+struct uniphier_gpio_priv {
+   struct gpio_chip chip;
+   struct irq_chip irq_chip;
+   struct irq_domain *domain;
+   void __iomem *regs;
+   spinlock_t lock;
+   u32 saved_vals[0];
+};
+
+static unsigned int uniphier_gpio_bank_to_reg(unsigned int bank)
+{
+   unsigned int reg;
+
+   reg = (bank + 1) * 8;
+
+   /*
+* Unfortunately, the GPIO port registers are not contiguous because
+* offset 0x90-0x9f is used for IRQ.  Add 0x10 when crossing the region.
+*/
+   if (reg >= 

[PATCH v7 1/2] dt-bindings: gpio: uniphier: add UniPhier GPIO binding

2017-09-26 Thread Masahiro Yamada
This GPIO controller is used on UniPhier SoC family.

The vendor specific property "socionext,interrupt-ranges" is for
specifying interrupt mapping to the parent interrupt controller
because the mapping is not contiguous.  It works like "ranges",
but transforms "interrupts" instead of "reg".

Signed-off-by: Masahiro Yamada 
Acked-by: Rob Herring 
---


 .../devicetree/bindings/gpio/gpio-uniphier.txt | 40 ++
 1 file changed, 40 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/gpio/gpio-uniphier.txt

diff --git a/Documentation/devicetree/bindings/gpio/gpio-uniphier.txt 
b/Documentation/devicetree/bindings/gpio/gpio-uniphier.txt
new file mode 100644
index 000..6d9251a
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-uniphier.txt
@@ -0,0 +1,40 @@
+UniPhier GPIO controller
+
+Required properties:
+- compatible: Should be "socionext,uniphier-gpio".
+- reg: Specifies offset and length of the register set for the device.
+- gpio-controller: Marks the device node as a GPIO controller.
+- #gpio-cells: Should be 2.  The first cell is the pin number and the second
+  cell is used to specify optional parameters.
+- interrupt-parent: Specifies the parent interrupt controller.
+- interrupt-controller: Marks the device node as an interrupt controller.
+- #interrupt-cells: Should be 2.  The first cell defines the interrupt number.
+  The second cell bits[3:0] is used to specify trigger type as follows:
+1 = low-to-high edge triggered
+2 = high-to-low edge triggered
+4 = active high level-sensitive
+8 = active low level-sensitive
+  Valid combinations are 1, 2, 3, 4, 8.
+- ngpios: Specifies the number of GPIO lines.
+- gpio-ranges: Mapping to pin controller pins (as described in gpio.txt)
+- socionext,interrupt-ranges: Specifies an interrupt number mapping between
+  this GPIO controller and its interrupt parent, in the form of arbitrary
+  number of  triplets.
+
+Optional properties:
+- gpio-ranges-group-names: Used for named gpio ranges (as described in 
gpio.txt)
+
+Example:
+   gpio: gpio@5500 {
+   compatible = "socionext,uniphier-gpio";
+   reg = <0x5500 0x200>;
+   interrupt-parent = <>;
+   interrupt-controller;
+   #interrupt-cells = <2>;
+   gpio-controller;
+   #gpio-cells = <2>;
+   gpio-ranges = < 0 0 0>;
+   gpio-ranges-group-names = "gpio_range";
+   ngpios = <248>;
+   socionext,interrupt-ranges = <0 48 16>, <16 154 5>, <21 217 3>;
+   };
-- 
2.7.4



[PATCH v7 1/2] dt-bindings: gpio: uniphier: add UniPhier GPIO binding

2017-09-26 Thread Masahiro Yamada
This GPIO controller is used on UniPhier SoC family.

The vendor specific property "socionext,interrupt-ranges" is for
specifying interrupt mapping to the parent interrupt controller
because the mapping is not contiguous.  It works like "ranges",
but transforms "interrupts" instead of "reg".

Signed-off-by: Masahiro Yamada 
Acked-by: Rob Herring 
---


 .../devicetree/bindings/gpio/gpio-uniphier.txt | 40 ++
 1 file changed, 40 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/gpio/gpio-uniphier.txt

diff --git a/Documentation/devicetree/bindings/gpio/gpio-uniphier.txt 
b/Documentation/devicetree/bindings/gpio/gpio-uniphier.txt
new file mode 100644
index 000..6d9251a
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-uniphier.txt
@@ -0,0 +1,40 @@
+UniPhier GPIO controller
+
+Required properties:
+- compatible: Should be "socionext,uniphier-gpio".
+- reg: Specifies offset and length of the register set for the device.
+- gpio-controller: Marks the device node as a GPIO controller.
+- #gpio-cells: Should be 2.  The first cell is the pin number and the second
+  cell is used to specify optional parameters.
+- interrupt-parent: Specifies the parent interrupt controller.
+- interrupt-controller: Marks the device node as an interrupt controller.
+- #interrupt-cells: Should be 2.  The first cell defines the interrupt number.
+  The second cell bits[3:0] is used to specify trigger type as follows:
+1 = low-to-high edge triggered
+2 = high-to-low edge triggered
+4 = active high level-sensitive
+8 = active low level-sensitive
+  Valid combinations are 1, 2, 3, 4, 8.
+- ngpios: Specifies the number of GPIO lines.
+- gpio-ranges: Mapping to pin controller pins (as described in gpio.txt)
+- socionext,interrupt-ranges: Specifies an interrupt number mapping between
+  this GPIO controller and its interrupt parent, in the form of arbitrary
+  number of  triplets.
+
+Optional properties:
+- gpio-ranges-group-names: Used for named gpio ranges (as described in 
gpio.txt)
+
+Example:
+   gpio: gpio@5500 {
+   compatible = "socionext,uniphier-gpio";
+   reg = <0x5500 0x200>;
+   interrupt-parent = <>;
+   interrupt-controller;
+   #interrupt-cells = <2>;
+   gpio-controller;
+   #gpio-cells = <2>;
+   gpio-ranges = < 0 0 0>;
+   gpio-ranges-group-names = "gpio_range";
+   ngpios = <248>;
+   socionext,interrupt-ranges = <0 48 16>, <16 154 5>, <21 217 3>;
+   };
-- 
2.7.4



Re: [PATCH v5 1/3] irqdomain: export of_phandle_args_to_fwspec

2017-09-26 Thread Masahiro Yamada
2017-09-25 19:11 GMT+09:00 Thomas Gleixner :
> On Thu, 21 Sep 2017, Linus Walleij wrote:
>
>> On Wed, Sep 13, 2017 at 10:56 AM, Masahiro Yamada
>>  wrote:
>>
>> > This helper will be useful for irqchip drivers that need to convert
>> > DT binding into IRQ fwspec.
>> >
>> > Signed-off-by: Masahiro Yamada 
>>
>> I'm waiting for the irqchip maintainers to have a look at this.
>>
>> I can carry it in the GPIO tree, but Thomas often prefers to handle
>> me an immutable git branch for infrastructure.
>
> I don't think we have colliding stuff coming up, so please carry it in your
> tree with Acked-by-me.
>
> Thanks,
>
> tglx
> --


Please hold it back now.

I have sent v6,
which is self-contained in the GPIO subsystem.

I will wait for Linus to check v6.


-- 
Best Regards
Masahiro Yamada


Re: [PATCH v5 1/3] irqdomain: export of_phandle_args_to_fwspec

2017-09-26 Thread Masahiro Yamada
2017-09-25 19:11 GMT+09:00 Thomas Gleixner :
> On Thu, 21 Sep 2017, Linus Walleij wrote:
>
>> On Wed, Sep 13, 2017 at 10:56 AM, Masahiro Yamada
>>  wrote:
>>
>> > This helper will be useful for irqchip drivers that need to convert
>> > DT binding into IRQ fwspec.
>> >
>> > Signed-off-by: Masahiro Yamada 
>>
>> I'm waiting for the irqchip maintainers to have a look at this.
>>
>> I can carry it in the GPIO tree, but Thomas often prefers to handle
>> me an immutable git branch for infrastructure.
>
> I don't think we have colliding stuff coming up, so please carry it in your
> tree with Acked-by-me.
>
> Thanks,
>
> tglx
> --


Please hold it back now.

I have sent v6,
which is self-contained in the GPIO subsystem.

I will wait for Linus to check v6.


-- 
Best Regards
Masahiro Yamada


[PATCH v6 2/2] gpio: uniphier: add UniPhier GPIO controller driver

2017-09-26 Thread Masahiro Yamada
This GPIO controller is used on UniPhier SoC family.

It also serves as an interrupt controller, but interrupt signals are
just delivered to the parent irqchip without any latching or OR'ing.
This type of hardware can be well described with hierarchy IRQ domain.

One unfortunate thing for this device is that the interrupt mapping to
the interrupt parent is not contiguous.

I asked how DT can describe interrupt mapping between two irqchips [1],
but I could not find a good solution (at least in the framework level).
In fact, irqchip drivers using hierarchy domain generally hard-code the
DT binding of their parent.

After tackling on several approaches such as hard-code of hwirqs,
irq_domain_push_irq(), I ended up with a vendor specific property.
If we come up with a good idea to support this in the framework, we
can migrate over to it, but we can live with a driver-level solution
for now.

[1] https://lkml.org/lkml/2017/7/6/758

Signed-off-by: Masahiro Yamada 
---


 MAINTAINERS  |   1 +
 drivers/gpio/Kconfig |   8 +
 drivers/gpio/Makefile|   1 +
 drivers/gpio/gpio-uniphier.c | 507 +++
 include/dt-bindings/gpio/uniphier-gpio.h |  18 ++
 5 files changed, 535 insertions(+)
 create mode 100644 drivers/gpio/gpio-uniphier.c
 create mode 100644 include/dt-bindings/gpio/uniphier-gpio.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 6671f37..47cfca7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2036,6 +2036,7 @@ F:arch/arm/mm/cache-uniphier.c
 F: arch/arm64/boot/dts/socionext/
 F: drivers/bus/uniphier-system-bus.c
 F: drivers/clk/uniphier/
+F: drivers/gpio/gpio-uniphier.c
 F: drivers/i2c/busses/i2c-uniphier*
 F: drivers/irqchip/irq-uniphier-aidet.c
 F: drivers/pinctrl/uniphier/
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 3f80f16..25c0f308 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -475,6 +475,14 @@ config GPIO_TZ1090_PDC
help
  Say yes here to support Toumaz Xenif TZ1090 PDC GPIOs.
 
+config GPIO_UNIPHIER
+   tristate "UniPhier GPIO support"
+   depends on ARCH_UNIPHIER || COMPILE_TEST
+   depends on OF_GPIO
+   select IRQ_DOMAIN_HIERARCHY
+   help
+ Say yes here to support UniPhier GPIOs.
+
 config GPIO_VF610
def_bool y
depends on ARCH_MXC && SOC_VF610
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index aeb70e9d..472f675 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -131,6 +131,7 @@ obj-$(CONFIG_GPIO_TWL6040)  += gpio-twl6040.o
 obj-$(CONFIG_GPIO_TZ1090)  += gpio-tz1090.o
 obj-$(CONFIG_GPIO_TZ1090_PDC)  += gpio-tz1090-pdc.o
 obj-$(CONFIG_GPIO_UCB1400) += gpio-ucb1400.o
+obj-$(CONFIG_GPIO_UNIPHIER)+= gpio-uniphier.o
 obj-$(CONFIG_GPIO_VF610)   += gpio-vf610.o
 obj-$(CONFIG_GPIO_VIPERBOARD)  += gpio-viperboard.o
 obj-$(CONFIG_GPIO_VR41XX)  += gpio-vr41xx.o
diff --git a/drivers/gpio/gpio-uniphier.c b/drivers/gpio/gpio-uniphier.c
new file mode 100644
index 000..a5e3c3a
--- /dev/null
+++ b/drivers/gpio/gpio-uniphier.c
@@ -0,0 +1,507 @@
+/*
+ * Copyright (C) 2017 Socionext Inc.
+ *   Author: Masahiro Yamada 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define UNIPHIER_GPIO_BANK_MASK\
+   GENMASK((UNIPHIER_GPIO_LINES_PER_BANK) - 1, 0)
+
+#define UNIPHIER_GPIO_IRQ_MAX_NUM  24
+
+#define UNIPHIER_GPIO_PORT_DATA0x0 /* data */
+#define UNIPHIER_GPIO_PORT_DIR 0x4 /* direction (1:in, 0:out) */
+#define UNIPHIER_GPIO_IRQ_EN   0x90/* irq enable */
+#define UNIPHIER_GPIO_IRQ_MODE 0x94/* irq mode (1: both edge) */
+#define UNIPHIER_GPIO_IRQ_FLT_EN   0x98/* noise filter enable */
+#define UNIPHIER_GPIO_IRQ_FLT_CYC  0x9c/* noise filter clock cycle */
+
+struct uniphier_gpio_priv {
+   struct gpio_chip chip;
+   struct irq_chip irq_chip;
+   struct irq_domain *domain;
+   void __iomem *regs;
+   spinlock_t lock;
+   u32 saved_vals[0];
+};
+
+static unsigned int uniphier_gpio_bank_to_reg(unsigned int bank)
+{
+   unsigned int reg;
+
+   reg = (bank + 1) * 8;
+
+   /*
+* Unfortunately, the GPIO port registers are not contiguous because
+* offset 0x90-0x9f is used for IRQ.  Add 0x10 when 

[PATCH v6 0/2] gpio: uniphier: UniPhier GPIO driver

2017-09-26 Thread Masahiro Yamada
This series adds UniPhier GPIO driver.

The interrupt controller part is implemented by using hierarchy irqdomain.

IMHO, the problem of the hierarchy irqdomain is that drivers must hard-code
the fwspec of the interrupt parent.  We will never know the DT binding of
the parent unless we parse #interrupt-cell, etc.

I asked about this:
https://lkml.org/lkml/2017/7/6/758

Apparently, the current kernel does not provide a systematic way to
describe it.

In v1-v3, I hard-coded the parent hwirq numbers in the driver because
irqchip drivers are forced to hard-code more or less about the parent.
This was not accepted by Linus Walleij.

In v4, I tried to use the new API irq_domain_push_irq().  I needed to
change the irqdomain framework to make it work for DT, but seemed
controversial in the irqdomain subsystem review.

In v5, I tried another solution.  At first I thought it worked, but
I found a dead-lock if an irq is disposed from the .alloc hook.

After I considered more, I thought "interrupts" property does not
make much sense here because the last cell (which usually specifies
the trigger type) is useless for the hierarchy irqdomain.

I decided to use a vendor-specific property.  This is what some drivers
actually do.


Changes in v6:
  - Add "socionext,interrupt-ranges"

Changes in v5:
  - Split into a separate patch for DT binding
  - Add a new patch to export of_phandle_args_to_fwspec
  - Split DT binding into a separate file
  - v4 depends on some patches that change irq_domain_push_irq(), but
they got negative feedback in the irqdomain subsystem review.
Yet another approach here.  Parse "interrupts" property in
.alloc() hook.  If the parent IRQ is already mapped, dispose it
and re-alloc in irqdomain manner.

Changes in v4:
  - Add COMPILE_TEST and select IRQ_DOMAIN_HIERARCHY
  - Reimplement irqchip part by using irq_domain_push_irq()

Changes in v3:
  - Add .irq_set_affinity() hook
  - Use irq_domain_create_hierarchy() instead of legacy
irq_domain_add_hierarchy()

Changes in v2:
  - Remove +32 offset for parent interrupts to follow the GIC
binding convention
  - Let uniphier_gpio_irq_alloc() fail if nr_irqs != 1
  - Allocate gpio_chip statically because just one instance is
supported
  - Fix suspend and resume hooks

Masahiro Yamada (2):
  dt-bindings: gpio: uniphier: add UniPhier GPIO binding
  gpio: uniphier: add UniPhier GPIO controller driver

 .../devicetree/bindings/gpio/gpio-uniphier.txt |  40 ++
 MAINTAINERS|   1 +
 drivers/gpio/Kconfig   |   8 +
 drivers/gpio/Makefile  |   1 +
 drivers/gpio/gpio-uniphier.c   | 507 +
 include/dt-bindings/gpio/uniphier-gpio.h   |  18 +
 6 files changed, 575 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/gpio/gpio-uniphier.txt
 create mode 100644 drivers/gpio/gpio-uniphier.c
 create mode 100644 include/dt-bindings/gpio/uniphier-gpio.h

-- 
2.7.4



[PATCH v6 2/2] gpio: uniphier: add UniPhier GPIO controller driver

2017-09-26 Thread Masahiro Yamada
This GPIO controller is used on UniPhier SoC family.

It also serves as an interrupt controller, but interrupt signals are
just delivered to the parent irqchip without any latching or OR'ing.
This type of hardware can be well described with hierarchy IRQ domain.

One unfortunate thing for this device is that the interrupt mapping to
the interrupt parent is not contiguous.

I asked how DT can describe interrupt mapping between two irqchips [1],
but I could not find a good solution (at least in the framework level).
In fact, irqchip drivers using hierarchy domain generally hard-code the
DT binding of their parent.

After tackling on several approaches such as hard-code of hwirqs,
irq_domain_push_irq(), I ended up with a vendor specific property.
If we come up with a good idea to support this in the framework, we
can migrate over to it, but we can live with a driver-level solution
for now.

[1] https://lkml.org/lkml/2017/7/6/758

Signed-off-by: Masahiro Yamada 
---


 MAINTAINERS  |   1 +
 drivers/gpio/Kconfig |   8 +
 drivers/gpio/Makefile|   1 +
 drivers/gpio/gpio-uniphier.c | 507 +++
 include/dt-bindings/gpio/uniphier-gpio.h |  18 ++
 5 files changed, 535 insertions(+)
 create mode 100644 drivers/gpio/gpio-uniphier.c
 create mode 100644 include/dt-bindings/gpio/uniphier-gpio.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 6671f37..47cfca7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2036,6 +2036,7 @@ F:arch/arm/mm/cache-uniphier.c
 F: arch/arm64/boot/dts/socionext/
 F: drivers/bus/uniphier-system-bus.c
 F: drivers/clk/uniphier/
+F: drivers/gpio/gpio-uniphier.c
 F: drivers/i2c/busses/i2c-uniphier*
 F: drivers/irqchip/irq-uniphier-aidet.c
 F: drivers/pinctrl/uniphier/
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 3f80f16..25c0f308 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -475,6 +475,14 @@ config GPIO_TZ1090_PDC
help
  Say yes here to support Toumaz Xenif TZ1090 PDC GPIOs.
 
+config GPIO_UNIPHIER
+   tristate "UniPhier GPIO support"
+   depends on ARCH_UNIPHIER || COMPILE_TEST
+   depends on OF_GPIO
+   select IRQ_DOMAIN_HIERARCHY
+   help
+ Say yes here to support UniPhier GPIOs.
+
 config GPIO_VF610
def_bool y
depends on ARCH_MXC && SOC_VF610
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index aeb70e9d..472f675 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -131,6 +131,7 @@ obj-$(CONFIG_GPIO_TWL6040)  += gpio-twl6040.o
 obj-$(CONFIG_GPIO_TZ1090)  += gpio-tz1090.o
 obj-$(CONFIG_GPIO_TZ1090_PDC)  += gpio-tz1090-pdc.o
 obj-$(CONFIG_GPIO_UCB1400) += gpio-ucb1400.o
+obj-$(CONFIG_GPIO_UNIPHIER)+= gpio-uniphier.o
 obj-$(CONFIG_GPIO_VF610)   += gpio-vf610.o
 obj-$(CONFIG_GPIO_VIPERBOARD)  += gpio-viperboard.o
 obj-$(CONFIG_GPIO_VR41XX)  += gpio-vr41xx.o
diff --git a/drivers/gpio/gpio-uniphier.c b/drivers/gpio/gpio-uniphier.c
new file mode 100644
index 000..a5e3c3a
--- /dev/null
+++ b/drivers/gpio/gpio-uniphier.c
@@ -0,0 +1,507 @@
+/*
+ * Copyright (C) 2017 Socionext Inc.
+ *   Author: Masahiro Yamada 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define UNIPHIER_GPIO_BANK_MASK\
+   GENMASK((UNIPHIER_GPIO_LINES_PER_BANK) - 1, 0)
+
+#define UNIPHIER_GPIO_IRQ_MAX_NUM  24
+
+#define UNIPHIER_GPIO_PORT_DATA0x0 /* data */
+#define UNIPHIER_GPIO_PORT_DIR 0x4 /* direction (1:in, 0:out) */
+#define UNIPHIER_GPIO_IRQ_EN   0x90/* irq enable */
+#define UNIPHIER_GPIO_IRQ_MODE 0x94/* irq mode (1: both edge) */
+#define UNIPHIER_GPIO_IRQ_FLT_EN   0x98/* noise filter enable */
+#define UNIPHIER_GPIO_IRQ_FLT_CYC  0x9c/* noise filter clock cycle */
+
+struct uniphier_gpio_priv {
+   struct gpio_chip chip;
+   struct irq_chip irq_chip;
+   struct irq_domain *domain;
+   void __iomem *regs;
+   spinlock_t lock;
+   u32 saved_vals[0];
+};
+
+static unsigned int uniphier_gpio_bank_to_reg(unsigned int bank)
+{
+   unsigned int reg;
+
+   reg = (bank + 1) * 8;
+
+   /*
+* Unfortunately, the GPIO port registers are not contiguous because
+* offset 0x90-0x9f is used for IRQ.  Add 0x10 when crossing the region.
+*/
+   if (reg >= 

[PATCH v6 0/2] gpio: uniphier: UniPhier GPIO driver

2017-09-26 Thread Masahiro Yamada
This series adds UniPhier GPIO driver.

The interrupt controller part is implemented by using hierarchy irqdomain.

IMHO, the problem of the hierarchy irqdomain is that drivers must hard-code
the fwspec of the interrupt parent.  We will never know the DT binding of
the parent unless we parse #interrupt-cell, etc.

I asked about this:
https://lkml.org/lkml/2017/7/6/758

Apparently, the current kernel does not provide a systematic way to
describe it.

In v1-v3, I hard-coded the parent hwirq numbers in the driver because
irqchip drivers are forced to hard-code more or less about the parent.
This was not accepted by Linus Walleij.

In v4, I tried to use the new API irq_domain_push_irq().  I needed to
change the irqdomain framework to make it work for DT, but seemed
controversial in the irqdomain subsystem review.

In v5, I tried another solution.  At first I thought it worked, but
I found a dead-lock if an irq is disposed from the .alloc hook.

After I considered more, I thought "interrupts" property does not
make much sense here because the last cell (which usually specifies
the trigger type) is useless for the hierarchy irqdomain.

I decided to use a vendor-specific property.  This is what some drivers
actually do.


Changes in v6:
  - Add "socionext,interrupt-ranges"

Changes in v5:
  - Split into a separate patch for DT binding
  - Add a new patch to export of_phandle_args_to_fwspec
  - Split DT binding into a separate file
  - v4 depends on some patches that change irq_domain_push_irq(), but
they got negative feedback in the irqdomain subsystem review.
Yet another approach here.  Parse "interrupts" property in
.alloc() hook.  If the parent IRQ is already mapped, dispose it
and re-alloc in irqdomain manner.

Changes in v4:
  - Add COMPILE_TEST and select IRQ_DOMAIN_HIERARCHY
  - Reimplement irqchip part by using irq_domain_push_irq()

Changes in v3:
  - Add .irq_set_affinity() hook
  - Use irq_domain_create_hierarchy() instead of legacy
irq_domain_add_hierarchy()

Changes in v2:
  - Remove +32 offset for parent interrupts to follow the GIC
binding convention
  - Let uniphier_gpio_irq_alloc() fail if nr_irqs != 1
  - Allocate gpio_chip statically because just one instance is
supported
  - Fix suspend and resume hooks

Masahiro Yamada (2):
  dt-bindings: gpio: uniphier: add UniPhier GPIO binding
  gpio: uniphier: add UniPhier GPIO controller driver

 .../devicetree/bindings/gpio/gpio-uniphier.txt |  40 ++
 MAINTAINERS|   1 +
 drivers/gpio/Kconfig   |   8 +
 drivers/gpio/Makefile  |   1 +
 drivers/gpio/gpio-uniphier.c   | 507 +
 include/dt-bindings/gpio/uniphier-gpio.h   |  18 +
 6 files changed, 575 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/gpio/gpio-uniphier.txt
 create mode 100644 drivers/gpio/gpio-uniphier.c
 create mode 100644 include/dt-bindings/gpio/uniphier-gpio.h

-- 
2.7.4



[PATCH v6 1/2] dt-bindings: gpio: uniphier: add UniPhier GPIO binding

2017-09-26 Thread Masahiro Yamada
This GPIO controller is used on UniPhier SoC family.

The vendor specific property "socionext,interrupt-ranges" is for
specifying interrupt mapping to the parent interrupt controller
because the mapping is not contiguous.  It works like "ranges",
but transforms "interrupts" instead of "reg".

Signed-off-by: Masahiro Yamada 
Acked-by: Rob Herring 
---


 .../devicetree/bindings/gpio/gpio-uniphier.txt | 40 ++
 1 file changed, 40 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/gpio/gpio-uniphier.txt

diff --git a/Documentation/devicetree/bindings/gpio/gpio-uniphier.txt 
b/Documentation/devicetree/bindings/gpio/gpio-uniphier.txt
new file mode 100644
index 000..6d9251a
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-uniphier.txt
@@ -0,0 +1,40 @@
+UniPhier GPIO controller
+
+Required properties:
+- compatible: Should be "socionext,uniphier-gpio".
+- reg: Specifies offset and length of the register set for the device.
+- gpio-controller: Marks the device node as a GPIO controller.
+- #gpio-cells: Should be 2.  The first cell is the pin number and the second
+  cell is used to specify optional parameters.
+- interrupt-parent: Specifies the parent interrupt controller.
+- interrupt-controller: Marks the device node as an interrupt controller.
+- #interrupt-cells: Should be 2.  The first cell defines the interrupt number.
+  The second cell bits[3:0] is used to specify trigger type as follows:
+1 = low-to-high edge triggered
+2 = high-to-low edge triggered
+4 = active high level-sensitive
+8 = active low level-sensitive
+  Valid combinations are 1, 2, 3, 4, 8.
+- ngpios: Specifies the number of GPIO lines.
+- gpio-ranges: Mapping to pin controller pins (as described in gpio.txt)
+- socionext,interrupt-ranges: Specifies an interrupt number mapping between
+  this GPIO controller and its interrupt parent, in the form of arbitrary
+  number of  triplets.
+
+Optional properties:
+- gpio-ranges-group-names: Used for named gpio ranges (as described in 
gpio.txt)
+
+Example:
+   gpio: gpio@5500 {
+   compatible = "socionext,uniphier-gpio";
+   reg = <0x5500 0x200>;
+   interrupt-parent = <>;
+   interrupt-controller;
+   #interrupt-cells = <2>;
+   gpio-controller;
+   #gpio-cells = <2>;
+   gpio-ranges = < 0 0 0>;
+   gpio-ranges-group-names = "gpio_range";
+   ngpios = <248>;
+   socionext,interrupt-ranges = <0 48 16>, <16 154 5>, <21 217 3>;
+   };
-- 
2.7.4



[PATCH v6 1/2] dt-bindings: gpio: uniphier: add UniPhier GPIO binding

2017-09-26 Thread Masahiro Yamada
This GPIO controller is used on UniPhier SoC family.

The vendor specific property "socionext,interrupt-ranges" is for
specifying interrupt mapping to the parent interrupt controller
because the mapping is not contiguous.  It works like "ranges",
but transforms "interrupts" instead of "reg".

Signed-off-by: Masahiro Yamada 
Acked-by: Rob Herring 
---


 .../devicetree/bindings/gpio/gpio-uniphier.txt | 40 ++
 1 file changed, 40 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/gpio/gpio-uniphier.txt

diff --git a/Documentation/devicetree/bindings/gpio/gpio-uniphier.txt 
b/Documentation/devicetree/bindings/gpio/gpio-uniphier.txt
new file mode 100644
index 000..6d9251a
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-uniphier.txt
@@ -0,0 +1,40 @@
+UniPhier GPIO controller
+
+Required properties:
+- compatible: Should be "socionext,uniphier-gpio".
+- reg: Specifies offset and length of the register set for the device.
+- gpio-controller: Marks the device node as a GPIO controller.
+- #gpio-cells: Should be 2.  The first cell is the pin number and the second
+  cell is used to specify optional parameters.
+- interrupt-parent: Specifies the parent interrupt controller.
+- interrupt-controller: Marks the device node as an interrupt controller.
+- #interrupt-cells: Should be 2.  The first cell defines the interrupt number.
+  The second cell bits[3:0] is used to specify trigger type as follows:
+1 = low-to-high edge triggered
+2 = high-to-low edge triggered
+4 = active high level-sensitive
+8 = active low level-sensitive
+  Valid combinations are 1, 2, 3, 4, 8.
+- ngpios: Specifies the number of GPIO lines.
+- gpio-ranges: Mapping to pin controller pins (as described in gpio.txt)
+- socionext,interrupt-ranges: Specifies an interrupt number mapping between
+  this GPIO controller and its interrupt parent, in the form of arbitrary
+  number of  triplets.
+
+Optional properties:
+- gpio-ranges-group-names: Used for named gpio ranges (as described in 
gpio.txt)
+
+Example:
+   gpio: gpio@5500 {
+   compatible = "socionext,uniphier-gpio";
+   reg = <0x5500 0x200>;
+   interrupt-parent = <>;
+   interrupt-controller;
+   #interrupt-cells = <2>;
+   gpio-controller;
+   #gpio-cells = <2>;
+   gpio-ranges = < 0 0 0>;
+   gpio-ranges-group-names = "gpio_range";
+   ngpios = <248>;
+   socionext,interrupt-ranges = <0 48 16>, <16 154 5>, <21 217 3>;
+   };
-- 
2.7.4



  1   2   3   4   5   6   7   8   9   10   >