Re: [PATCH v5 01/10] drivers: qcom: rpmh-rsc: add RPMH controller for QCOM SoCs

2018-04-16 Thread Lina Iyer

On Fri, Apr 13 2018 at 11:43 -0600, Stephen Boyd wrote:

Quoting Lina Iyer (2018-04-13 08:37:25)

On Tue, Apr 10 2018 at 22:39 -0600, Stephen Boyd wrote:
>Quoting Lina Iyer (2018-04-05 09:18:25)
>>
>> + */
>> +static irqreturn_t tcs_irq_handler(int irq, void *p)
>> +{
>> +   struct rsc_drv *drv = p;
>> +   int m, i;
>> +   u32 irq_status, sts;
>> +   struct tcs_response *resp;
>> +   struct tcs_cmd *cmd;
>> +
>> +   irq_status = read_tcs_reg(drv, RSC_DRV_IRQ_STATUS, 0, 0);
>> +
>> +   for (m = 0; m < drv->num_tcs; m++) {
>> +   if (!(irq_status & (u32)BIT(m)))


This u32 cast looks out of place. And can't we do for_each_set_bit() in
this loop instead of looping through num_tcs?


Ok.
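
A minimal sketch of how the handler loop could use for_each_set_bit(), reusing the
names from the quoted handler and widening irq_status to unsigned long so it can be
passed by address:

    unsigned long irq_status;
    int m;

    irq_status = read_tcs_reg(drv, RSC_DRV_IRQ_STATUS, 0, 0);

    for_each_set_bit(m, &irq_status, drv->num_tcs) {
            /*
             * Only TCSes whose IRQ bit is set are visited, so neither the
             * BIT(m) test nor the u32 cast is needed here.
             */
    }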


>> +   continue;
>> +
>> +   resp = get_response(drv, m);
>> +   if (WARN_ON(!resp))
>> +   goto skip_resp;
>> +
>> +   resp->err = 0;
>> +   for (i = 0; i < resp->msg->num_cmds; i++) {
>> +   cmd = &resp->msg->cmds[i];
>> +   sts = read_tcs_reg(drv, RSC_DRV_CMD_STATUS, m, i);
>> +   if (!(sts & CMD_STATUS_ISSUED) ||
>> +  ((resp->msg->wait_for_compl || cmd->wait) &&
>> +  !(sts & CMD_STATUS_COMPL))) {
>> +   resp->err = -EIO;
>> +   break;
>> +   }
>> +   }
>> +skip_resp:
>> +   /* Reclaim the TCS */
>> +   write_tcs_reg(drv, RSC_DRV_CMD_ENABLE, m, 0, 0);
>> +   write_tcs_reg(drv, RSC_DRV_IRQ_CLEAR, 0, 0, BIT(m));
>> +   clear_bit(m, drv->tcs_in_use);
>
>Should we reclaim the TCS if the above for loop fails too? It may make
>more sense to look up the response, reclaim, check if it's NULL and
>execute a 'continue' and otherwise look through resp->msg->cmds for
>something that was done and then send_tcs_response(). At the least,
The TCS will be reclaimed even if the for loop fails. We can't
read the CMD_STATUS reliably after reclaiming the TCS.
>don't call send_tcs_response() if resp == NULL.
>
I could do that.


Ah right, the break is for the inner for-loop. Can we push the for-loop
and reclaim into the get_response() function so that the goto inside the
loop is avoided?

resp = get_response(drv, m);
if (WARN_ON(!resp))
continue;
send_tcs_response(resp);



I needed the resp object to get resp->msg, and send_tcs_response()
could happen only after we have reclaimed the TCS.
I have since removed the response object based on comments from Bjorn, but
the idea would still be the same.
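
One possible shape of the per-TCS body that avoids the goto: look up the response,
read CMD_STATUS while the TCS is still owned, reclaim it, and only then decide
whether to send. A sketch built from the quoted handler:

    resp = get_response(drv, m);
    if (resp) {
            resp->err = 0;
            for (i = 0; i < resp->msg->num_cmds; i++) {
                    cmd = &resp->msg->cmds[i];
                    sts = read_tcs_reg(drv, RSC_DRV_CMD_STATUS, m, i);
                    if (!(sts & CMD_STATUS_ISSUED) ||
                        ((resp->msg->wait_for_compl || cmd->wait) &&
                         !(sts & CMD_STATUS_COMPL))) {
                            resp->err = -EIO;
                            break;
                    }
            }
    }

    /* Reclaim the TCS unconditionally, after CMD_STATUS has been read */
    write_tcs_reg(drv, RSC_DRV_CMD_ENABLE, m, 0, 0);
    write_tcs_reg(drv, RSC_DRV_IRQ_CLEAR, 0, 0, BIT(m));
    clear_bit(m, drv->tcs_in_use);

    if (WARN_ON(!resp))
            continue;

    send_tcs_response(resp);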


>> +/**
>> + * rpmh_rsc_send_data: Validate the incoming message and write to the
>> + * appropriate TCS block.
>> + *
>> + * @drv: the controller
>> + * @msg: the data to be sent
>> + *
>> + * Return: 0 on success, -EINVAL on error.
>> + * Note: This call blocks until a valid data is written to the TCS.
>> + */
>> +int rpmh_rsc_send_data(struct rsc_drv *drv, const struct tcs_request *msg)
>> +{
>> +   int ret;
>> +
>> +   if (!msg || !msg->cmds || !msg->num_cmds ||
>> +   msg->num_cmds > MAX_RPMH_PAYLOAD)
>> +   return -EINVAL;
>> +
>> +   do {
>> +   ret = tcs_mbox_write(drv, msg);
>> +   if (ret == -EBUSY) {
>> +   pr_info_ratelimited("TCS Busy, retrying RPMH message send: addr=%#x\n",
>> +   msg->cmds[0].addr);
>> +   udelay(10);
>> +   }
>> +   } while (ret == -EBUSY);
>
>This loop never breaks if we can't avoid the BUSY loop. And that printk
>is informational, shouldn't it be an error? Is there some number of
>tries we can make and then just give up?
>
I could do that. Generally, there are some transient conditions that
cause these loops to spin for a while before we get a free TCS to
write to. Failing after just a handful of tries may be calling it quits
too early. If we increase the delay to compensate, then we end up
slowing down requests that could otherwise have been faster.


So a 10 second timeout with a 10uS delay between attempts? I'm not
asking to increase the delay between attempts, instead I'm asking for
this loop to not go forever in case something goes wrong. Getting stuck
here would not be much fun.


There is no recoverable situation if we are unable to send RPMH
requests. I can time out here, but then most drivers have no way to
recover from that. The only reason we would time out is when the
TCSes are otherwise engaged and their requests have hung. You cannot
clear the TCSes to keep the system going.
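
For reference, a sketch of a bounded version of the loop, with a hypothetical cap of
roughly 10 seconds of 10 us retries:

    int retry = 1000000;    /* ~10 s at 10 us per attempt; the cap is an assumption */

    do {
            ret = tcs_mbox_write(drv, msg);
            if (ret != -EBUSY)
                    break;
            pr_info_ratelimited("TCS Busy, retrying RPMH message send: addr=%#x\n",
                                msg->cmds[0].addr);
            udelay(10);
    } while (--retry);

    if (ret == -EBUSY) {
            pr_err("RPMH message send timed out: addr=%#x\n", msg->cmds[0].addr);
            ret = -ETIMEDOUT;
    }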



>> +
>> +   return ret;
>> +}
>> +EXPORT_SYMBOL(rpmh_rsc_send_data);
>> +
>> diff --git a/include/dt-bindings/soc/qcom,rpmh-rsc.h b/include/dt-bindings/soc/qcom,rpmh-rsc.h
>> new file mode 100644
>> index ..868f998ea998
>> --- /dev/null
>> +++ b/i

Re: [PATCH v5 01/10] drivers: qcom: rpmh-rsc: add RPMH controller for QCOM SoCs

2018-04-13 Thread Stephen Boyd
Quoting Lina Iyer (2018-04-13 08:37:25)
> On Tue, Apr 10 2018 at 22:39 -0600, Stephen Boyd wrote:
> >Quoting Lina Iyer (2018-04-05 09:18:25)
> >>
> >> diff --git a/drivers/soc/qcom/rpmh-internal.h b/drivers/soc/qcom/rpmh-internal.h
> >> new file mode 100644
> >> index ..aa73ec4b3e42
> >> --- /dev/null
> >> +++ b/drivers/soc/qcom/rpmh-internal.h
> >> @@ -0,0 +1,89 @@
> >> +/* SPDX-License-Identifier: GPL-2.0 */
> >> +/*
> >> + * Copyright (c) 2016-2018, The Linux Foundation. All rights reserved.
> >> + */
> >> +
> >> +
> >> +#ifndef __RPM_INTERNAL_H__
> >> +#define __RPM_INTERNAL_H__
> >> +
> >> +#include 
> >> +#include 
> >> +
> >> +#define TCS_TYPE_NR  4
> >> +#define MAX_CMDS_PER_TCS   16
> >> +#define MAX_TCS_PER_TYPE   3
> >> +#define MAX_TCS_NR (MAX_TCS_PER_TYPE * TCS_TYPE_NR)
> >> +
> >> +struct rsc_drv;
> >> +
> >> +/**
> >> + * struct tcs_response: Response object for a request
> >> + *
> >> + * @drv:   the controller
> >> + * @msg:   the request for this response
> >> + * @m: the tcs identifier
> >> + * @err:   error reported in the response
> >> + * @list:  element in list of pending response objects
> >> + */
> >> +struct tcs_response {
> >> +   struct rsc_drv *drv;
> >> +   const struct tcs_request *msg;
> >> +   u32 m;
> >> +   int err;
> >> +   struct list_head list;
> >> +};
> >> +
> >> +/**
> >> + * struct tcs_group: group of Trigger Command Sets for a request state
> >
> >Put (ACRONYM) for the acronyms that are spelled out the first time
> >please. Also, make sure we know what 'request state' is.
> >
> It's already in the commit text, but sure.

Thanks!

> 
> >> + *
> >> + * @drv:   the controller
> >> + * @type:  type of the TCS in this group - active, sleep, wake
> >
> >Now 'group' means 'request state'?
> >
> Group of TCSes. TCSes are grouped based on their use - sending requests
> for active, sleep and wake.

Ok so maybe "type of the TCSes in this group, either active, sleep,
wake, etc."

> 
> >> + * @mask:  mask of the TCSes relative to all the TCSes in the RSC
> >> + * @offset:start of the TCS group relative to the TCSes in the RSC
> >> + * @num_tcs:   number of TCSes in this type
> >> + * @ncpt:  number of commands in each TCS
> >> + * @lock:  lock for synchronizing this TCS writes
> >> + * @responses: response objects for requests sent from each TCS
> >> + */
> >> +struct tcs_group {
> >> +   struct rsc_drv *drv;
> >> +   int type;
> >
> >Is type supposed to be an enum?
> >
> Uses the #defines from include/dt-bindings/qcom,rpmh-rsc.txt.
> 
> >> +   u32 mask;
> >> +   u32 offset;
> >> +   int num_tcs;
> >> +   int ncpt;
> >> +   spinlock_t lock;
> >> +   struct tcs_response *responses[MAX_TCS_PER_TYPE];
> >> +};
> >> +
> >> +/**
> >> + * struct rsc_drv: the Resource State Coordinator controller
> >> + *
> >> + * @name:   controller identifier
> >> + * @tcs_base:   start address of the TCS registers in this controller
> >> + * @id: instance id in the controller (Direct Resource Voter)
> >> + * @num_tcs:number of TCSes in this DRV
> >
> >It changed from an RSC to a DRV here?
> >
> RSC has DRVs. A DRV has TCS(es).

It seems like RSC and DRV are pretty much interchangeable then?

> 
> >> +
> >> +#endif /* __RPM_INTERNAL_H__ */
> >> diff --git a/drivers/soc/qcom/rpmh-rsc.c b/drivers/soc/qcom/rpmh-rsc.c
> >> new file mode 100644
> >> index ..8bde1e9bd599
> >> --- /dev/null
> >> +++ b/drivers/soc/qcom/rpmh-rsc.c
> >> @@ -0,0 +1,571 @@
> 
> >> +{
> >> +   kfree(resp);
> >> +}
> >> +
> >> +static struct tcs_response *get_response(struct rsc_drv *drv, u32 m)
> >> +{
> >> +   struct tcs_group *tcs = get_tcs_from_index(drv, m);
> >> +
> >> +   return tcs->responses[m - tcs->offset];
> >> +}
> >> +
> >> +static u32 read_tcs_reg(struct rsc_drv *drv, int reg, int m, int n)
> >> +{
> >> +   return readl_relaxed(drv->tcs_base + reg + RSC_DRV_TCS_OFFSET * m +
> >> +RSC_DRV_CMD_OFFSET * n);
> >> +}
> >> +
> >> +static void write_tcs_reg(struct rsc_drv *drv, int reg, int m, int n, u32 data)
> >
> >Is m the type of TCS (sleep, active, wake) and n is just an offset?
> >Maybe you can replace m with 'tcs_type' and n with 'index' or 'i' or
> >'offset'. And then don't use this function to write the random TCS
> >registers that don't have to do with the TCS command slots? I see
> >various places where there are things like:
> >
> If you look at the spec and the registers, this representation matches
> the usage there.
> d = DRV
> m = TCS number in the DRV
> n = Command slot in the TCS

Ok. I don't have access to the spec and the registers so I can't really
map it to anything.

> 
> >> +   write_tcs_reg(drv, RSC_DRV_CMD_ENABLE, m, 0, 0);
> >> +   write_tcs_reg(drv, RSC_DRV_CMD_WAIT_FOR_CMPL, m, 0, cmd_complete);
> >> +   write_tcs_reg(drv, RSC_DRV_CMD_ENABL

Re: [PATCH v5 01/10] drivers: qcom: rpmh-rsc: add RPMH controller for QCOM SoCs

2018-04-13 Thread Lina Iyer

On Tue, Apr 10 2018 at 17:40 -0600, Bjorn Andersson wrote:

On Thu 05 Apr 09:18 PDT 2018, Lina Iyer wrote:
[..]

diff --git a/drivers/soc/qcom/rpmh-internal.h b/drivers/soc/qcom/rpmh-internal.h

[..]

+/**
+ * struct tcs_response: Response object for a request
+ *
+ * @drv:   the controller
+ * @msg:   the request for this response
+ * @m: the tcs identifier
+ * @err:   error reported in the response
+ * @list:  element in list of pending response objects
+ */
+struct tcs_response {
+   struct rsc_drv *drv;
+   const struct tcs_request *msg;
+   u32 m;


m is assigned in one place but never used.


Right. Remnant from the downstream driver that uses buffers of
responses.


+   int err;
+   struct list_head list;
+};

[..]

diff --git a/drivers/soc/qcom/rpmh-rsc.c b/drivers/soc/qcom/rpmh-rsc.c

[..]

+static struct tcs_group *get_tcs_from_index(struct rsc_drv *drv, int m)
+{
+   struct tcs_group *tcs;
+   int i;
+
+   for (i = 0; i < drv->num_tcs; i++) {
+   tcs = &drv->tcs[i];
+   if (tcs->mask & BIT(m))
+   return tcs;
+   }
+
+   WARN(i == drv->num_tcs, "Incorrect TCS index %d", m);
+
+   return NULL;
+}
+
+static struct tcs_response *setup_response(struct rsc_drv *drv,
+  const struct tcs_request *msg, int m)
+{
+   struct tcs_response *resp;
+   struct tcs_group *tcs;
+
+   resp = kzalloc(sizeof(*resp), GFP_ATOMIC);


I still don't like the idea that you allocate a response struct for each
request, then upon getting an ack post this on a list and schedule a
tasklet in order to optionally deliver the return value to the waiting
caller.

Why don't you just just add the "err" and a completion to the
tcs_request struct and if it's a sync operation you complete that in
your irq handler?

That would remove the response struct, the list of them, the tasklet and
the dynamic memory handling - at the "cost" of making the code possible
to follow.


Hmm.. Ok. Will try to simplify.
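
A rough sketch of that simplification, completing a synchronous request directly
from the IRQ handler. The extra fields and their names are assumptions, not part of
the quoted patch:

    struct tcs_request {
            int state;                      /* e.g. RPMH_ACTIVE_ONLY_STATE */
            u32 wait_for_compl;
            u32 num_cmds;
            struct tcs_cmd *cmds;
            struct completion *compl;       /* hypothetical: non-NULL for sync requests */
            int err;                        /* hypothetical: filled in by the IRQ handler */
    };

    /* in tcs_irq_handler(), once the TCS has been reclaimed;
     * req points at the request that was programmed into TCS 'm'
     */
    req->err = err;
    if (req->compl)
            complete(req->compl);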


+   if (!resp)
+   return ERR_PTR(-ENOMEM);
+
+   resp->drv = drv;
+   resp->msg = msg;
+   resp->err = 0;
+
+   tcs = get_tcs_from_index(drv, m);
+   if (!tcs)
+   return ERR_PTR(-EINVAL);
+
+   assert_spin_locked(&tcs->lock);


I tried to boot the kernel with the rpmh-clk and rpmh-regulator drivers
and I kept hitting this assert.

Turns out that find_free_tcs() finds an empty TCS with index 'm' within
the tcs, then passes it to setup_response() which tries to use the 'm'
to figure out which tcs contains the TCS we're operating on.

But as 'm' is in tcs-local space and get_tcs_from_index() tries to
lookup the TCS in the global drv space we get hold of the wrong TCS.


You are right. I will fix it. Thanks for pointing it out. I wonder what is in
your DT that caused this to be triggered; clearly it's missing from my
setup.
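
A minimal sketch of one way to address it: have the free-slot search return a
drv-global index so that get_tcs_from_index() and the response bookkeeping agree.
The body of find_free_tcs() is not part of the quoted patch, so its shape here is
an assumption:

    static int find_free_tcs(struct tcs_group *tcs)
    {
            int i;

            for (i = 0; i < tcs->num_tcs; i++) {
                    /* tcs->offset + i is the drv-global TCS index */
                    if (tcs_is_free(tcs->drv, tcs->offset + i))
                            return tcs->offset + i;
            }

            return -EBUSY;
    }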


+   tcs->responses[m - tcs->offset] = resp;
+
+   return resp;
+}
+
+static void free_response(struct tcs_response *resp)
+{
+   kfree(resp);
+}
+
+static struct tcs_response *get_response(struct rsc_drv *drv, u32 m)
+{
+   struct tcs_group *tcs = get_tcs_from_index(drv, m);
+
+   return tcs->responses[m - tcs->offset];
+}
+
+static u32 read_tcs_reg(struct rsc_drv *drv, int reg, int m, int n)
+{
+   return readl_relaxed(drv->tcs_base + reg + RSC_DRV_TCS_OFFSET * m +
+RSC_DRV_CMD_OFFSET * n);
+}
+
+static void write_tcs_reg(struct rsc_drv *drv, int reg, int m, int n, u32 data)
+{
+   writel_relaxed(data, drv->tcs_base + reg + RSC_DRV_TCS_OFFSET * m +
+  RSC_DRV_CMD_OFFSET * n);


Do you really want this relaxed? Isn't the ordering of these
significant?


The ordering isn't. I can make it non-relaxed. The only ordering requirement
is that we trigger after writing everything.
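
A sketch of how that could look: keep the per-command programming relaxed and make
only the trigger an ordered write. The local variable names and the trigger
register/value names (RSC_DRV_CONTROL, TCS_AMC_MODE_TRIGGER) are placeholders:

    /* command payload programming can stay relaxed */
    write_tcs_reg(drv, RSC_DRV_CMD_WAIT_FOR_CMPL, m, 0, cmd_complete);
    write_tcs_reg(drv, RSC_DRV_CMD_ENABLE, m, 0, cmd_enable);

    /* writel() (non-relaxed) orders all the payload writes before the trigger */
    writel(TCS_AMC_MODE_TRIGGER,
           drv->tcs_base + RSC_DRV_CONTROL + RSC_DRV_TCS_OFFSET * m);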


+}
+
+static void write_tcs_reg_sync(struct rsc_drv *drv, int reg, int m, int n,
+  u32 data)
+{
+   write_tcs_reg(drv, reg, m, n, data);
+   for (;;) {
+   if (data == read_tcs_reg(drv, reg, m, n))
+   break;
+   udelay(1);
+   }
+}
+
+static bool tcs_is_free(struct rsc_drv *drv, int m)
+{
+   return !test_bit(m, drv->tcs_in_use) &&
+  read_tcs_reg(drv, RSC_DRV_STATUS, m, 0);
+}
+
+static struct tcs_group *get_tcs_of_type(struct rsc_drv *drv, int type)


According to rpmh_rsc_probe() the tcs array is indexed by "type", so you
can replace the entire function with:

return &drv->tcs[type];


Hmm. Ok.
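
A minimal version of that, keeping the existing num_tcs sanity check (the bounds
check on type is an addition):

    static struct tcs_group *get_tcs_of_type(struct rsc_drv *drv, int type)
    {
            /* drv->tcs[] is populated by type in rpmh_rsc_probe() */
            if (type < 0 || type >= TCS_TYPE_NR)
                    return ERR_PTR(-EINVAL);

            if (!drv->tcs[type].num_tcs)
                    return ERR_PTR(-EINVAL);

            return &drv->tcs[type];
    }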


+{
+   int i;
+   struct tcs_group *tcs;
+
+   for (i = 0; i < TCS_TYPE_NR; i++) {
+   if (type == drv->tcs[i].type)
+   break;
+   }
+
+   if (i == TCS_TYPE_NR)
+   return ERR_PTR(-EINVAL);
+
+   tcs = &drv->tcs[i];
+   if (!tcs->num_tcs)
+   return ERR_PTR(-EINVAL);
+
+   return tcs;
+}
+
+static struct 

Re: [PATCH v5 01/10] drivers: qcom: rpmh-rsc: add RPMH controller for QCOM SoCs

2018-04-13 Thread Lina Iyer

On Tue, Apr 10 2018 at 22:39 -0600, Stephen Boyd wrote:

Quoting Lina Iyer (2018-04-05 09:18:25)

Add controller driver for QCOM SoCs that have hardware based shared
resource management. The hardware IP known as RSC (Resource State
Coordinator) houses multiple Direct Resource Voter (DRV) for different
execution levels. A DRV is a unique voter on the state of a shared
resource. A Trigger Control Set (TCS) is a bunch of slots that can house
multiple resource state requests, that when triggered will issue those
requests through an internal bus to the Resource Power Manager Hardened
(RPMH) blocks. These hardware blocks are capable of adjusting clocks,
voltages, etc. The resource state request from a DRV are aggregated along
with state requests from other processors in the SoC and the aggregate
value is applied on the resource.

Some important aspects of the RPMH communication -
- Requests are <addr, value> with some header information
- Multiple requests (upto 16) may be sent through a TCS, at a time
- Requests in a TCS are sent in sequence
- Requests may be fire-n-forget or completion (response expected)
- Multiple TCS from the same DRV may be triggered simultaneously
- Cannot send a request if another requesit for the same addr is in


s/requesit/request/


Ok.

  progress from the same DRV
- When all the requests from a TCS are complete, an IRQ is raised
- The IRQ handler needs to clear the TCS before it is available for
  reuse
- TCS configuration is specific to a DRV
- Platform drivers may use DRV from different RSCs to make requests


This last point is sort of not true anymore? At least my understanding
is that platform drivers are children of the rsc and that they can only
use that rsc to do anything with rpmh.


Platform drivers may talk to multiple RSC+DRV instances and make
requests from those DRVs.



Resource state requests made when CPUs are active are called 'active'
state requests. Requests made when all the CPUs are powered down (idle
state) are called 'sleep' state requests. They are matched by a
corresponding 'wake' state requests which puts the resources back in to
previously requested active state before resuming any CPU. TCSes are
dedicated for each type of requests. Control TCS are used to provide
specific information to the controller.


Can you mention AMC here too? I see the acronym but no definition of
what it is besides "Active or AMC" which may indicate A == Active.


Ok.



diff --git a/drivers/soc/qcom/rpmh-internal.h b/drivers/soc/qcom/rpmh-internal.h
new file mode 100644
index ..aa73ec4b3e42
--- /dev/null
+++ b/drivers/soc/qcom/rpmh-internal.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2016-2018, The Linux Foundation. All rights reserved.
+ */
+
+
+#ifndef __RPM_INTERNAL_H__
+#define __RPM_INTERNAL_H__
+
+#include 
+#include 
+
+#define TCS_TYPE_NR  4
+#define MAX_CMDS_PER_TCS   16
+#define MAX_TCS_PER_TYPE   3
+#define MAX_TCS_NR (MAX_TCS_PER_TYPE * TCS_TYPE_NR)
+
+struct rsc_drv;
+
+/**
+ * struct tcs_response: Response object for a request
+ *
+ * @drv:   the controller
+ * @msg:   the request for this response
+ * @m: the tcs identifier
+ * @err:   error reported in the response
+ * @list:  element in list of pending response objects
+ */
+struct tcs_response {
+   struct rsc_drv *drv;
+   const struct tcs_request *msg;
+   u32 m;
+   int err;
+   struct list_head list;
+};
+
+/**
+ * struct tcs_group: group of Trigger Command Sets for a request state


Put (ACRONYM) for the acronyms that are spelled out the first time
please. Also, make sure we know what 'request state' is.


It's already in the commit text, but sure.


+ *
+ * @drv:   the controller
+ * @type:  type of the TCS in this group - active, sleep, wake


Now 'group' means 'request state'?


Group of TCSes. TCSes are grouped based on their use - sending requests
for active, sleep and wake.


+ * @mask:  mask of the TCSes relative to all the TCSes in the RSC
+ * @offset:start of the TCS group relative to the TCSes in the RSC
+ * @num_tcs:   number of TCSes in this type
+ * @ncpt:  number of commands in each TCS
+ * @lock:  lock for synchronizing this TCS writes
+ * @responses: response objects for requests sent from each TCS
+ */
+struct tcs_group {
+   struct rsc_drv *drv;
+   int type;


Is type supposed to be an enum?


Uses the #defines from include/dt-bindings/qcom,rpmh-rsc.txt.


+   u32 mask;
+   u32 offset;
+   int num_tcs;
+   int ncpt;
+   spinlock_t lock;
+   struct tcs_response *responses[MAX_TCS_PER_TYPE];
+};
+
+/**
+ * struct rsc_drv: the Resource State Coordinator controller
+ *
+ * @name:   controller identifier
+ * @tcs_base:   start address of the TCS registers in this controller
+ * @id: instance id in the controller (Direct Resource Voter)
+ * @num_tcs:number of TCSes in this DRV


It changed from an RSC

Re: [PATCH v5 01/10] drivers: qcom: rpmh-rsc: add RPMH controller for QCOM SoCs

2018-04-10 Thread Stephen Boyd
Quoting Lina Iyer (2018-04-05 09:18:25)
> Add controller driver for QCOM SoCs that have hardware based shared
> resource management. The hardware IP known as RSC (Resource State
> Coordinator) houses multiple Direct Resource Voter (DRV) for different
> execution levels. A DRV is a unique voter on the state of a shared
> resource. A Trigger Control Set (TCS) is a bunch of slots that can house
> multiple resource state requests, that when triggered will issue those
> requests through an internal bus to the Resource Power Manager Hardened
> (RPMH) blocks. These hardware blocks are capable of adjusting clocks,
> voltages, etc. The resource state request from a DRV are aggregated along
> with state requests from other processors in the SoC and the aggregate
> value is applied on the resource.
> 
> Some important aspects of the RPMH communication -
> - Requests are <addr, value> with some header information
> - Multiple requests (upto 16) may be sent through a TCS, at a time
> - Requests in a TCS are sent in sequence
> - Requests may be fire-n-forget or completion (response expected)
> - Multiple TCS from the same DRV may be triggered simultaneously
> - Cannot send a request if another requesit for the same addr is in

s/requesit/request/

>   progress from the same DRV
> - When all the requests from a TCS are complete, an IRQ is raised
> - The IRQ handler needs to clear the TCS before it is available for
>   reuse
> - TCS configuration is specific to a DRV
> - Platform drivers may use DRV from different RSCs to make requests

This last point is sort of not true anymore? At least my understanding
is that platform drivers are children of the rsc and that they can only
use that rsc to do anything with rpmh.

> 
> Resource state requests made when CPUs are active are called 'active'
> state requests. Requests made when all the CPUs are powered down (idle
> state) are called 'sleep' state requests. They are matched by a
> corresponding 'wake' state requests which puts the resources back in to
> previously requested active state before resuming any CPU. TCSes are
> dedicated for each type of requests. Control TCS are used to provide
> specific information to the controller.

Can you mention AMC here too? I see the acronym but no definition of
what it is besides "Active or AMC" which may indicate A == Active.

> 
> diff --git a/drivers/soc/qcom/rpmh-internal.h b/drivers/soc/qcom/rpmh-internal.h
> new file mode 100644
> index ..aa73ec4b3e42
> --- /dev/null
> +++ b/drivers/soc/qcom/rpmh-internal.h
> @@ -0,0 +1,89 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (c) 2016-2018, The Linux Foundation. All rights reserved.
> + */
> +
> +
> +#ifndef __RPM_INTERNAL_H__
> +#define __RPM_INTERNAL_H__
> +
> +#include 
> +#include 
> +
> +#define TCS_TYPE_NR  4
> +#define MAX_CMDS_PER_TCS   16
> +#define MAX_TCS_PER_TYPE   3
> +#define MAX_TCS_NR (MAX_TCS_PER_TYPE * TCS_TYPE_NR)
> +
> +struct rsc_drv;
> +
> +/**
> + * struct tcs_response: Response object for a request
> + *
> + * @drv:   the controller
> + * @msg:   the request for this response
> + * @m: the tcs identifier
> + * @err:   error reported in the response
> + * @list:  element in list of pending response objects
> + */
> +struct tcs_response {
> +   struct rsc_drv *drv;
> +   const struct tcs_request *msg;
> +   u32 m;
> +   int err;
> +   struct list_head list;
> +};
> +
> +/**
> + * struct tcs_group: group of Trigger Command Sets for a request state

Put (ACRONYM) for the acronyms that are spelled out the first time
please. Also, make sure we know what 'request state' is.

> + *
> + * @drv:   the controller
> + * @type:  type of the TCS in this group - active, sleep, wake

Now 'group' means 'request state'?

> + * @mask:  mask of the TCSes relative to all the TCSes in the RSC
> + * @offset:start of the TCS group relative to the TCSes in the RSC
> + * @num_tcs:   number of TCSes in this type
> + * @ncpt:  number of commands in each TCS
> + * @lock:  lock for synchronizing this TCS writes
> + * @responses: response objects for requests sent from each TCS
> + */
> +struct tcs_group {
> +   struct rsc_drv *drv;
> +   int type;

Is type supposed to be an enum?

> +   u32 mask;
> +   u32 offset;
> +   int num_tcs;
> +   int ncpt;
> +   spinlock_t lock;
> +   struct tcs_response *responses[MAX_TCS_PER_TYPE];
> +};
> +
> +/**
> + * struct rsc_drv: the Resource State Coordinator controller
> + *
> + * @name:   controller identifier
> + * @tcs_base:   start address of the TCS registers in this controller
> + * @id: instance id in the controller (Direct Resource Voter)
> + * @num_tcs:number of TCSes in this DRV

It changed from an RSC to a DRV here?

> + * @tasklet:handle responses, off-load work from IRQ handler
> + * @response_pending:
> + *  list of responses that needs to be sent to

Re: [PATCH v5 01/10] drivers: qcom: rpmh-rsc: add RPMH controller for QCOM SoCs

2018-04-10 Thread Bjorn Andersson
On Thu 05 Apr 09:18 PDT 2018, Lina Iyer wrote:
[..]
> diff --git a/drivers/soc/qcom/rpmh-internal.h b/drivers/soc/qcom/rpmh-internal.h
[..]
> +/**
> + * struct tcs_response: Response object for a request
> + *
> + * @drv:   the controller
> + * @msg:   the request for this response
> + * @m: the tcs identifier
> + * @err:   error reported in the response
> + * @list:  element in list of pending response objects
> + */
> +struct tcs_response {
> + struct rsc_drv *drv;
> + const struct tcs_request *msg;
> + u32 m;

m is assigned in one place but never used.

> + int err;
> + struct list_head list;
> +};
[..]
> diff --git a/drivers/soc/qcom/rpmh-rsc.c b/drivers/soc/qcom/rpmh-rsc.c
[..]
> +static struct tcs_group *get_tcs_from_index(struct rsc_drv *drv, int m)
> +{
> + struct tcs_group *tcs;
> + int i;
> +
> + for (i = 0; i < drv->num_tcs; i++) {
> + tcs = &drv->tcs[i];
> + if (tcs->mask & BIT(m))
> + return tcs;
> + }
> +
> + WARN(i == drv->num_tcs, "Incorrect TCS index %d", m);
> +
> + return NULL;
> +}
> +
> +static struct tcs_response *setup_response(struct rsc_drv *drv,
> +const struct tcs_request *msg, int m)
> +{
> + struct tcs_response *resp;
> + struct tcs_group *tcs;
> +
> + resp = kzalloc(sizeof(*resp), GFP_ATOMIC);

I still don't like the idea that you allocate a response struct for each
request, then upon getting an ack post this on a list and schedule a
tasklet in order to optionally deliver the return value to the waiting
caller.

Why don't you just just add the "err" and a completion to the
tcs_request struct and if it's a sync operation you complete that in
your irq handler?

That would remove the response struct, the list of them, the tasklet and
the dynamic memory handling - at the "cost" of making the code possible
to follow.

> + if (!resp)
> + return ERR_PTR(-ENOMEM);
> +
> + resp->drv = drv;
> + resp->msg = msg;
> + resp->err = 0;
> +
> + tcs = get_tcs_from_index(drv, m);
> + if (!tcs)
> + return ERR_PTR(-EINVAL);
> +
> + assert_spin_locked(&tcs->lock);

I tried to boot the kernel with the rpmh-clk and rpmh-regulator drivers
and I kept hitting this assert.

Turns out that find_free_tcs() finds an empty TCS with index 'm' within
the tcs, then passes it to setup_response() which tries to use the 'm'
to figure out which tcs contains the TCS we're operating on.

But as 'm' is in tcs-local space and get_tcs_from_index() tries to
lookup the TCS in the global drv space we get hold of the wrong TCS.

> + tcs->responses[m - tcs->offset] = resp;
> +
> + return resp;
> +}
> +
> +static void free_response(struct tcs_response *resp)
> +{
> + kfree(resp);
> +}
> +
> +static struct tcs_response *get_response(struct rsc_drv *drv, u32 m)
> +{
> + struct tcs_group *tcs = get_tcs_from_index(drv, m);
> +
> + return tcs->responses[m - tcs->offset];
> +}
> +
> +static u32 read_tcs_reg(struct rsc_drv *drv, int reg, int m, int n)
> +{
> + return readl_relaxed(drv->tcs_base + reg + RSC_DRV_TCS_OFFSET * m +
> +  RSC_DRV_CMD_OFFSET * n);
> +}
> +
> +static void write_tcs_reg(struct rsc_drv *drv, int reg, int m, int n, u32 data)
> +{
> + writel_relaxed(data, drv->tcs_base + reg + RSC_DRV_TCS_OFFSET * m +
> +RSC_DRV_CMD_OFFSET * n);

Do you really want this relaxed? Isn't the ordering of these
significant?

> +}
> +
> +static void write_tcs_reg_sync(struct rsc_drv *drv, int reg, int m, int n,
> +u32 data)
> +{
> + write_tcs_reg(drv, reg, m, n, data);
> + for (;;) {
> + if (data == read_tcs_reg(drv, reg, m, n))
> + break;
> + udelay(1);
> + }
> +}
> +
> +static bool tcs_is_free(struct rsc_drv *drv, int m)
> +{
> + return !test_bit(m, drv->tcs_in_use) &&
> +read_tcs_reg(drv, RSC_DRV_STATUS, m, 0);
> +}
> +
> +static struct tcs_group *get_tcs_of_type(struct rsc_drv *drv, int type)

According to rpmh_rsc_probe() the tcs array is indexed by "type", so you
can replace the entire function with:

return &drv->tcs[type];

> +{
> + int i;
> + struct tcs_group *tcs;
> +
> + for (i = 0; i < TCS_TYPE_NR; i++) {
> + if (type == drv->tcs[i].type)
> + break;
> + }
> +
> + if (i == TCS_TYPE_NR)
> + return ERR_PTR(-EINVAL);
> +
> + tcs = &drv->tcs[i];
> + if (!tcs->num_tcs)
> + return ERR_PTR(-EINVAL);
> +
> + return tcs;
> +}
> +
> +static struct tcs_group *get_tcs_for_msg(struct rsc_drv *drv,
> +  const struct tcs_request *msg)
> +{
> + int type;
> +
> + switch (msg->state) {
> + case RPMH_ACTIVE_ONLY_STATE:
> + type = ACTIVE_TCS;
> + break;
> + default:
> + return ERR_PTR(-EINVAL);
>