Re: [PATCH 2/3] remoteproc: Add inline coredump functionality

2020-05-14 Thread rishabhb

On 2020-05-07 13:21, Bjorn Andersson wrote:

On Thu 16 Apr 11:38 PDT 2020, Rishabh Bhatnagar wrote:


This patch adds the inline coredump functionality. The current
coredump implementation uses vmalloc area to copy all the segments.
But this might put a lot of strain on low memory targets as the
firmware size sometimes is in tens of MBs. The situation becomes
worse if there are multiple remote processors undergoing recovery
at the same time. This patch directly copies the device memory to
userspace buffer and avoids extra memory usage. This requires
recovery to be halted until data is read by userspace and free
function is called.

Signed-off-by: Rishabh Bhatnagar 
---
 drivers/remoteproc/remoteproc_coredump.c | 130 
+++

 drivers/remoteproc/remoteproc_internal.h |  23 +-
 include/linux/remoteproc.h   |   2 +
 3 files changed, 153 insertions(+), 2 deletions(-)

diff --git a/drivers/remoteproc/remoteproc_coredump.c 
b/drivers/remoteproc/remoteproc_coredump.c

index 9de0467..888b7dec91 100644
--- a/drivers/remoteproc/remoteproc_coredump.c
+++ b/drivers/remoteproc/remoteproc_coredump.c
@@ -12,6 +12,84 @@
 #include 
 #include "remoteproc_internal.h"

+static void rproc_free_dump(void *data)


rproc_coredump_free()


+{
+   struct rproc_coredump_state *dump_state = data;
+
+   complete(&dump_state->dump_done);


vfree(dump_state->header);


+}
+
+static unsigned long resolve_addr(loff_t user_offset,


rproc_coredump_find_segment()


+  struct list_head *segments,
+  unsigned long *data_left)
+{
+   struct rproc_dump_segment *segment;
+
+   list_for_each_entry(segment, segments, node) {
+   if (user_offset >= segment->size)
+   user_offset -= segment->size;
+   else
+   break;


if (user_offset < segment->size) {
*data_left = segment->size - user_offset;
return segment->da + user_offset;
}

user_offset -= segment->size;

+   }


*data_left = 0;
return 0;


+
+   if (&segment->node == segments) {
+   *data_left = 0;
+   return 0;
+   }
+
+   *data_left = segment->size - user_offset;
+
+   return segment->da + user_offset;
+}
+
+static ssize_t rproc_read_dump(char *buffer, loff_t offset, size_t 
count,

+   void *data, size_t header_size)
+{
+   void *device_mem;
+   size_t data_left, copy_size, bytes_left = count;
+   unsigned long addr;
+   struct rproc_coredump_state *dump_state = data;
+   struct rproc *rproc = dump_state->rproc;
+   void *elfcore = dump_state->header;
+
+   /* Copy the header first */
+   if (offset < header_size) {
+   copy_size = header_size - offset;
+   copy_size = min(copy_size, bytes_left);
+
+   memcpy(buffer, elfcore + offset, copy_size);
+   offset += copy_size;
+   bytes_left -= copy_size;
+   buffer += copy_size;
+   }


Perhaps you can take inspiration from devcd_readv() here?


+
+   while (bytes_left) {
+   addr = resolve_addr(offset - header_size,
+   &rproc->dump_segments, &data_left);
+   /* EOF check */
+   if (data_left == 0) {


Afaict data_left denotes the amount of data left in this particular
segment, rather than in the entire core.

I think you should start by making bytes_left the minimum of the core
size and @count and then have this loop as long as bytes_left, copying
data to the buffer either from header or an appropriate segment based on
the current offset.


+   pr_info("Ramdump complete %lld bytes read", offset);


dev_dbg(&rproc->dev, ...)


+   break;
+   }
+
+   copy_size = min_t(size_t, bytes_left, data_left);
+
+   device_mem = rproc->ops->da_to_va(rproc, addr, copy_size);


rproc_da_to_va()


+   if (!device_mem) {
+   pr_err("Address:%lx with size %zd out of remoteproc 
carveout\n",


dev_err(&rproc->dev, "coredump: %#lx size %#zx outside of carveouts\n",
..);


+   addr, copy_size);
+   return -ENOMEM;
+   }
+   memcpy(buffer, device_mem, copy_size);
+
+   offset += copy_size;
+   buffer += copy_size;
+   bytes_left -= copy_size;
+   }
+
+   return count - bytes_left;
+}
+
 static void create_elf_header(void *data, int phnum, struct rproc 
*rproc)

 {
struct elf32_phdr *phdr;
@@ -55,6 +133,58 @@ static void create_elf_header(void *data, int 
phnum, struct rproc *rproc)

 }

 /**
+ * rproc_inline_coredump() - perform synchronized coredump
+ * @rproc: rproc handle
+ *
+ * This function will generate an ELF header for 

Re: [PATCH 2/3] remoteproc: Add inline coredump functionality

2020-05-13 Thread Mathieu Poirier
On Mon, 11 May 2020 at 18:32, Bjorn Andersson
 wrote:
>
> On Mon 11 May 17:11 PDT 2020, risha...@codeaurora.org wrote:
>
> > On 2020-05-07 13:21, Bjorn Andersson wrote:
> > > On Thu 16 Apr 11:38 PDT 2020, Rishabh Bhatnagar wrote:
> > >
> > > > This patch adds the inline coredump functionality. The current
> > > > coredump implementation uses vmalloc area to copy all the segments.
> > > > But this might put a lot of strain on low memory targets as the
> > > > firmware size sometimes is in ten's of MBs. The situation becomes
> > > > worse if there are multiple remote processors  undergoing recovery
> > > > at the same time. This patch directly copies the device memory to
> > > > userspace buffer and avoids extra memory usage. This requires
> > > > recovery to be halted until data is read by userspace and free
> > > > function is called.
> > > >
> > > > Signed-off-by: Rishabh Bhatnagar 
> > > > ---
> > > >  drivers/remoteproc/remoteproc_coredump.c | 130
> > > > +++
> > > >  drivers/remoteproc/remoteproc_internal.h |  23 +-
> > > >  include/linux/remoteproc.h   |   2 +
> > > >  3 files changed, 153 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/drivers/remoteproc/remoteproc_coredump.c
> > > > b/drivers/remoteproc/remoteproc_coredump.c
> > > > index 9de0467..888b7dec91 100644
> > > > --- a/drivers/remoteproc/remoteproc_coredump.c
> > > > +++ b/drivers/remoteproc/remoteproc_coredump.c
> > > > @@ -12,6 +12,84 @@
> > > >  #include 
> > > >  #include "remoteproc_internal.h"
> > > >
> > > > +static void rproc_free_dump(void *data)
> > >
> > > rproc_coredump_free()
> > >
> > > > +{
> > > > + struct rproc_coredump_state *dump_state = data;
> > > > +
> > > > + complete(_state->dump_done);
> > >
> > > vfree(dump_state->header);
> > >
> > > > +}
> > > > +
> > > > +static unsigned long resolve_addr(loff_t user_offset,
> > >
> > > rproc_coredump_find_segment()
> > >
> > > > +struct list_head *segments,
> > > > +unsigned long *data_left)
> > > > +{
> > > > + struct rproc_dump_segment *segment;
> > > > +
> > > > + list_for_each_entry(segment, segments, node) {
> > > > + if (user_offset >= segment->size)
> > > > + user_offset -= segment->size;
> > > > + else
> > > > + break;
> > >
> > > if (user_offset < segment->size) {
> > > *data_left = segment->size - user_offset;
> > > return segment->da + user_offset;
> > > }
> > >
> > > user_offset -= segment->size;
> > > > + }
> > >
> > > *data_left = 0;
> > > return 0;
> > >
> > > > +
> > > > + if (>node == segments) {
> > > > + *data_left = 0;
> > > > + return 0;
> > > > + }
> > > > +
> > > > + *data_left = segment->size - user_offset;
> > > > +
> > > > + return segment->da + user_offset;
> > > > +}
> > > > +
> > > > +static ssize_t rproc_read_dump(char *buffer, loff_t offset, size_t
> > > > count,
> > > > + void *data, size_t header_size)
> > > > +{
> > > > + void *device_mem;
> > > > + size_t data_left, copy_size, bytes_left = count;
> > > > + unsigned long addr;
> > > > + struct rproc_coredump_state *dump_state = data;
> > > > + struct rproc *rproc = dump_state->rproc;
> > > > + void *elfcore = dump_state->header;
> > > > +
> > > > + /* Copy the header first */
> > > > + if (offset < header_size) {
> > > > + copy_size = header_size - offset;
> > > > + copy_size = min(copy_size, bytes_left);
> > > > +
> > > > + memcpy(buffer, elfcore + offset, copy_size);
> > > > + offset += copy_size;
> > > > + bytes_left -= copy_size;
> > > > + buffer += copy_size;
> > > > + }
> > >
> > > Perhaps you can take inspiration from devcd_readv() here?
> > >
> > > > +
> > > > + while (bytes_left) {
> > > > + addr = resolve_addr(offset - header_size,
> > > > + >dump_segments, _left);
> > > > + /* EOF check */
> > > > + if (data_left == 0) {
> > >
> > > Afaict data_left denotes the amount of data left in this particular
> > > segment, rather than in the entire core.
> > >
> > Yes, but it only returns 0 when the final segment has been copied
> > completely.  Otherwise it gives data left to copy for every segment
> > and moves to next segment once the current one is copied.
>
> You're right.

I remember spending a lot of time looking at this function and now
Bjorn has stumbled on it as well.  As such either a redesign or adding
a generous amount of comments is in order.

Thanks,
Mathieu

>
> > > I think you should start by making bytes_left the minimum of the core
> > > size and @count and then have this loop as long as bytes_left, copying
> > > data to the buffer either from header or an appropriate segment based on
> > > the current offset.
> > >
> > That would require an extra function that calculates entire core size,
> > as 

Re: [PATCH 2/3] remoteproc: Add inline coredump functionality

2020-05-11 Thread Bjorn Andersson
On Mon 11 May 17:41 PDT 2020, risha...@codeaurora.org wrote:

> On 2020-05-11 17:30, Bjorn Andersson wrote:
> > On Mon 11 May 17:11 PDT 2020, risha...@codeaurora.org wrote:
> > > On 2020-05-07 13:21, Bjorn Andersson wrote:
> > > > On Thu 16 Apr 11:38 PDT 2020, Rishabh Bhatnagar wrote:
> > > > > diff --git a/drivers/remoteproc/remoteproc_coredump.c
> > > > > b/drivers/remoteproc/remoteproc_coredump.c
[..]
> > > > > +static ssize_t rproc_read_dump(char *buffer, loff_t offset, size_t
> > > > > count,
> > > > > + void *data, size_t header_size)
> > > > > +{
> > > > > + void *device_mem;
> > > > > + size_t data_left, copy_size, bytes_left = count;
> > > > > + unsigned long addr;
> > > > > + struct rproc_coredump_state *dump_state = data;
> > > > > + struct rproc *rproc = dump_state->rproc;
> > > > > + void *elfcore = dump_state->header;
> > > > > +
> > > > > + /* Copy the header first */
> > > > > + if (offset < header_size) {
> > > > > + copy_size = header_size - offset;
> > > > > + copy_size = min(copy_size, bytes_left);
> > > > > +
> > > > > + memcpy(buffer, elfcore + offset, copy_size);
> > > > > + offset += copy_size;
> > > > > + bytes_left -= copy_size;
> > > > > + buffer += copy_size;
> > > > > + }
> > > >
> > > > Perhaps you can take inspiration from devcd_readv() here?
> > > >
> > > > > +
> > > > > + while (bytes_left) {
> > > > > + addr = resolve_addr(offset - header_size,
> > > > > + >dump_segments, _left);
> > > > > + /* EOF check */
> > > > > + if (data_left == 0) {
> > > >
> > > > Afaict data_left denotes the amount of data left in this particular
> > > > segment, rather than in the entire core.
> > > >
> > > Yes, but it only returns 0 when the final segment has been copied
> > > completely.  Otherwise it gives data left to copy for every segment
> > > and moves to next segment once the current one is copied.
> > 
> > You're right.
> > 
> > > > I think you should start by making bytes_left the minimum of the core
> > > > size and @count and then have this loop as long as bytes_left, copying
> > > > data to the buffer either from header or an appropriate segment based on
> > > > the current offset.
> > > >
> > > That would require an extra function that calculates entire core size,
> > > as its not available right now. Do you see any missed corner cases
> > > with this
> > > approach?
> > 
> > You're looping over all the segments as you're building the header
> > anyways, so you could simply store this in the dump_state. I think this
> > depend more on the ability to reuse the read function between inline and
> > default coredump.
> > 
> > Regards,
> > Bjorn
> 
> Wouldn't the first if condition take care of "default" dump as it is?
> The header_size in that case would involve the 'header + all segments'.

Correct.

Regards,
Bjorn


Re: [PATCH 2/3] remoteproc: Add inline coredump functionality

2020-05-11 Thread rishabhb

On 2020-05-11 17:30, Bjorn Andersson wrote:

On Mon 11 May 17:11 PDT 2020, risha...@codeaurora.org wrote:


On 2020-05-07 13:21, Bjorn Andersson wrote:
> On Thu 16 Apr 11:38 PDT 2020, Rishabh Bhatnagar wrote:
>
> > This patch adds the inline coredump functionality. The current
> > coredump implementation uses vmalloc area to copy all the segments.
> > But this might put a lot of strain on low memory targets as the
> > firmware size sometimes is in ten's of MBs. The situation becomes
> > worse if there are multiple remote processors  undergoing recovery
> > at the same time. This patch directly copies the device memory to
> > userspace buffer and avoids extra memory usage. This requires
> > recovery to be halted until data is read by userspace and free
> > function is called.
> >
> > Signed-off-by: Rishabh Bhatnagar 
> > ---
> >  drivers/remoteproc/remoteproc_coredump.c | 130
> > +++
> >  drivers/remoteproc/remoteproc_internal.h |  23 +-
> >  include/linux/remoteproc.h   |   2 +
> >  3 files changed, 153 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/remoteproc/remoteproc_coredump.c
> > b/drivers/remoteproc/remoteproc_coredump.c
> > index 9de0467..888b7dec91 100644
> > --- a/drivers/remoteproc/remoteproc_coredump.c
> > +++ b/drivers/remoteproc/remoteproc_coredump.c
> > @@ -12,6 +12,84 @@
> >  #include 
> >  #include "remoteproc_internal.h"
> >
> > +static void rproc_free_dump(void *data)
>
> rproc_coredump_free()
>
> > +{
> > + struct rproc_coredump_state *dump_state = data;
> > +
> > + complete(_state->dump_done);
>
> vfree(dump_state->header);
>
> > +}
> > +
> > +static unsigned long resolve_addr(loff_t user_offset,
>
> rproc_coredump_find_segment()
>
> > +struct list_head *segments,
> > +unsigned long *data_left)
> > +{
> > + struct rproc_dump_segment *segment;
> > +
> > + list_for_each_entry(segment, segments, node) {
> > + if (user_offset >= segment->size)
> > + user_offset -= segment->size;
> > + else
> > + break;
>
>if (user_offset < segment->size) {
>*data_left = segment->size - user_offset;
>return segment->da + user_offset;
>}
>
>user_offset -= segment->size;
> > + }
>
>*data_left = 0;
>return 0;
>
> > +
> > + if (>node == segments) {
> > + *data_left = 0;
> > + return 0;
> > + }
> > +
> > + *data_left = segment->size - user_offset;
> > +
> > + return segment->da + user_offset;
> > +}
> > +
> > +static ssize_t rproc_read_dump(char *buffer, loff_t offset, size_t
> > count,
> > + void *data, size_t header_size)
> > +{
> > + void *device_mem;
> > + size_t data_left, copy_size, bytes_left = count;
> > + unsigned long addr;
> > + struct rproc_coredump_state *dump_state = data;
> > + struct rproc *rproc = dump_state->rproc;
> > + void *elfcore = dump_state->header;
> > +
> > + /* Copy the header first */
> > + if (offset < header_size) {
> > + copy_size = header_size - offset;
> > + copy_size = min(copy_size, bytes_left);
> > +
> > + memcpy(buffer, elfcore + offset, copy_size);
> > + offset += copy_size;
> > + bytes_left -= copy_size;
> > + buffer += copy_size;
> > + }
>
> Perhaps you can take inspiration from devcd_readv() here?
>
> > +
> > + while (bytes_left) {
> > + addr = resolve_addr(offset - header_size,
> > + >dump_segments, _left);
> > + /* EOF check */
> > + if (data_left == 0) {
>
> Afaict data_left denotes the amount of data left in this particular
> segment, rather than in the entire core.
>
Yes, but it only returns 0 when the final segment has been copied
completely.  Otherwise it gives data left to copy for every segment
and moves to next segment once the current one is copied.


You're right.


> I think you should start by making bytes_left the minimum of the core
> size and @count and then have this loop as long as bytes_left, copying
> data to the buffer either from header or an appropriate segment based on
> the current offset.
>
That would require an extra function that calculates entire core size,
as its not available right now. Do you see any missed corner cases 
with this

approach?


You're looping over all the segments as you're building the header
anyways, so you could simply store this in the dump_state. I think this
depend more on the ability to reuse the read function between inline 
and

default coredump.

Regards,
Bjorn


Wouldn't the first if condition take care of "default" dump as it is?
The header_size in that case would involve the 'header + all segments'.



> > + pr_info("Ramdump complete %lld bytes read", offset);
>

Re: [PATCH 2/3] remoteproc: Add inline coredump functionality

2020-05-11 Thread Bjorn Andersson
On Mon 11 May 17:11 PDT 2020, risha...@codeaurora.org wrote:

> On 2020-05-07 13:21, Bjorn Andersson wrote:
> > On Thu 16 Apr 11:38 PDT 2020, Rishabh Bhatnagar wrote:
> > 
> > > This patch adds the inline coredump functionality. The current
> > > coredump implementation uses vmalloc area to copy all the segments.
> > > But this might put a lot of strain on low memory targets as the
> > > firmware size sometimes is in ten's of MBs. The situation becomes
> > > worse if there are multiple remote processors  undergoing recovery
> > > at the same time. This patch directly copies the device memory to
> > > userspace buffer and avoids extra memory usage. This requires
> > > recovery to be halted until data is read by userspace and free
> > > function is called.
> > > 
> > > Signed-off-by: Rishabh Bhatnagar 
> > > ---
> > >  drivers/remoteproc/remoteproc_coredump.c | 130
> > > +++
> > >  drivers/remoteproc/remoteproc_internal.h |  23 +-
> > >  include/linux/remoteproc.h   |   2 +
> > >  3 files changed, 153 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/drivers/remoteproc/remoteproc_coredump.c
> > > b/drivers/remoteproc/remoteproc_coredump.c
> > > index 9de0467..888b7dec91 100644
> > > --- a/drivers/remoteproc/remoteproc_coredump.c
> > > +++ b/drivers/remoteproc/remoteproc_coredump.c
> > > @@ -12,6 +12,84 @@
> > >  #include 
> > >  #include "remoteproc_internal.h"
> > > 
> > > +static void rproc_free_dump(void *data)
> > 
> > rproc_coredump_free()
> > 
> > > +{
> > > + struct rproc_coredump_state *dump_state = data;
> > > +
> > > + complete(_state->dump_done);
> > 
> > vfree(dump_state->header);
> > 
> > > +}
> > > +
> > > +static unsigned long resolve_addr(loff_t user_offset,
> > 
> > rproc_coredump_find_segment()
> > 
> > > +struct list_head *segments,
> > > +unsigned long *data_left)
> > > +{
> > > + struct rproc_dump_segment *segment;
> > > +
> > > + list_for_each_entry(segment, segments, node) {
> > > + if (user_offset >= segment->size)
> > > + user_offset -= segment->size;
> > > + else
> > > + break;
> > 
> > if (user_offset < segment->size) {
> > *data_left = segment->size - user_offset;
> > return segment->da + user_offset;
> > }
> > 
> > user_offset -= segment->size;
> > > + }
> > 
> > *data_left = 0;
> > return 0;
> > 
> > > +
> > > + if (>node == segments) {
> > > + *data_left = 0;
> > > + return 0;
> > > + }
> > > +
> > > + *data_left = segment->size - user_offset;
> > > +
> > > + return segment->da + user_offset;
> > > +}
> > > +
> > > +static ssize_t rproc_read_dump(char *buffer, loff_t offset, size_t
> > > count,
> > > + void *data, size_t header_size)
> > > +{
> > > + void *device_mem;
> > > + size_t data_left, copy_size, bytes_left = count;
> > > + unsigned long addr;
> > > + struct rproc_coredump_state *dump_state = data;
> > > + struct rproc *rproc = dump_state->rproc;
> > > + void *elfcore = dump_state->header;
> > > +
> > > + /* Copy the header first */
> > > + if (offset < header_size) {
> > > + copy_size = header_size - offset;
> > > + copy_size = min(copy_size, bytes_left);
> > > +
> > > + memcpy(buffer, elfcore + offset, copy_size);
> > > + offset += copy_size;
> > > + bytes_left -= copy_size;
> > > + buffer += copy_size;
> > > + }
> > 
> > Perhaps you can take inspiration from devcd_readv() here?
> > 
> > > +
> > > + while (bytes_left) {
> > > + addr = resolve_addr(offset - header_size,
> > > + >dump_segments, _left);
> > > + /* EOF check */
> > > + if (data_left == 0) {
> > 
> > Afaict data_left denotes the amount of data left in this particular
> > segment, rather than in the entire core.
> > 
> Yes, but it only returns 0 when the final segment has been copied
> completely.  Otherwise it gives data left to copy for every segment
> and moves to next segment once the current one is copied.

You're right.

> > I think you should start by making bytes_left the minimum of the core
> > size and @count and then have this loop as long as bytes_left, copying
> > data to the buffer either from header or an appropriate segment based on
> > the current offset.
> > 
> That would require an extra function that calculates entire core size,
> as its not available right now. Do you see any missed corner cases with this
> approach?

You're looping over all the segments as you're building the header
anyways, so you could simply store this in the dump_state. I think this
depends more on the ability to reuse the read function between inline and
default coredump.

Regards,
Bjorn

> > > + pr_info("Ramdump complete %lld bytes read", offset);
> > 
> > dev_dbg(>dev, ...)
> > 
> > > + break;
> > > + 

Re: [PATCH 2/3] remoteproc: Add inline coredump functionality

2020-05-11 Thread rishabhb

On 2020-05-07 13:21, Bjorn Andersson wrote:

On Thu 16 Apr 11:38 PDT 2020, Rishabh Bhatnagar wrote:


This patch adds the inline coredump functionality. The current
coredump implementation uses vmalloc area to copy all the segments.
But this might put a lot of strain on low memory targets as the
firmware size sometimes is in ten's of MBs. The situation becomes
worse if there are multiple remote processors  undergoing recovery
at the same time. This patch directly copies the device memory to
userspace buffer and avoids extra memory usage. This requires
recovery to be halted until data is read by userspace and free
function is called.

Signed-off-by: Rishabh Bhatnagar 
---
 drivers/remoteproc/remoteproc_coredump.c | 130 
+++

 drivers/remoteproc/remoteproc_internal.h |  23 +-
 include/linux/remoteproc.h   |   2 +
 3 files changed, 153 insertions(+), 2 deletions(-)

diff --git a/drivers/remoteproc/remoteproc_coredump.c 
b/drivers/remoteproc/remoteproc_coredump.c

index 9de0467..888b7dec91 100644
--- a/drivers/remoteproc/remoteproc_coredump.c
+++ b/drivers/remoteproc/remoteproc_coredump.c
@@ -12,6 +12,84 @@
 #include 
 #include "remoteproc_internal.h"

+static void rproc_free_dump(void *data)


rproc_coredump_free()


+{
+   struct rproc_coredump_state *dump_state = data;
+
+   complete(_state->dump_done);


vfree(dump_state->header);


+}
+
+static unsigned long resolve_addr(loff_t user_offset,


rproc_coredump_find_segment()


+  struct list_head *segments,
+  unsigned long *data_left)
+{
+   struct rproc_dump_segment *segment;
+
+   list_for_each_entry(segment, segments, node) {
+   if (user_offset >= segment->size)
+   user_offset -= segment->size;
+   else
+   break;


if (user_offset < segment->size) {
*data_left = segment->size - user_offset;
return segment->da + user_offset;
}

user_offset -= segment->size;

+   }


*data_left = 0;
return 0;


+
+   if (>node == segments) {
+   *data_left = 0;
+   return 0;
+   }
+
+   *data_left = segment->size - user_offset;
+
+   return segment->da + user_offset;
+}
+
+static ssize_t rproc_read_dump(char *buffer, loff_t offset, size_t 
count,

+   void *data, size_t header_size)
+{
+   void *device_mem;
+   size_t data_left, copy_size, bytes_left = count;
+   unsigned long addr;
+   struct rproc_coredump_state *dump_state = data;
+   struct rproc *rproc = dump_state->rproc;
+   void *elfcore = dump_state->header;
+
+   /* Copy the header first */
+   if (offset < header_size) {
+   copy_size = header_size - offset;
+   copy_size = min(copy_size, bytes_left);
+
+   memcpy(buffer, elfcore + offset, copy_size);
+   offset += copy_size;
+   bytes_left -= copy_size;
+   buffer += copy_size;
+   }


Perhaps you can take inspiration from devcd_readv() here?


+
+   while (bytes_left) {
+   addr = resolve_addr(offset - header_size,
+   >dump_segments, _left);
+   /* EOF check */
+   if (data_left == 0) {


Afaict data_left denotes the amount of data left in this particular
segment, rather than in the entire core.

Yes, but it only returns 0 when the final segment has been copied
completely. Otherwise it gives data left to copy for every segment
and moves to the next segment once the current one is copied.

I think you should start by making bytes_left the minimum of the core
size and @count and then have this loop as long as bytes_left, copying
data to the buffer either from header or an appropriate segment based on
the current offset.


That would require an extra function that calculates the entire core size,
as it's not available right now. Do you see any missed corner cases with
this approach?

+   pr_info("Ramdump complete %lld bytes read", offset);


dev_dbg(>dev, ...)


+   break;
+   }
+
+   copy_size = min_t(size_t, bytes_left, data_left);
+
+   device_mem = rproc->ops->da_to_va(rproc, addr, copy_size);


rproc_da_to_va()


+   if (!device_mem) {
+   pr_err("Address:%lx with size %zd out of remoteproc 
carveout\n",


dev_err(>dev, "coredump: %#lx size %#zx outside of carveouts\n",
..);


+   addr, copy_size);
+   return -ENOMEM;
+   }
+   memcpy(buffer, device_mem, copy_size);
+
+   offset += copy_size;
+   buffer += copy_size;
+   bytes_left -= copy_size;
+   }
+
+   return count - bytes_left;
+}
+
 

Re: [PATCH 2/3] remoteproc: Add inline coredump functionality

2020-05-07 Thread Bjorn Andersson
On Thu 16 Apr 11:38 PDT 2020, Rishabh Bhatnagar wrote:

> This patch adds the inline coredump functionality. The current
> coredump implementation uses vmalloc area to copy all the segments.
> But this might put a lot of strain on low memory targets as the
> firmware size sometimes is in ten's of MBs. The situation becomes
> worse if there are multiple remote processors  undergoing recovery
> at the same time. This patch directly copies the device memory to
> userspace buffer and avoids extra memory usage. This requires
> recovery to be halted until data is read by userspace and free
> function is called.
> 
> Signed-off-by: Rishabh Bhatnagar 
> ---
>  drivers/remoteproc/remoteproc_coredump.c | 130 
> +++
>  drivers/remoteproc/remoteproc_internal.h |  23 +-
>  include/linux/remoteproc.h   |   2 +
>  3 files changed, 153 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/remoteproc/remoteproc_coredump.c 
> b/drivers/remoteproc/remoteproc_coredump.c
> index 9de0467..888b7dec91 100644
> --- a/drivers/remoteproc/remoteproc_coredump.c
> +++ b/drivers/remoteproc/remoteproc_coredump.c
> @@ -12,6 +12,84 @@
>  #include 
>  #include "remoteproc_internal.h"
>  
> +static void rproc_free_dump(void *data)

rproc_coredump_free()

> +{
> + struct rproc_coredump_state *dump_state = data;
> +
> + complete(&dump_state->dump_done);

vfree(dump_state->header);

> +}
> +
> +static unsigned long resolve_addr(loff_t user_offset,

rproc_coredump_find_segment()

> +struct list_head *segments,
> +unsigned long *data_left)
> +{
> + struct rproc_dump_segment *segment;
> +
> + list_for_each_entry(segment, segments, node) {
> + if (user_offset >= segment->size)
> + user_offset -= segment->size;
> + else
> + break;

if (user_offset < segment->size) {
*data_left = segment->size - user_offset;
return segment->da + user_offset;
}

user_offset -= segment->size;
> + }

*data_left = 0;
return 0;

> +
> + if (&segment->node == segments) {
> + *data_left = 0;
> + return 0;
> + }
> +
> + *data_left = segment->size - user_offset;
> +
> + return segment->da + user_offset;
> +}
> +
> +static ssize_t rproc_read_dump(char *buffer, loff_t offset, size_t count,
> + void *data, size_t header_size)
> +{
> + void *device_mem;
> + size_t data_left, copy_size, bytes_left = count;
> + unsigned long addr;
> + struct rproc_coredump_state *dump_state = data;
> + struct rproc *rproc = dump_state->rproc;
> + void *elfcore = dump_state->header;
> +
> + /* Copy the header first */
> + if (offset < header_size) {
> + copy_size = header_size - offset;
> + copy_size = min(copy_size, bytes_left);
> +
> + memcpy(buffer, elfcore + offset, copy_size);
> + offset += copy_size;
> + bytes_left -= copy_size;
> + buffer += copy_size;
> + }

Perhaps you can take inspiration from devcd_readv() here?

> +
> + while (bytes_left) {
> + addr = resolve_addr(offset - header_size,
> + &rproc->dump_segments, &data_left);
> + /* EOF check */
> + if (data_left == 0) {

Afaict data_left denotes the amount of data left in this particular
segment, rather than in the entire core.

I think you should start by making bytes_left the minimum of the core
size and @count and then have this loop as long as bytes_left, copying
data to the buffer either from header or an appropriate segment based on
the current offset.

> + pr_info("Ramdump complete %lld bytes read", offset);

dev_dbg(&rproc->dev, ...)

> + break;
> + }
> +
> + copy_size = min_t(size_t, bytes_left, data_left);
> +
> + device_mem = rproc->ops->da_to_va(rproc, addr, copy_size);

rproc_da_to_va()

> + if (!device_mem) {
> + pr_err("Address:%lx with size %zd out of remoteproc 
> carveout\n",

dev_err(&rproc->dev, "coredump: %#lx size %#zx outside of carveouts\n",
..);

> + addr, copy_size);
> + return -ENOMEM;
> + }
> + memcpy(buffer, device_mem, copy_size);
> +
> + offset += copy_size;
> + buffer += copy_size;
> + bytes_left -= copy_size;
> + }
> +
> + return count - bytes_left;
> +}
> +
>  static void create_elf_header(void *data, int phnum, struct rproc *rproc)
>  {
>   struct elf32_phdr *phdr;
> @@ -55,6 +133,58 @@ static void create_elf_header(void *data, int phnum, 
> struct rproc *rproc)
>  }
>  
>  /**
> + * rproc_inline_coredump() - perform synchronized coredump
> + * @rproc:   rproc handle
> + *
>