Re: sparc64 softraid boot: workaround for memory leak

2018-04-03 Thread Mike Larkin
On Thu, Mar 29, 2018 at 09:21:16AM +0200, Mark Kettenis wrote:
> > Date: Wed, 28 Mar 2018 23:46:49 -0700
> > From: Mike Larkin 
> > 
> > On Thu, Mar 29, 2018 at 08:40:27AM +0200, Stefan Sperling wrote:
> > > On Mon, Mar 12, 2018 at 11:38:14AM +0100, Stefan Sperling wrote:
> > > > I haven't had any feedback on the previous diff. However, I felt it
> > > > was a bit of a hack, so I tried to come up with a cleaner solution.
> > > 
> > > Anyone? Can this go in now? I hope to get this tested across
> > > many sparc64 machines during the 6.4 release cycle.
> > > 
> > 
> > Wish I could help but my T5240 has never been able to run any ldoms. 
> > Something
> > about mismatched firmware versions. ldomctl(8) just generates interrupt
> > storms.
> 
> Even with all the fixes that stsp@ made during the 6.3 release cycle?
> 

It does look like some things have been fixed. I still can't launch ldoms
but I believe this due to some local error, not anything in the code. All the
commands work now, it's just that after reboot the ldom isn't started.

I'll see if I can diagnose more this week, but that machine is a beast and
really noisy.

-ml

> > > > The diff below opens the softraid boot chunk early on and keeps the
> > > > handle open until a kernel has been loaded from the softraid volume.
> > > > This means sr_strategy() does not have to worry about obtaining a
> > > > handle from the firmware anymore.
> > > > 
> > > > Also consolidate some repeated lines of code in sr_strategy().
> > > > 
> > > > Tested on a T5220 ldom guest (CRYPTO) and a v240 machine (RAID1).
> > > > 
> > > > Index: boot.c
> > > > ===
> > > > RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/boot.c,v
> > > > retrieving revision 1.27
> > > > diff -u -p -r1.27 boot.c
> > > > --- boot.c  11 Sep 2016 17:53:26 -  1.27
> > > > +++ boot.c  12 Mar 2018 10:26:23 -
> > > > @@ -248,6 +248,8 @@ loadfile(int fd, char *args)
> > > > close(fd);
> > > >  
> > > >  #ifdef SOFTRAID
> > > > +   if (bootdev_dip)
> > > > +   OF_close(bootdev_dip->sr_handle);
> > > > sr_clear_keys();
> > > >  #endif
> > > > chain(entry, args, ssym, esym);
> > > > @@ -283,12 +285,11 @@ fail:
> > > >  }
> > > >  
> > > >  #ifdef SOFTRAID
> > > > -/* Set bootdev_dip to the software boot volume, if specified. */
> > > > +/* Set bootdev_dip to the softraid boot volume, if specified. */
> > > >  static int
> > > >  srbootdev(const char *bootline)
> > > >  {
> > > > struct sr_boot_volume *bv;
> > > > -   struct diskinfo *dip;
> > > > int unit;
> > > >  
> > > > bootdev_dip = NULL;
> > > > @@ -320,9 +321,23 @@ srbootdev(const char *bootline)
> > > > return EPERM;
> > > >  
> > > > if (bv->sbv_diskinfo == NULL) {
> > > > +   struct sr_boot_chunk *bc;
> > > > +   struct diskinfo *dip, *bc_dip;
> > > > +   int sr_handle;
> > > > +
> > > > +   /* All reads will come from the boot chunk. */
> > > > +   bc = sr_vol_boot_chunk(bv);
> > > > +   if (bc == NULL)
> > > > +   return ENXIO;
> > > > +   bc_dip = (struct diskinfo *)bc->sbc_diskinfo;
> > > > +   sr_handle = OF_open(bc_dip->path);
> > > > +   if (sr_handle == -1)
> > > > +   return EIO;
> > > > +
> > > > dip = alloc(sizeof(struct diskinfo));
> > > > bzero(dip, sizeof(*dip));
> > > > dip->sr_vol = bv;
> > > > +   dip->sr_handle = sr_handle;
> > > > bv->sbv_diskinfo = dip;
> > > > }
> > > >  
> > > > @@ -331,7 +346,8 @@ srbootdev(const char *bootline)
> > > >  
> > > > /* Attempt to read disklabel. */
> > > > bv->sbv_part = 'c';
> > > > -   if (sr_getdisklabel(bv, >disklabel)) {
> > > > +   if (sr_getdisklabel(bv, _dip->disklabel)) {
> > > > +   OF_close(bootdev_dip->sr_handle);
> > > > free(bv->sbv_diskinfo, sizeof(struct diskinfo));
> > > > bv->sbv_diskinfo = NULL;
> > > > bootdev_dip = NULL;
> > > > Index: disk.h
> > > > ===
> > > > RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/disk.h,v
> > > > retrieving revision 1.1
> > > > diff -u -p -r1.1 disk.h
> > > > --- disk.h  26 Nov 2014 20:30:41 -  1.1
> > > > +++ disk.h  12 Mar 2018 10:26:23 -
> > > > @@ -36,7 +36,10 @@
> > > >  struct diskinfo {
> > > > char path[256];
> > > > struct disklabel disklabel;
> > > > +
> > > > +   /* Used during softraid boot. */
> > > >

Re: sparc64 softraid boot: workaround for memory leak

2018-03-29 Thread Mike Larkin
On Thu, Mar 29, 2018 at 09:21:16AM +0200, Mark Kettenis wrote:
> > Date: Wed, 28 Mar 2018 23:46:49 -0700
> > From: Mike Larkin 
> > 
> > On Thu, Mar 29, 2018 at 08:40:27AM +0200, Stefan Sperling wrote:
> > > On Mon, Mar 12, 2018 at 11:38:14AM +0100, Stefan Sperling wrote:
> > > > I haven't had any feedback on the previous diff. However, I felt it
> > > > was a bit of a hack, so I tried to come up with a cleaner solution.
> > > 
> > > Anyone? Can this go in now? I hope to get this tested across
> > > many sparc64 machines during the 6.4 release cycle.
> > > 
> > 
> > Wish I could help but my T5240 has never been able to run any ldoms. 
> > Something
> > about mismatched firmware versions. ldomctl(8) just generates interrupt
> > storms.
> 
> Even with all the fixes that stsp@ made during the 6.3 release cycle?
> 

As of about a month ago, I still couldn't run any ldoms. But since you mention
it, I'll try again tomorrow!

-ml

> > > > The diff below opens the softraid boot chunk early on and keeps the
> > > > handle open until a kernel has been loaded from the softraid volume.
> > > > This means sr_strategy() does not have to worry about obtaining a
> > > > handle from the firmware anymore.
> > > > 
> > > > Also consolidate some repeated lines of code in sr_strategy().
> > > > 
> > > > Tested on a T5220 ldom guest (CRYPTO) and a v240 machine (RAID1).
> > > > 
> > > > Index: boot.c
> > > > ===
> > > > RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/boot.c,v
> > > > retrieving revision 1.27
> > > > diff -u -p -r1.27 boot.c
> > > > --- boot.c  11 Sep 2016 17:53:26 -  1.27
> > > > +++ boot.c  12 Mar 2018 10:26:23 -
> > > > @@ -248,6 +248,8 @@ loadfile(int fd, char *args)
> > > > close(fd);
> > > >  
> > > >  #ifdef SOFTRAID
> > > > +   if (bootdev_dip)
> > > > +   OF_close(bootdev_dip->sr_handle);
> > > > sr_clear_keys();
> > > >  #endif
> > > > chain(entry, args, ssym, esym);
> > > > @@ -283,12 +285,11 @@ fail:
> > > >  }
> > > >  
> > > >  #ifdef SOFTRAID
> > > > -/* Set bootdev_dip to the software boot volume, if specified. */
> > > > +/* Set bootdev_dip to the softraid boot volume, if specified. */
> > > >  static int
> > > >  srbootdev(const char *bootline)
> > > >  {
> > > > struct sr_boot_volume *bv;
> > > > -   struct diskinfo *dip;
> > > > int unit;
> > > >  
> > > > bootdev_dip = NULL;
> > > > @@ -320,9 +321,23 @@ srbootdev(const char *bootline)
> > > > return EPERM;
> > > >  
> > > > if (bv->sbv_diskinfo == NULL) {
> > > > +   struct sr_boot_chunk *bc;
> > > > +   struct diskinfo *dip, *bc_dip;
> > > > +   int sr_handle;
> > > > +
> > > > +   /* All reads will come from the boot chunk. */
> > > > +   bc = sr_vol_boot_chunk(bv);
> > > > +   if (bc == NULL)
> > > > +   return ENXIO;
> > > > +   bc_dip = (struct diskinfo *)bc->sbc_diskinfo;
> > > > +   sr_handle = OF_open(bc_dip->path);
> > > > +   if (sr_handle == -1)
> > > > +   return EIO;
> > > > +
> > > > dip = alloc(sizeof(struct diskinfo));
> > > > bzero(dip, sizeof(*dip));
> > > > dip->sr_vol = bv;
> > > > +   dip->sr_handle = sr_handle;
> > > > bv->sbv_diskinfo = dip;
> > > > }
> > > >  
> > > > @@ -331,7 +346,8 @@ srbootdev(const char *bootline)
> > > >  
> > > > /* Attempt to read disklabel. */
> > > > bv->sbv_part = 'c';
> > > > -   if (sr_getdisklabel(bv, >disklabel)) {
> > > > +   if (sr_getdisklabel(bv, _dip->disklabel)) {
> > > > +   OF_close(bootdev_dip->sr_handle);
> > > > free(bv->sbv_diskinfo, sizeof(struct diskinfo));
> > > > bv->sbv_diskinfo = NULL;
> > > > bootdev_dip = NULL;
> > > > Index: disk.h
> > > > ===
> > > > RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/disk.h,v
> > > > retrieving revision 1.1
> > > > diff -u -p -r1.1 disk.h
> > > > --- disk.h  26 Nov 2014 20:30:41 -  1.1
> > > > +++ disk.h  12 Mar 2018 10:26:23 -
> > > > @@ -36,7 +36,10 @@
> > > >  struct diskinfo {
> > > > char path[256];
> > > > struct disklabel disklabel;
> > > > +
> > > > +   /* Used during softraid boot. */
> > > > struct sr_boot_volume *sr_vol;
> > > > +   int sr_handle; /* open handle for reading from boot chunk */
> > > >  
> > > > TAILQ_ENTRY(diskinfo) list;
> > > >  };
> > > > Index: ofdev.c
> 

Re: sparc64 softraid boot: workaround for memory leak

2018-03-29 Thread Mark Kettenis
> Date: Wed, 28 Mar 2018 23:46:49 -0700
> From: Mike Larkin 
> 
> On Thu, Mar 29, 2018 at 08:40:27AM +0200, Stefan Sperling wrote:
> > On Mon, Mar 12, 2018 at 11:38:14AM +0100, Stefan Sperling wrote:
> > > I haven't had any feedback on the previous diff. However, I felt it
> > > was a bit of a hack, so I tried to come up with a cleaner solution.
> > 
> > Anyone? Can this go in now? I hope to get this tested across
> > many sparc64 machines during the 6.4 release cycle.
> > 
> 
> Wish I could help but my T5240 has never been able to run any ldoms. Something
> about mismatched firmware versions. ldomctl(8) just generates interrupt
> storms.

Even with all the fixes that stsp@ made during the 6.3 release cycle?

> > > The diff below opens the softraid boot chunk early on and keeps the
> > > handle open until a kernel has been loaded from the softraid volume.
> > > This means sr_strategy() does not have to worry about obtaining a
> > > handle from the firmware anymore.
> > > 
> > > Also consolidate some repeated lines of code in sr_strategy().
> > > 
> > > Tested on a T5220 ldom guest (CRYPTO) and a v240 machine (RAID1).
> > > 
> > > Index: boot.c
> > > ===
> > > RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/boot.c,v
> > > retrieving revision 1.27
> > > diff -u -p -r1.27 boot.c
> > > --- boot.c11 Sep 2016 17:53:26 -  1.27
> > > +++ boot.c12 Mar 2018 10:26:23 -
> > > @@ -248,6 +248,8 @@ loadfile(int fd, char *args)
> > >   close(fd);
> > >  
> > >  #ifdef SOFTRAID
> > > + if (bootdev_dip)
> > > + OF_close(bootdev_dip->sr_handle);
> > >   sr_clear_keys();
> > >  #endif
> > >   chain(entry, args, ssym, esym);
> > > @@ -283,12 +285,11 @@ fail:
> > >  }
> > >  
> > >  #ifdef SOFTRAID
> > > -/* Set bootdev_dip to the software boot volume, if specified. */
> > > +/* Set bootdev_dip to the softraid boot volume, if specified. */
> > >  static int
> > >  srbootdev(const char *bootline)
> > >  {
> > >   struct sr_boot_volume *bv;
> > > - struct diskinfo *dip;
> > >   int unit;
> > >  
> > >   bootdev_dip = NULL;
> > > @@ -320,9 +321,23 @@ srbootdev(const char *bootline)
> > >   return EPERM;
> > >  
> > >   if (bv->sbv_diskinfo == NULL) {
> > > + struct sr_boot_chunk *bc;
> > > + struct diskinfo *dip, *bc_dip;
> > > + int sr_handle;
> > > +
> > > + /* All reads will come from the boot chunk. */
> > > + bc = sr_vol_boot_chunk(bv);
> > > + if (bc == NULL)
> > > + return ENXIO;
> > > + bc_dip = (struct diskinfo *)bc->sbc_diskinfo;
> > > + sr_handle = OF_open(bc_dip->path);
> > > + if (sr_handle == -1)
> > > + return EIO;
> > > +
> > >   dip = alloc(sizeof(struct diskinfo));
> > >   bzero(dip, sizeof(*dip));
> > >   dip->sr_vol = bv;
> > > + dip->sr_handle = sr_handle;
> > >   bv->sbv_diskinfo = dip;
> > >   }
> > >  
> > > @@ -331,7 +346,8 @@ srbootdev(const char *bootline)
> > >  
> > >   /* Attempt to read disklabel. */
> > >   bv->sbv_part = 'c';
> > > - if (sr_getdisklabel(bv, >disklabel)) {
> > > + if (sr_getdisklabel(bv, _dip->disklabel)) {
> > > + OF_close(bootdev_dip->sr_handle);
> > >   free(bv->sbv_diskinfo, sizeof(struct diskinfo));
> > >   bv->sbv_diskinfo = NULL;
> > >   bootdev_dip = NULL;
> > > Index: disk.h
> > > ===
> > > RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/disk.h,v
> > > retrieving revision 1.1
> > > diff -u -p -r1.1 disk.h
> > > --- disk.h26 Nov 2014 20:30:41 -  1.1
> > > +++ disk.h12 Mar 2018 10:26:23 -
> > > @@ -36,7 +36,10 @@
> > >  struct diskinfo {
> > >   char path[256];
> > >   struct disklabel disklabel;
> > > +
> > > + /* Used during softraid boot. */
> > >   struct sr_boot_volume *sr_vol;
> > > + int sr_handle; /* open handle for reading from boot chunk */
> > >  
> > >   TAILQ_ENTRY(diskinfo) list;
> > >  };
> > > Index: ofdev.c
> > > ===
> > > RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/ofdev.c,v
> > > retrieving revision 1.25
> > > diff -u -p -r1.25 ofdev.c
> > > --- ofdev.c   1 Oct 2015 16:08:20 -   1.25
> > > +++ ofdev.c   12 Mar 2018 10:26:23 -
> > > @@ -125,8 +125,8 @@ strategy(void *devdata, int rw, daddr32_
> > >  #ifdef SOFTRAID
> > >   /* Intercept strategy for softraid volumes. */
> > >   if (dev->type == OFDEV_SOFTRAID)
> > > - return sr_strategy(bootdev_dip->sr_vol, rw,
> > > - blk, size, buf, rsize);
> > > + return 

Re: sparc64 softraid boot: workaround for memory leak

2018-03-29 Thread Mark Kettenis
> From: Stefan Sperling 
> 
> On Mon, Mar 12, 2018 at 11:38:14AM +0100, Stefan Sperling wrote:
> > I haven't had any feedback on the previous diff. However, I felt it
> > was a bit of a hack, so I tried to come up with a cleaner solution.
> 
> Anyone? Can this go in now? I hope to get this tested across
> many sparc64 machines during the 6.4 release cycle.

ok kettenis@

> > The diff below opens the softraid boot chunk early on and keeps the
> > handle open until a kernel has been loaded from the softraid volume.
> > This means sr_strategy() does not have to worry about obtaining a
> > handle from the firmware anymore.
> > 
> > Also consolidate some repeated lines of code in sr_strategy().
> > 
> > Tested on a T5220 ldom guest (CRYPTO) and a v240 machine (RAID1).
> > 
> > Index: boot.c
> > ===
> > RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/boot.c,v
> > retrieving revision 1.27
> > diff -u -p -r1.27 boot.c
> > --- boot.c  11 Sep 2016 17:53:26 -  1.27
> > +++ boot.c  12 Mar 2018 10:26:23 -
> > @@ -248,6 +248,8 @@ loadfile(int fd, char *args)
> > close(fd);
> >  
> >  #ifdef SOFTRAID
> > +   if (bootdev_dip)
> > +   OF_close(bootdev_dip->sr_handle);
> > sr_clear_keys();
> >  #endif
> > chain(entry, args, ssym, esym);
> > @@ -283,12 +285,11 @@ fail:
> >  }
> >  
> >  #ifdef SOFTRAID
> > -/* Set bootdev_dip to the software boot volume, if specified. */
> > +/* Set bootdev_dip to the softraid boot volume, if specified. */
> >  static int
> >  srbootdev(const char *bootline)
> >  {
> > struct sr_boot_volume *bv;
> > -   struct diskinfo *dip;
> > int unit;
> >  
> > bootdev_dip = NULL;
> > @@ -320,9 +321,23 @@ srbootdev(const char *bootline)
> > return EPERM;
> >  
> > if (bv->sbv_diskinfo == NULL) {
> > +   struct sr_boot_chunk *bc;
> > +   struct diskinfo *dip, *bc_dip;
> > +   int sr_handle;
> > +
> > +   /* All reads will come from the boot chunk. */
> > +   bc = sr_vol_boot_chunk(bv);
> > +   if (bc == NULL)
> > +   return ENXIO;
> > +   bc_dip = (struct diskinfo *)bc->sbc_diskinfo;
> > +   sr_handle = OF_open(bc_dip->path);
> > +   if (sr_handle == -1)
> > +   return EIO;
> > +
> > dip = alloc(sizeof(struct diskinfo));
> > bzero(dip, sizeof(*dip));
> > dip->sr_vol = bv;
> > +   dip->sr_handle = sr_handle;
> > bv->sbv_diskinfo = dip;
> > }
> >  
> > @@ -331,7 +346,8 @@ srbootdev(const char *bootline)
> >  
> > /* Attempt to read disklabel. */
> > bv->sbv_part = 'c';
> > -   if (sr_getdisklabel(bv, >disklabel)) {
> > +   if (sr_getdisklabel(bv, _dip->disklabel)) {
> > +   OF_close(bootdev_dip->sr_handle);
> > free(bv->sbv_diskinfo, sizeof(struct diskinfo));
> > bv->sbv_diskinfo = NULL;
> > bootdev_dip = NULL;
> > Index: disk.h
> > ===
> > RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/disk.h,v
> > retrieving revision 1.1
> > diff -u -p -r1.1 disk.h
> > --- disk.h  26 Nov 2014 20:30:41 -  1.1
> > +++ disk.h  12 Mar 2018 10:26:23 -
> > @@ -36,7 +36,10 @@
> >  struct diskinfo {
> > char path[256];
> > struct disklabel disklabel;
> > +
> > +   /* Used during softraid boot. */
> > struct sr_boot_volume *sr_vol;
> > +   int sr_handle; /* open handle for reading from boot chunk */
> >  
> > TAILQ_ENTRY(diskinfo) list;
> >  };
> > Index: ofdev.c
> > ===
> > RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/ofdev.c,v
> > retrieving revision 1.25
> > diff -u -p -r1.25 ofdev.c
> > --- ofdev.c 1 Oct 2015 16:08:20 -   1.25
> > +++ ofdev.c 12 Mar 2018 10:26:23 -
> > @@ -125,8 +125,8 @@ strategy(void *devdata, int rw, daddr32_
> >  #ifdef SOFTRAID
> > /* Intercept strategy for softraid volumes. */
> > if (dev->type == OFDEV_SOFTRAID)
> > -   return sr_strategy(bootdev_dip->sr_vol, rw,
> > -   blk, size, buf, rsize);
> > +   return sr_strategy(bootdev_dip->sr_vol, bootdev_dip->sr_handle,
> > +   rw, blk, size, buf, rsize);
> >  #endif
> > if (dev->type != OFDEV_DISK)
> > panic("strategy");
> > Index: softraid_sparc64.c
> > ===
> > RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/softraid_sparc64.c,v
> > retrieving revision 1.2
> > diff -u -p -r1.2 softraid_sparc64.c
> > --- softraid_sparc64.c  11 Sep 2016 17:53:26 -  1.2
> > +++ 

Re: sparc64 softraid boot: workaround for memory leak

2018-03-29 Thread Mike Larkin
On Thu, Mar 29, 2018 at 08:40:27AM +0200, Stefan Sperling wrote:
> On Mon, Mar 12, 2018 at 11:38:14AM +0100, Stefan Sperling wrote:
> > I haven't had any feedback on the previous diff. However, I felt it
> > was a bit of a hack, so I tried to come up with a cleaner solution.
> 
> Anyone? Can this go in now? I hope to get this tested across
> many sparc64 machines during the 6.4 release cycle.
> 

Wish I could help but my T5240 has never been able to run any ldoms. Something
about mismatched firmware versions. ldomctl(8) just generates interrupt
storms.

-ml

> > The diff below opens the softraid boot chunk early on and keeps the
> > handle open until a kernel has been loaded from the softraid volume.
> > This means sr_strategy() does not have to worry about obtaining a
> > handle from the firmware anymore.
> > 
> > Also consolidate some repeated lines of code in sr_strategy().
> > 
> > Tested on a T5220 ldom guest (CRYPTO) and a v240 machine (RAID1).
> > 
> > Index: boot.c
> > ===
> > RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/boot.c,v
> > retrieving revision 1.27
> > diff -u -p -r1.27 boot.c
> > --- boot.c  11 Sep 2016 17:53:26 -  1.27
> > +++ boot.c  12 Mar 2018 10:26:23 -
> > @@ -248,6 +248,8 @@ loadfile(int fd, char *args)
> > close(fd);
> >  
> >  #ifdef SOFTRAID
> > +   if (bootdev_dip)
> > +   OF_close(bootdev_dip->sr_handle);
> > sr_clear_keys();
> >  #endif
> > chain(entry, args, ssym, esym);
> > @@ -283,12 +285,11 @@ fail:
> >  }
> >  
> >  #ifdef SOFTRAID
> > -/* Set bootdev_dip to the software boot volume, if specified. */
> > +/* Set bootdev_dip to the softraid boot volume, if specified. */
> >  static int
> >  srbootdev(const char *bootline)
> >  {
> > struct sr_boot_volume *bv;
> > -   struct diskinfo *dip;
> > int unit;
> >  
> > bootdev_dip = NULL;
> > @@ -320,9 +321,23 @@ srbootdev(const char *bootline)
> > return EPERM;
> >  
> > if (bv->sbv_diskinfo == NULL) {
> > +   struct sr_boot_chunk *bc;
> > +   struct diskinfo *dip, *bc_dip;
> > +   int sr_handle;
> > +
> > +   /* All reads will come from the boot chunk. */
> > +   bc = sr_vol_boot_chunk(bv);
> > +   if (bc == NULL)
> > +   return ENXIO;
> > +   bc_dip = (struct diskinfo *)bc->sbc_diskinfo;
> > +   sr_handle = OF_open(bc_dip->path);
> > +   if (sr_handle == -1)
> > +   return EIO;
> > +
> > dip = alloc(sizeof(struct diskinfo));
> > bzero(dip, sizeof(*dip));
> > dip->sr_vol = bv;
> > +   dip->sr_handle = sr_handle;
> > bv->sbv_diskinfo = dip;
> > }
> >  
> > @@ -331,7 +346,8 @@ srbootdev(const char *bootline)
> >  
> > /* Attempt to read disklabel. */
> > bv->sbv_part = 'c';
> > -   if (sr_getdisklabel(bv, >disklabel)) {
> > +   if (sr_getdisklabel(bv, _dip->disklabel)) {
> > +   OF_close(bootdev_dip->sr_handle);
> > free(bv->sbv_diskinfo, sizeof(struct diskinfo));
> > bv->sbv_diskinfo = NULL;
> > bootdev_dip = NULL;
> > Index: disk.h
> > ===
> > RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/disk.h,v
> > retrieving revision 1.1
> > diff -u -p -r1.1 disk.h
> > --- disk.h  26 Nov 2014 20:30:41 -  1.1
> > +++ disk.h  12 Mar 2018 10:26:23 -
> > @@ -36,7 +36,10 @@
> >  struct diskinfo {
> > char path[256];
> > struct disklabel disklabel;
> > +
> > +   /* Used during softraid boot. */
> > struct sr_boot_volume *sr_vol;
> > +   int sr_handle; /* open handle for reading from boot chunk */
> >  
> > TAILQ_ENTRY(diskinfo) list;
> >  };
> > Index: ofdev.c
> > ===
> > RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/ofdev.c,v
> > retrieving revision 1.25
> > diff -u -p -r1.25 ofdev.c
> > --- ofdev.c 1 Oct 2015 16:08:20 -   1.25
> > +++ ofdev.c 12 Mar 2018 10:26:23 -
> > @@ -125,8 +125,8 @@ strategy(void *devdata, int rw, daddr32_
> >  #ifdef SOFTRAID
> > /* Intercept strategy for softraid volumes. */
> > if (dev->type == OFDEV_SOFTRAID)
> > -   return sr_strategy(bootdev_dip->sr_vol, rw,
> > -   blk, size, buf, rsize);
> > +   return sr_strategy(bootdev_dip->sr_vol, bootdev_dip->sr_handle,
> > +   rw, blk, size, buf, rsize);
> >  #endif
> > if (dev->type != OFDEV_DISK)
> > panic("strategy");
> > Index: softraid_sparc64.c
> > ===
> > RCS file: 

Re: sparc64 softraid boot: workaround for memory leak

2018-03-29 Thread Stefan Sperling
On Mon, Mar 12, 2018 at 11:38:14AM +0100, Stefan Sperling wrote:
> I haven't had any feedback on the previous diff. However, I felt it
> was a bit of a hack, so I tried to come up with a cleaner solution.

Anyone? Can this go in now? I hope to get this tested across
many sparc64 machines during the 6.4 release cycle.

> The diff below opens the softraid boot chunk early on and keeps the
> handle open until a kernel has been loaded from the softraid volume.
> This means sr_strategy() does not have to worry about obtaining a
> handle from the firmware anymore.
> 
> Also consolidate some repeated lines of code in sr_strategy().
> 
> Tested on a T5220 ldom guest (CRYPTO) and a v240 machine (RAID1).
> 
> Index: boot.c
> ===
> RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/boot.c,v
> retrieving revision 1.27
> diff -u -p -r1.27 boot.c
> --- boot.c11 Sep 2016 17:53:26 -  1.27
> +++ boot.c12 Mar 2018 10:26:23 -
> @@ -248,6 +248,8 @@ loadfile(int fd, char *args)
>   close(fd);
>  
>  #ifdef SOFTRAID
> + if (bootdev_dip)
> + OF_close(bootdev_dip->sr_handle);
>   sr_clear_keys();
>  #endif
>   chain(entry, args, ssym, esym);
> @@ -283,12 +285,11 @@ fail:
>  }
>  
>  #ifdef SOFTRAID
> -/* Set bootdev_dip to the software boot volume, if specified. */
> +/* Set bootdev_dip to the softraid boot volume, if specified. */
>  static int
>  srbootdev(const char *bootline)
>  {
>   struct sr_boot_volume *bv;
> - struct diskinfo *dip;
>   int unit;
>  
>   bootdev_dip = NULL;
> @@ -320,9 +321,23 @@ srbootdev(const char *bootline)
>   return EPERM;
>  
>   if (bv->sbv_diskinfo == NULL) {
> + struct sr_boot_chunk *bc;
> + struct diskinfo *dip, *bc_dip;
> + int sr_handle;
> +
> + /* All reads will come from the boot chunk. */
> + bc = sr_vol_boot_chunk(bv);
> + if (bc == NULL)
> + return ENXIO;
> + bc_dip = (struct diskinfo *)bc->sbc_diskinfo;
> + sr_handle = OF_open(bc_dip->path);
> + if (sr_handle == -1)
> + return EIO;
> +
>   dip = alloc(sizeof(struct diskinfo));
>   bzero(dip, sizeof(*dip));
>   dip->sr_vol = bv;
> + dip->sr_handle = sr_handle;
>   bv->sbv_diskinfo = dip;
>   }
>  
> @@ -331,7 +346,8 @@ srbootdev(const char *bootline)
>  
>   /* Attempt to read disklabel. */
>   bv->sbv_part = 'c';
> - if (sr_getdisklabel(bv, >disklabel)) {
> + if (sr_getdisklabel(bv, _dip->disklabel)) {
> + OF_close(bootdev_dip->sr_handle);
>   free(bv->sbv_diskinfo, sizeof(struct diskinfo));
>   bv->sbv_diskinfo = NULL;
>   bootdev_dip = NULL;
> Index: disk.h
> ===
> RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/disk.h,v
> retrieving revision 1.1
> diff -u -p -r1.1 disk.h
> --- disk.h26 Nov 2014 20:30:41 -  1.1
> +++ disk.h12 Mar 2018 10:26:23 -
> @@ -36,7 +36,10 @@
>  struct diskinfo {
>   char path[256];
>   struct disklabel disklabel;
> +
> + /* Used during softraid boot. */
>   struct sr_boot_volume *sr_vol;
> + int sr_handle; /* open handle for reading from boot chunk */
>  
>   TAILQ_ENTRY(diskinfo) list;
>  };
> Index: ofdev.c
> ===
> RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/ofdev.c,v
> retrieving revision 1.25
> diff -u -p -r1.25 ofdev.c
> --- ofdev.c   1 Oct 2015 16:08:20 -   1.25
> +++ ofdev.c   12 Mar 2018 10:26:23 -
> @@ -125,8 +125,8 @@ strategy(void *devdata, int rw, daddr32_
>  #ifdef SOFTRAID
>   /* Intercept strategy for softraid volumes. */
>   if (dev->type == OFDEV_SOFTRAID)
> - return sr_strategy(bootdev_dip->sr_vol, rw,
> - blk, size, buf, rsize);
> + return sr_strategy(bootdev_dip->sr_vol, bootdev_dip->sr_handle,
> + rw, blk, size, buf, rsize);
>  #endif
>   if (dev->type != OFDEV_DISK)
>   panic("strategy");
> Index: softraid_sparc64.c
> ===
> RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/softraid_sparc64.c,v
> retrieving revision 1.2
> diff -u -p -r1.2 softraid_sparc64.c
> --- softraid_sparc64.c11 Sep 2016 17:53:26 -  1.2
> +++ softraid_sparc64.c12 Mar 2018 10:26:23 -
> @@ -306,9 +306,25 @@ srprobe(void)
>   free(md, SR_META_SIZE * DEV_BSIZE);
>  }
>  
> +struct sr_boot_chunk *
> +sr_vol_boot_chunk(struct 

Re: sparc64 softraid boot: workaround for memory leak

2018-03-12 Thread Stefan Sperling
On Sun, Mar 04, 2018 at 05:02:45PM +0100, Stefan Sperling wrote:
> On my T5220 LDOM guests cannot boot from softraid because ofwboot
> crashes with a "Fast Data Access MMU Miss" while loading the kernel:
> 
> >> OpenBSD BOOT 1.9   
> ERROR: /iscsi-hba: No iscsi-network-bootpath property  
> sr0*
> Passphrase:   
> Booting sr0:a/bsd   
> 8480888@0x100
> ERROR: Last Trap: Fast Data Access MMU Miss 
> 
> I've tracked down the failure to a crash in OF_open() called in sr_strategy().
> There is no missing OF_close() call. So it seems a memory/resource leak
> of some kind is happening in firmware during OF_open()/OF_close().
> 
> Affected firmware version info:
> 
>   SP firmware 3.0.12.8.a
>   SP firmware build number: 108523
>   SP firmware date: Fri Mar 11 07:19:16 PST 2016
>   SP filesystem version: 0.1.22
> 
> hypervisor_version = Hypervisor 1.10.7.h 2016/03/11 07:13
> obp_version = OpenBoot 4.33.6.g 2016/03/11 06:05
> post_version = POST 4.33.6.g 2016/03/11 06:15
> status = OpenBSD running
> sysfw_version = Sun System Firmware 7.4.10.a 2016/03/11 07:45
> 
> The diff below allows a guest to boot from softraid on this machine.

I haven't had any feedback on the previous diff. However, I felt it
was a bit of a hack, so I tried to come up with a cleaner solution.

The diff below opens the softraid boot chunk early on and keeps the
handle open until a kernel has been loaded from the softraid volume.
This means sr_strategy() does not have to worry about obtaining a
handle from the firmware anymore.

Also consolidate some repeated lines of code in sr_strategy().

Tested on a T5220 ldom guest (CRYPTO) and a v240 machine (RAID1).

Index: boot.c
===
RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/boot.c,v
retrieving revision 1.27
diff -u -p -r1.27 boot.c
--- boot.c  11 Sep 2016 17:53:26 -  1.27
+++ boot.c  12 Mar 2018 10:26:23 -
@@ -248,6 +248,8 @@ loadfile(int fd, char *args)
close(fd);
 
 #ifdef SOFTRAID
+   if (bootdev_dip)
+   OF_close(bootdev_dip->sr_handle);
sr_clear_keys();
 #endif
chain(entry, args, ssym, esym);
@@ -283,12 +285,11 @@ fail:
 }
 
 #ifdef SOFTRAID
-/* Set bootdev_dip to the software boot volume, if specified. */
+/* Set bootdev_dip to the softraid boot volume, if specified. */
 static int
 srbootdev(const char *bootline)
 {
struct sr_boot_volume *bv;
-   struct diskinfo *dip;
int unit;
 
bootdev_dip = NULL;
@@ -320,9 +321,23 @@ srbootdev(const char *bootline)
return EPERM;
 
if (bv->sbv_diskinfo == NULL) {
+   struct sr_boot_chunk *bc;
+   struct diskinfo *dip, *bc_dip;
+   int sr_handle;
+
+   /* All reads will come from the boot chunk. */
+   bc = sr_vol_boot_chunk(bv);
+   if (bc == NULL)
+   return ENXIO;
+   bc_dip = (struct diskinfo *)bc->sbc_diskinfo;
+   sr_handle = OF_open(bc_dip->path);
+   if (sr_handle == -1)
+   return EIO;
+
dip = alloc(sizeof(struct diskinfo));
bzero(dip, sizeof(*dip));
dip->sr_vol = bv;
+   dip->sr_handle = sr_handle;
bv->sbv_diskinfo = dip;
}
 
@@ -331,7 +346,8 @@ srbootdev(const char *bootline)
 
/* Attempt to read disklabel. */
bv->sbv_part = 'c';
-   if (sr_getdisklabel(bv, >disklabel)) {
+   if (sr_getdisklabel(bv, _dip->disklabel)) {
+   OF_close(bootdev_dip->sr_handle);
free(bv->sbv_diskinfo, sizeof(struct diskinfo));
bv->sbv_diskinfo = NULL;
bootdev_dip = NULL;
Index: disk.h
===
RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/disk.h,v
retrieving revision 1.1
diff -u -p -r1.1 disk.h
--- disk.h  26 Nov 2014 20:30:41 -  1.1
+++ disk.h  12 Mar 2018 10:26:23 -
@@ -36,7 +36,10 @@
 struct diskinfo {
char path[256];
struct disklabel disklabel;
+
+   /* Used during softraid boot. */
struct sr_boot_volume *sr_vol;
+   int sr_handle; /* open handle for reading from boot chunk */
 
TAILQ_ENTRY(diskinfo) list;
 };
Index: ofdev.c
===
RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/ofdev.c,v
retrieving revision 1.25
diff -u -p -r1.25 ofdev.c
--- ofdev.c 1 Oct 2015 16:08:20 -  

sparc64 softraid boot: workaround for memory leak

2018-03-04 Thread Stefan Sperling
On my T5220 LDOM guests cannot boot from softraid because ofwboot
crashes with a "Fast Data Access MMU Miss" while loading the kernel:

>> OpenBSD BOOT 1.9   
ERROR: /iscsi-hba: No iscsi-network-bootpath property  
sr0*
Passphrase:   
Booting sr0:a/bsd   
8480888@0x100
ERROR: Last Trap: Fast Data Access MMU Miss 

I've tracked down the failure to a crash in OF_open() called in sr_strategy().
There is no missing OF_close() call. So it seems a memory/resource leak
of some kind is happening in firmware during OF_open()/OF_close().

Affected firmware version info:

SP firmware 3.0.12.8.a
SP firmware build number: 108523
SP firmware date: Fri Mar 11 07:19:16 PST 2016
SP filesystem version: 0.1.22

hypervisor_version = Hypervisor 1.10.7.h 2016/03/11 07:13
obp_version = OpenBoot 4.33.6.g 2016/03/11 06:05
post_version = POST 4.33.6.g 2016/03/11 06:15
status = OpenBSD running
sysfw_version = Sun System Firmware 7.4.10.a 2016/03/11 07:45

The diff below allows a guest to boot from softraid on this machine.

ok?

Index: disk.h
===
RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/disk.h,v
retrieving revision 1.1
diff -u -p -r1.1 disk.h
--- disk.h  26 Nov 2014 20:30:41 -  1.1
+++ disk.h  4 Mar 2018 15:56:59 -
@@ -37,6 +37,9 @@ struct diskinfo {
char path[256];
struct disklabel disklabel;
struct sr_boot_volume *sr_vol;
+   int ihandle;
+   int flags;
+#define DISKINFO_FLAG_OPEN 0x1
 
TAILQ_ENTRY(diskinfo) list;
 };
Index: ofdev.c
===
RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/ofdev.c,v
retrieving revision 1.25
diff -u -p -r1.25 ofdev.c
--- ofdev.c 1 Oct 2015 16:08:20 -   1.25
+++ ofdev.c 4 Mar 2018 15:56:59 -
@@ -165,6 +165,7 @@ devclose(struct open_file *of)
 #endif
 #ifdef SOFTRAID
if (op->type == OFDEV_SOFTRAID) {
+   sr_close_volume(bootdev_dip->sr_vol);
op->handle = -1;
return 0;
}
Index: softraid_sparc64.c
===
RCS file: /cvs/src/sys/arch/sparc64/stand/ofwboot/softraid_sparc64.c,v
retrieving revision 1.2
diff -u -p -r1.2 softraid_sparc64.c
--- softraid_sparc64.c  11 Sep 2016 17:53:26 -  1.2
+++ softraid_sparc64.c  4 Mar 2018 15:56:59 -
@@ -306,6 +306,22 @@ srprobe(void)
free(md, SR_META_SIZE * DEV_BSIZE);
 }
 
+/* 
+ * Cache ihandle to work around memory leaks in firmware
+ * with repeated OF_open/OF_close calls.
+ */
+int
+sr_open_disk(struct diskinfo *dip)
+{
+   if ((dip->flags & DISKINFO_FLAG_OPEN) == 0) {
+   dip->ihandle = OF_open(dip->path);
+   if (dip->ihandle != -1)
+   dip->flags |= DISKINFO_FLAG_OPEN;
+   }
+
+   return dip->ihandle;
+}
+
 int
 sr_strategy(struct sr_boot_volume *bv, int rw, daddr32_t blk, size_t size,
 void *buf, size_t *rsize)
@@ -347,7 +363,7 @@ sr_strategy(struct sr_boot_volume *bv, i
blk += bv->sbv_data_blkno;
 
/* XXX - If I/O failed we should try another chunk... */
-   ihandle = OF_open(dip->path);
+   ihandle = sr_open_disk(dip);
if (ihandle == -1)
return EIO;
bzero(, sizeof(ofdev));
@@ -356,7 +372,6 @@ sr_strategy(struct sr_boot_volume *bv, i
ofdev.bsize = DEV_BSIZE;
ofdev.partoff = DL_GETPOFFSET(pp);
err = strategy(, rw, blk, size, buf, rsize);
-   OF_close(ihandle);
return err;
 
} else if (bv->sbv_level == 'C') {
@@ -371,7 +386,7 @@ sr_strategy(struct sr_boot_volume *bv, i
dip = (struct diskinfo *)bc->sbc_diskinfo;
pp = >disklabel.d_partitions[bc->sbc_part - 'a'];
 
-   ihandle = OF_open(dip->path);
+   ihandle = sr_open_disk(dip);
if (ihandle == -1)
return EIO;
bzero(, sizeof(ofdev));
@@ -395,7 +410,6 @@ sr_strategy(struct sr_boot_volume *bv, i
printf("Read from crypto volume failed "
"(read %d bytes): %s\n", *rsize,
strerror(err));
-   OF_close(ihandle);
return err;
}
bcopy(, iv, sizeof(blkno));
@@ -404,11 +418,30 @@ sr_strategy(struct sr_boot_volume *bv, i
aes_xts_decrypt(, bp + j);
}
*rsize = nsect * DEV_BSIZE;
-   OF_close(ihandle);
return err;