Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-19 Thread Gleb Natapov
On Mon, Nov 15, 2010 at 08:29:24PM +, Blue Swirl wrote:
> 2010/11/15 Gleb Natapov :
> > On Sun, Nov 14, 2010 at 10:50:13PM +, Blue Swirl wrote:
> >> On Sun, Nov 14, 2010 at 3:39 PM, Gleb Natapov  wrote:
> >> >
> >> > Signed-off-by: Gleb Natapov 
> >> > ---
> >> >  hw/fw_cfg.c |   14 ++
> >> >  hw/fw_cfg.h |    4 +++-
> >> >  sysemu.h    |    1 +
> >> >  vl.c        |   51 +++
> >> >  4 files changed, 69 insertions(+), 1 deletions(-)
> >> >
> >> > diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c
> >> > index 7b9434f..f6a67db 100644
> >> > --- a/hw/fw_cfg.c
> >> > +++ b/hw/fw_cfg.c
> >> > @@ -53,6 +53,7 @@ struct FWCfgState {
> >> >     FWCfgFiles *files;
> >> >     uint16_t cur_entry;
> >> >     uint32_t cur_offset;
> >> > +    Notifier machine_ready;
> >> >  };
> >> >
> >> >  static void fw_cfg_write(FWCfgState *s, uint8_t value)
> >> > @@ -315,6 +316,15 @@ int fw_cfg_add_file(FWCfgState *s,  const char 
> >> > *filename, uint8_t *data,
> >> >     return 1;
> >> >  }
> >> >
> >> > +static void fw_cfg_machine_ready(struct Notifier* n)
> >> > +{
> >> > +    uint32_t len;
> >> > +    char *bootindex = get_boot_devices_list(&len);
> >> > +
> >> > +    fw_cfg_add_bytes(container_of(n, FWCfgState, machine_ready),
> >> > +                     FW_CFG_BOOTINDEX, (uint8_t*)bootindex, len);
> >> > +}
> >> > +
> >> >  FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t data_port,
> >> >                         target_phys_addr_t ctl_addr, target_phys_addr_t 
> >> > data_addr)
> >> >  {
> >> > @@ -343,6 +353,10 @@ FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t 
> >> > data_port,
> >> >     fw_cfg_add_i16(s, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
> >> >     fw_cfg_add_i16(s, FW_CFG_BOOT_MENU, (uint16_t)boot_menu);
> >> >
> >> > +
> >> > +    s->machine_ready.notify = fw_cfg_machine_ready;
> >> > +    qemu_add_machine_init_done_notifier(&s->machine_ready);
> >> > +
> >> >     return s;
> >> >  }
> >> >
> >> > diff --git a/hw/fw_cfg.h b/hw/fw_cfg.h
> >> > index 856bf91..4d61410 100644
> >> > --- a/hw/fw_cfg.h
> >> > +++ b/hw/fw_cfg.h
> >> > @@ -30,7 +30,9 @@
> >> >
> >> >  #define FW_CFG_FILE_FIRST       0x20
> >> >  #define FW_CFG_FILE_SLOTS       0x10
> >> > -#define FW_CFG_MAX_ENTRY        (FW_CFG_FILE_FIRST+FW_CFG_FILE_SLOTS)
> >> > +#define FW_CFG_FILE_LAST_SLOT   (FW_CFG_FILE_FIRST+FW_CFG_FILE_SLOTS)
> >> > +#define FW_CFG_BOOTINDEX        (FW_CFG_FILE_LAST_SLOT + 1)
> >> > +#define FW_CFG_MAX_ENTRY        FW_CFG_BOOTINDEX
> >>
> >> This should be
> >> #define FW_CFG_MAX_ENTRY        (FW_CFG_BOOTINDEX + 1)
> >> because the check is like this:
> >>     if ((key & FW_CFG_ENTRY_MASK) >= FW_CFG_MAX_ENTRY) {
> >>         s->cur_entry = FW_CFG_INVALID;
> >>
> > Yeah, will fix.
> >
> >> With that change, I got the bootindex passed to OpenBIOS:
> >> OpenBIOS for Sparc64
> >> Configuration device id QEMU version 1 machine id 0
> >> kernel cmdline
> >> CPUs: 1 x SUNW,UltraSPARC-IIi
> >> UUID: ----
> >> bootindex num_strings 1
> >> bootindex /p...@01fe/i...@5/dr...@1/d...@0
> >>
> >> The device path does not match exactly, but it's close:
> >> /p...@1fe,0/pci-...@5/i...@600/d...@0
> >
> > pbm->pci should be solvable by the patch at the end. Were in the spec
> > it is allowed to abbreviate 1fe as 1fe,0? Spec allows to drop
> > starting zeroes but TARGET_FMT_plx definition in targphys.h has 0 after
> > %. I can define another one without leading zeroes. Can you suggest
> > a name?
> 
> I think OpenBIOS for Sparc64 is not correct here, so it may be a bad
> reference architecture. OBP on a real Ultra-5 used a path like this:
> /p...@1f,0/p...@1,1/i...@3/d...@0,0
> 
> p...@1f,0 specifies the PCI host bridge at UPA bus port ID of 0x1f.
According to device name qemu creates pci controller is memory mapped
at address 1fe and by looking at the code I can see that this
is indeed the case. How is UPA naming works?

> p...@1,1 specifies a PCI-PCI bridge.
> 
> > TARGET_FMT_lx is poisoned. As of ATA there is no open firmware
> > binding spec for ATA, so everyone does what he pleases. I based my
> > implementation on what open firmware showing when running on qemu x86.
> > "pci-ata" should be "ide" according to PCI binding spec :)
> 
> Yes, for example there is no ATA in the Ultra-5 tree but in UltraAX it exists:
> /p...@1f,4000/i...@3/a...@0,0/c...@0,0
> 
> > diff --git a/hw/apb_pci.c b/hw/apb_pci.c
> > index c619112..643aa49 100644
> > --- a/hw/apb_pci.c
> > +++ b/hw/apb_pci.c
> > @@ -453,6 +453,7 @@ static PCIDeviceInfo pbm_pci_host_info = {
> >
> >  static SysBusDeviceInfo pbm_host_info = {
> >     .qdev.name = "pbm",
> > +    .qdev.fw_name = "pci",
> 
> Perhaps the FW path should use device class names if no name is specified.
What do you mean by "device class name". We can do something like this:
if (dev->child_bus.lh_first)
return dev->child_bus.lh_first->info->name;

i.e if there is child bus use its bus n

Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-19 Thread Kevin O'Connor
On Tue, Nov 16, 2010 at 09:22:45AM +0200, Gleb Natapov wrote:
> On Mon, Nov 15, 2010 at 09:52:19PM -0500, Kevin O'Connor wrote:
> > I also have an ulterior motive here.  If the boot order is exposed as
> > a newline separated list via an entry in QEMU_CFG_FILE_DIR, then this
> > becomes free for coreboot users as well.  (On coreboot, the boot order
> > could be placed in a "file" in flash with no change to the seabios
> > code.)
> > 
> You can define get_boot_order() function and implement it differently
> for qemu and coreboot. For coreboot it will be one linear. Just call
> cbfs_copyfile("bootorder"). BTW why newline separation is important? 

Sure, but it'd be nice to just use romfile_copy("bootorder").

Using newline separated just makes it easier for users to vi and/or
cat the file.

-Kevin
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-18 Thread Gleb Natapov
On Thu, Nov 18, 2010 at 03:12:02PM +0200, Michael S. Tsirkin wrote:
> On Thu, Nov 18, 2010 at 02:37:08PM +0200, Gleb Natapov wrote:
> > > So that's unavoidable if we think paths are correct.
> > > But if we know they are wrong, we are better off
> > > correcting them first IMO.
> > > 
> > They are correct for x86. My patch set does not even tries to cover all
> > HW. If sparc want to use them to it better be fixed. Or if there is enough
> > info in the path to determine device it may choose to use it as is.
> 
> Fair enough I guess.
> 
> > > > But the problem exists only if migration happens in a short window
> > > > between start of the boot process and BIOS reading boot order string.
> > > > After reboot new qemu should have new BIOS.
> > > 
> > > That makes it even more nasty, doesn't it?
> > No.
> 
> Nasty as in hard to reproduce.
> 
It is very easy to reproduce if you know what you are looking for :).
Just stick sleep() in correct place in the BIOS.
 
--
Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-18 Thread Michael S. Tsirkin
On Thu, Nov 18, 2010 at 02:37:08PM +0200, Gleb Natapov wrote:
> > So that's unavoidable if we think paths are correct.
> > But if we know they are wrong, we are better off
> > correcting them first IMO.
> > 
> They are correct for x86. My patch set does not even tries to cover all
> HW. If sparc want to use them to it better be fixed. Or if there is enough
> info in the path to determine device it may choose to use it as is.

Fair enough I guess.

> > > But the problem exists only if migration happens in a short window
> > > between start of the boot process and BIOS reading boot order string.
> > > After reboot new qemu should have new BIOS.
> > 
> > That makes it even more nasty, doesn't it?
> No.

Nasty as in hard to reproduce.

-- 
MST
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-18 Thread Gleb Natapov
On Thu, Nov 18, 2010 at 02:23:20PM +0200, Michael S. Tsirkin wrote:
> On Thu, Nov 18, 2010 at 02:16:05PM +0200, Gleb Natapov wrote:
> > On Thu, Nov 18, 2010 at 01:52:30PM +0200, Michael S. Tsirkin wrote:
> > > On Thu, Nov 18, 2010 at 01:45:04PM +0200, Gleb Natapov wrote:
> > > > On Thu, Nov 18, 2010 at 01:38:31PM +0200, Michael S. Tsirkin wrote:
> > > > > On Thu, Nov 18, 2010 at 12:18:27PM +0200, Gleb Natapov wrote:
> > > > > > On Wed, Nov 17, 2010 at 09:54:27PM +, Blue Swirl wrote:
> > > > > > > 2010/11/16 Gleb Natapov :
> > > > > > > > On Tue, Nov 16, 2010 at 06:30:19PM +, Blue Swirl wrote:
> > > > > > > >> >> Perhaps the FW path should use device class names if no 
> > > > > > > >> >> name is specified.
> > > > > > > >> > What do you mean by "device class name". We can do something 
> > > > > > > >> > like this:
> > > > > > > >> > if (dev->child_bus.lh_first)
> > > > > > > >> >        return dev->child_bus.lh_first->info->name;
> > > > > > > >> >
> > > > > > > >> > i.e if there is child bus use its bus name as fw name. This 
> > > > > > > >> > will make
> > > > > > > >> > all pci devices to have "pci" as fw name automatically. The 
> > > > > > > >> > problem is
> > > > > > > >> > that theoretically same device can provide different buses.
> > > > > > > >>
> > > > > > > >> I meant PCI class name, like "display" for display controllers,
> > > > > > > >> "network" for NICs etc.
> > > > > > > >>
> > > > > > > > That is what my pci bus related patch is doing already.
> > > > > > > >
> > > > > > > >> >> I'll try Sparc32 to see how this fits there.
> > > > > > > >>
> > > > > > > >> Except bootindex is not implemented for SCSI.
> > > > > > > > Will look into adding it.
> > > > > > > 
> > > > > > > Thanks. The bootindex on Sparc32 looks like this:
> > > > > > > bootindex /e...@7880/d...@1,0
> > > > > > > /ether...@/ethernet-...@0
> > > > > > > 
> > > > > > For arches other then x86 there is a lot of work left to be done :)
> > > > > > For starter exotic sparc buses should get their own 
> > > > > > get_fw_dev_path()
> > > > > > implementation.
> > > > > > 
> > > > > > > I don't think I got Lance setup right.
> > > > > > > 
> > > > > > > OF paths for the devices would be:
> > > > > > > /io...@0,1000/s...@0,10001000/esp...@5,840/e...@5,880/s...@1,0
> > > > > > > /io...@0,1000/s...@0,10001000/le...@5,8400010/l...@5,8c0
> > > > > > If qdev hierarchy does not correspond to real HW there is no much 
> > > > > > we can
> > > > > > do expect for fixing qdev.
> > > > > 
> > > > > That's bad.  This raises a concern: if these paths expose qdev
> > > > > internals, any attempt to fix this will break migration.
> > > > > 
> > > > The path expose internal HW hierarchy. It is designed to do so. Qdev
> > > > designed to do the same: describe HW hierarchy. If qdev fails to do so 
> > > > it
> > > > is broken.
> > > 
> > > Yes. But since you use qdev to build up the path, a broken
> > > qdev will give you a broken path.
> > > 
> > Qdev bug. Fix it like any other bug. The nice is that when you compare
> > device path produced by qdev and real HW you can see when qdev is wrong.
> > 
> > > > I do not see connection to migration at all since the path is
> > > > not used in migration code.
> > > 
> > > The connection is that if we pass the list with path 1 which you define
> > > as broken to BIOS, then migrate to a machine with an updated qemu
> > > which has a correct path, BIOS won't be able to complete the boot.
> > You solve it like you solve all such issue with -M machine type.
> 
> So that's unavoidable if we think paths are correct.
> But if we know they are wrong, we are better off
> correcting them first IMO.
> 
They are correct for x86. My patch set does not even tries to cover all
HW. If sparc want to use them to it better be fixed. Or if there is enough
info in the path to determine device it may choose to use it as is.

> > But the problem exists only if migration happens in a short window
> > between start of the boot process and BIOS reading boot order string.
> > After reboot new qemu should have new BIOS.
> 
> That makes it even more nasty, doesn't it?
No.

> 
> > > Right? Same in reverse direction.
> > Reverse direction is not and never was supported.
> > 
> > > As solution could be a fuzzy matching
> > > of paths that wiull let us recover.
> > > 
> > Firmware can try its best of course, but nothing is guarantied.
> 
> No I mean qemu could do matching fuzzily.
> This way if we get a path from the old BIOS we can
> survive.
Qemu does not take paths from BIOS so I don't know what are you talking
about here.

> 
> > > > > > > 
> > > > > > > The logic for ESP is that ESP (registers at 0x7880, slot 
> > > > > > > offset
> > > > > > > 0x88) is handled by the DMA controller (registers at 
> > > > > > > 0x7840,
> > > > > > > slot offset 0x84), they are in a SBus slot #5, and SBus 
> > > > > > > (registers
> > > > > > > at 0x10001000) is in turn handled b

Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-18 Thread Michael S. Tsirkin
On Thu, Nov 18, 2010 at 02:16:05PM +0200, Gleb Natapov wrote:
> On Thu, Nov 18, 2010 at 01:52:30PM +0200, Michael S. Tsirkin wrote:
> > On Thu, Nov 18, 2010 at 01:45:04PM +0200, Gleb Natapov wrote:
> > > On Thu, Nov 18, 2010 at 01:38:31PM +0200, Michael S. Tsirkin wrote:
> > > > On Thu, Nov 18, 2010 at 12:18:27PM +0200, Gleb Natapov wrote:
> > > > > On Wed, Nov 17, 2010 at 09:54:27PM +, Blue Swirl wrote:
> > > > > > 2010/11/16 Gleb Natapov :
> > > > > > > On Tue, Nov 16, 2010 at 06:30:19PM +, Blue Swirl wrote:
> > > > > > >> >> Perhaps the FW path should use device class names if no name 
> > > > > > >> >> is specified.
> > > > > > >> > What do you mean by "device class name". We can do something 
> > > > > > >> > like this:
> > > > > > >> > if (dev->child_bus.lh_first)
> > > > > > >> >        return dev->child_bus.lh_first->info->name;
> > > > > > >> >
> > > > > > >> > i.e if there is child bus use its bus name as fw name. This 
> > > > > > >> > will make
> > > > > > >> > all pci devices to have "pci" as fw name automatically. The 
> > > > > > >> > problem is
> > > > > > >> > that theoretically same device can provide different buses.
> > > > > > >>
> > > > > > >> I meant PCI class name, like "display" for display controllers,
> > > > > > >> "network" for NICs etc.
> > > > > > >>
> > > > > > > That is what my pci bus related patch is doing already.
> > > > > > >
> > > > > > >> >> I'll try Sparc32 to see how this fits there.
> > > > > > >>
> > > > > > >> Except bootindex is not implemented for SCSI.
> > > > > > > Will look into adding it.
> > > > > > 
> > > > > > Thanks. The bootindex on Sparc32 looks like this:
> > > > > > bootindex /e...@7880/d...@1,0
> > > > > > /ether...@/ethernet-...@0
> > > > > > 
> > > > > For arches other then x86 there is a lot of work left to be done :)
> > > > > For starter exotic sparc buses should get their own get_fw_dev_path()
> > > > > implementation.
> > > > > 
> > > > > > I don't think I got Lance setup right.
> > > > > > 
> > > > > > OF paths for the devices would be:
> > > > > > /io...@0,1000/s...@0,10001000/esp...@5,840/e...@5,880/s...@1,0
> > > > > > /io...@0,1000/s...@0,10001000/le...@5,8400010/l...@5,8c0
> > > > > If qdev hierarchy does not correspond to real HW there is no much we 
> > > > > can
> > > > > do expect for fixing qdev.
> > > > 
> > > > That's bad.  This raises a concern: if these paths expose qdev
> > > > internals, any attempt to fix this will break migration.
> > > > 
> > > The path expose internal HW hierarchy. It is designed to do so. Qdev
> > > designed to do the same: describe HW hierarchy. If qdev fails to do so it
> > > is broken.
> > 
> > Yes. But since you use qdev to build up the path, a broken
> > qdev will give you a broken path.
> > 
> Qdev bug. Fix it like any other bug. The nice is that when you compare
> device path produced by qdev and real HW you can see when qdev is wrong.
> 
> > > I do not see connection to migration at all since the path is
> > > not used in migration code.
> > 
> > The connection is that if we pass the list with path 1 which you define
> > as broken to BIOS, then migrate to a machine with an updated qemu
> > which has a correct path, BIOS won't be able to complete the boot.
> You solve it like you solve all such issue with -M machine type.

So that's unavoidable if we think paths are correct.
But if we know they are wrong, we are better off
correcting them first IMO.

> But the problem exists only if migration happens in a short window
> between start of the boot process and BIOS reading boot order string.
> After reboot new qemu should have new BIOS.

That makes it even more nasty, doesn't it?

> > Right? Same in reverse direction.
> Reverse direction is not and never was supported.
> 
> > As solution could be a fuzzy matching
> > of paths that wiull let us recover.
> > 
> Firmware can try its best of course, but nothing is guarantied.

No I mean qemu could do matching fuzzily.
This way if we get a path from the old BIOS we can
survive.

> > > > > > 
> > > > > > The logic for ESP is that ESP (registers at 0x7880, slot offset
> > > > > > 0x88) is handled by the DMA controller (registers at 0x7840,
> > > > > > slot offset 0x84), they are in a SBus slot #5, and SBus 
> > > > > > (registers
> > > > > > at 0x10001000) is in turn handled by IOMMU (registers at 
> > > > > > 0x1000).
> > > > > > Lance should be handled the same way.
> > > > > > 
> > > > > > This hierarchy is partly known by QEMU because DMA accesses use this
> > > > > > flow, but not otherwise. There is no concept of SBus slots, DMA 
> > > > > > talks
> > > > > > to IOMMU directly. Though in this case both ESP, Lance and their DMA
> > > > > > controllers are on board devices in a MACIO chip. It may be possible
> > > > > > to add the hierarchy information at each stage.
> > > > > > 
> > > > > > It should also be possible for BIOS to determine the device just 
> > > > > > from
> >

Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-18 Thread Gleb Natapov
On Thu, Nov 18, 2010 at 01:52:30PM +0200, Michael S. Tsirkin wrote:
> On Thu, Nov 18, 2010 at 01:45:04PM +0200, Gleb Natapov wrote:
> > On Thu, Nov 18, 2010 at 01:38:31PM +0200, Michael S. Tsirkin wrote:
> > > On Thu, Nov 18, 2010 at 12:18:27PM +0200, Gleb Natapov wrote:
> > > > On Wed, Nov 17, 2010 at 09:54:27PM +, Blue Swirl wrote:
> > > > > 2010/11/16 Gleb Natapov :
> > > > > > On Tue, Nov 16, 2010 at 06:30:19PM +, Blue Swirl wrote:
> > > > > >> >> Perhaps the FW path should use device class names if no name is 
> > > > > >> >> specified.
> > > > > >> > What do you mean by "device class name". We can do something 
> > > > > >> > like this:
> > > > > >> > if (dev->child_bus.lh_first)
> > > > > >> >        return dev->child_bus.lh_first->info->name;
> > > > > >> >
> > > > > >> > i.e if there is child bus use its bus name as fw name. This will 
> > > > > >> > make
> > > > > >> > all pci devices to have "pci" as fw name automatically. The 
> > > > > >> > problem is
> > > > > >> > that theoretically same device can provide different buses.
> > > > > >>
> > > > > >> I meant PCI class name, like "display" for display controllers,
> > > > > >> "network" for NICs etc.
> > > > > >>
> > > > > > That is what my pci bus related patch is doing already.
> > > > > >
> > > > > >> >> I'll try Sparc32 to see how this fits there.
> > > > > >>
> > > > > >> Except bootindex is not implemented for SCSI.
> > > > > > Will look into adding it.
> > > > > 
> > > > > Thanks. The bootindex on Sparc32 looks like this:
> > > > > bootindex /e...@7880/d...@1,0
> > > > > /ether...@/ethernet-...@0
> > > > > 
> > > > For arches other then x86 there is a lot of work left to be done :)
> > > > For starter exotic sparc buses should get their own get_fw_dev_path()
> > > > implementation.
> > > > 
> > > > > I don't think I got Lance setup right.
> > > > > 
> > > > > OF paths for the devices would be:
> > > > > /io...@0,1000/s...@0,10001000/esp...@5,840/e...@5,880/s...@1,0
> > > > > /io...@0,1000/s...@0,10001000/le...@5,8400010/l...@5,8c0
> > > > If qdev hierarchy does not correspond to real HW there is no much we can
> > > > do expect for fixing qdev.
> > > 
> > > That's bad.  This raises a concern: if these paths expose qdev
> > > internals, any attempt to fix this will break migration.
> > > 
> > The path expose internal HW hierarchy. It is designed to do so. Qdev
> > designed to do the same: describe HW hierarchy. If qdev fails to do so it
> > is broken.
> 
> Yes. But since you use qdev to build up the path, a broken
> qdev will give you a broken path.
> 
Qdev bug. Fix it like any other bug. The nice is that when you compare
device path produced by qdev and real HW you can see when qdev is wrong.

> > I do not see connection to migration at all since the path is
> > not used in migration code.
> 
> The connection is that if we pass the list with path 1 which you define
> as broken to BIOS, then migrate to a machine with an updated qemu
> which has a correct path, BIOS won't be able to complete the boot.
You solve it like you solve all such issue with -M machine type.
But the problem exists only if migration happens in a short window
between start of the boot process and BIOS reading boot order string.
After reboot new qemu should have new BIOS.

> Right? Same in reverse direction.
Reverse direction is not and never was supported.

> As solution could be a fuzzy matching
> of paths that wiull let us recover.
> 
Firmware can try its best of course, but nothing is guarantied.

> > > > > 
> > > > > The logic for ESP is that ESP (registers at 0x7880, slot offset
> > > > > 0x88) is handled by the DMA controller (registers at 0x7840,
> > > > > slot offset 0x84), they are in a SBus slot #5, and SBus (registers
> > > > > at 0x10001000) is in turn handled by IOMMU (registers at 0x1000).
> > > > > Lance should be handled the same way.
> > > > > 
> > > > > This hierarchy is partly known by QEMU because DMA accesses use this
> > > > > flow, but not otherwise. There is no concept of SBus slots, DMA talks
> > > > > to IOMMU directly. Though in this case both ESP, Lance and their DMA
> > > > > controllers are on board devices in a MACIO chip. It may be possible
> > > > > to add the hierarchy information at each stage.
> > > > > 
> > > > > It should also be possible for BIOS to determine the device just from
> > > > > the physical address if we ignored OF compatibility.
> > > > It would be nice to be OF compatible at least at some level. Of course 
> > > > OF
> > > > spec is not strict enough to have two different implementations produce
> > > > exactly same device path that can be compared by strcpy.  Can we apply
> > > > the series now? At least for x86 it provides useful paths and work can
> > > > be continue for other arches by interested parties.
> > > > 
> > > > --
> > > > Gleb.
> > > 
> > > Something I only now realized is that we commit
> > > to never 

Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-18 Thread Michael S. Tsirkin
On Thu, Nov 18, 2010 at 01:45:04PM +0200, Gleb Natapov wrote:
> On Thu, Nov 18, 2010 at 01:38:31PM +0200, Michael S. Tsirkin wrote:
> > On Thu, Nov 18, 2010 at 12:18:27PM +0200, Gleb Natapov wrote:
> > > On Wed, Nov 17, 2010 at 09:54:27PM +, Blue Swirl wrote:
> > > > 2010/11/16 Gleb Natapov :
> > > > > On Tue, Nov 16, 2010 at 06:30:19PM +, Blue Swirl wrote:
> > > > >> >> Perhaps the FW path should use device class names if no name is 
> > > > >> >> specified.
> > > > >> > What do you mean by "device class name". We can do something like 
> > > > >> > this:
> > > > >> > if (dev->child_bus.lh_first)
> > > > >> >        return dev->child_bus.lh_first->info->name;
> > > > >> >
> > > > >> > i.e if there is child bus use its bus name as fw name. This will 
> > > > >> > make
> > > > >> > all pci devices to have "pci" as fw name automatically. The 
> > > > >> > problem is
> > > > >> > that theoretically same device can provide different buses.
> > > > >>
> > > > >> I meant PCI class name, like "display" for display controllers,
> > > > >> "network" for NICs etc.
> > > > >>
> > > > > That is what my pci bus related patch is doing already.
> > > > >
> > > > >> >> I'll try Sparc32 to see how this fits there.
> > > > >>
> > > > >> Except bootindex is not implemented for SCSI.
> > > > > Will look into adding it.
> > > > 
> > > > Thanks. The bootindex on Sparc32 looks like this:
> > > > bootindex /e...@7880/d...@1,0
> > > > /ether...@/ethernet-...@0
> > > > 
> > > For arches other then x86 there is a lot of work left to be done :)
> > > For starter exotic sparc buses should get their own get_fw_dev_path()
> > > implementation.
> > > 
> > > > I don't think I got Lance setup right.
> > > > 
> > > > OF paths for the devices would be:
> > > > /io...@0,1000/s...@0,10001000/esp...@5,840/e...@5,880/s...@1,0
> > > > /io...@0,1000/s...@0,10001000/le...@5,8400010/l...@5,8c0
> > > If qdev hierarchy does not correspond to real HW there is no much we can
> > > do expect for fixing qdev.
> > 
> > That's bad.  This raises a concern: if these paths expose qdev
> > internals, any attempt to fix this will break migration.
> > 
> The path expose internal HW hierarchy. It is designed to do so. Qdev
> designed to do the same: describe HW hierarchy. If qdev fails to do so it
> is broken.

Yes. But since you use qdev to build up the path, a broken
qdev will give you a broken path.

> I do not see connection to migration at all since the path is
> not used in migration code.

The connection is that if we pass the list with path 1 which you define
as broken to BIOS, then migrate to a machine with an updated qemu
which has a correct path, BIOS won't be able to complete the boot.
Right? Same in reverse direction.
As solution could be a fuzzy matching
of paths that wiull let us recover.

> > > > 
> > > > The logic for ESP is that ESP (registers at 0x7880, slot offset
> > > > 0x88) is handled by the DMA controller (registers at 0x7840,
> > > > slot offset 0x84), they are in a SBus slot #5, and SBus (registers
> > > > at 0x10001000) is in turn handled by IOMMU (registers at 0x1000).
> > > > Lance should be handled the same way.
> > > > 
> > > > This hierarchy is partly known by QEMU because DMA accesses use this
> > > > flow, but not otherwise. There is no concept of SBus slots, DMA talks
> > > > to IOMMU directly. Though in this case both ESP, Lance and their DMA
> > > > controllers are on board devices in a MACIO chip. It may be possible
> > > > to add the hierarchy information at each stage.
> > > > 
> > > > It should also be possible for BIOS to determine the device just from
> > > > the physical address if we ignored OF compatibility.
> > > It would be nice to be OF compatible at least at some level. Of course OF
> > > spec is not strict enough to have two different implementations produce
> > > exactly same device path that can be compared by strcpy.  Can we apply
> > > the series now? At least for x86 it provides useful paths and work can
> > > be continue for other arches by interested parties.
> > > 
> > > --
> > >   Gleb.
> > 
> > Something I only now realized is that we commit
> > to never changing the paths for any architecture
> > that supports migration.
> > 
> No connection to migration whatsoever.

It just seems silly to use different paths for the same thing.

Besides the connection above, I was hoping to use these paths
for section names in migration. If we can't guarantee they are
stable, we'll have to roll our own, and if we do this,
with stability guarantees required for migration format,
maybe use it for other things like BIOS as well?

> --
>   Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-18 Thread Gleb Natapov
On Thu, Nov 18, 2010 at 01:38:31PM +0200, Michael S. Tsirkin wrote:
> On Thu, Nov 18, 2010 at 12:18:27PM +0200, Gleb Natapov wrote:
> > On Wed, Nov 17, 2010 at 09:54:27PM +, Blue Swirl wrote:
> > > 2010/11/16 Gleb Natapov :
> > > > On Tue, Nov 16, 2010 at 06:30:19PM +, Blue Swirl wrote:
> > > >> >> Perhaps the FW path should use device class names if no name is 
> > > >> >> specified.
> > > >> > What do you mean by "device class name". We can do something like 
> > > >> > this:
> > > >> > if (dev->child_bus.lh_first)
> > > >> >        return dev->child_bus.lh_first->info->name;
> > > >> >
> > > >> > i.e if there is child bus use its bus name as fw name. This will make
> > > >> > all pci devices to have "pci" as fw name automatically. The problem 
> > > >> > is
> > > >> > that theoretically same device can provide different buses.
> > > >>
> > > >> I meant PCI class name, like "display" for display controllers,
> > > >> "network" for NICs etc.
> > > >>
> > > > That is what my pci bus related patch is doing already.
> > > >
> > > >> >> I'll try Sparc32 to see how this fits there.
> > > >>
> > > >> Except bootindex is not implemented for SCSI.
> > > > Will look into adding it.
> > > 
> > > Thanks. The bootindex on Sparc32 looks like this:
> > > bootindex /e...@7880/d...@1,0
> > > /ether...@/ethernet-...@0
> > > 
> > For arches other then x86 there is a lot of work left to be done :)
> > For starter exotic sparc buses should get their own get_fw_dev_path()
> > implementation.
> > 
> > > I don't think I got Lance setup right.
> > > 
> > > OF paths for the devices would be:
> > > /io...@0,1000/s...@0,10001000/esp...@5,840/e...@5,880/s...@1,0
> > > /io...@0,1000/s...@0,10001000/le...@5,8400010/l...@5,8c0
> > If qdev hierarchy does not correspond to real HW there is no much we can
> > do expect for fixing qdev.
> 
> That's bad.  This raises a concern: if these paths expose qdev
> internals, any attempt to fix this will break migration.
> 
The path expose internal HW hierarchy. It is designed to do so. Qdev
designed to do the same: describe HW hierarchy. If qdev fails to do so it
is broken. I do not see connection to migration at all since the path is
not used in migration code.

> > > 
> > > The logic for ESP is that ESP (registers at 0x7880, slot offset
> > > 0x88) is handled by the DMA controller (registers at 0x7840,
> > > slot offset 0x84), they are in a SBus slot #5, and SBus (registers
> > > at 0x10001000) is in turn handled by IOMMU (registers at 0x1000).
> > > Lance should be handled the same way.
> > > 
> > > This hierarchy is partly known by QEMU because DMA accesses use this
> > > flow, but not otherwise. There is no concept of SBus slots, DMA talks
> > > to IOMMU directly. Though in this case both ESP, Lance and their DMA
> > > controllers are on board devices in a MACIO chip. It may be possible
> > > to add the hierarchy information at each stage.
> > > 
> > > It should also be possible for BIOS to determine the device just from
> > > the physical address if we ignored OF compatibility.
> > It would be nice to be OF compatible at least at some level. Of course OF
> > spec is not strict enough to have two different implementations produce
> > exactly same device path that can be compared by strcpy.  Can we apply
> > the series now? At least for x86 it provides useful paths and work can
> > be continue for other arches by interested parties.
> > 
> > --
> > Gleb.
> 
> Something I only now realized is that we commit
> to never changing the paths for any architecture
> that supports migration.
> 
No connection to migration whatsoever.

--
Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-18 Thread Michael S. Tsirkin
On Thu, Nov 18, 2010 at 12:18:27PM +0200, Gleb Natapov wrote:
> On Wed, Nov 17, 2010 at 09:54:27PM +, Blue Swirl wrote:
> > 2010/11/16 Gleb Natapov :
> > > On Tue, Nov 16, 2010 at 06:30:19PM +, Blue Swirl wrote:
> > >> >> Perhaps the FW path should use device class names if no name is 
> > >> >> specified.
> > >> > What do you mean by "device class name". We can do something like this:
> > >> > if (dev->child_bus.lh_first)
> > >> >        return dev->child_bus.lh_first->info->name;
> > >> >
> > >> > i.e if there is child bus use its bus name as fw name. This will make
> > >> > all pci devices to have "pci" as fw name automatically. The problem is
> > >> > that theoretically same device can provide different buses.
> > >>
> > >> I meant PCI class name, like "display" for display controllers,
> > >> "network" for NICs etc.
> > >>
> > > That is what my pci bus related patch is doing already.
> > >
> > >> >> I'll try Sparc32 to see how this fits there.
> > >>
> > >> Except bootindex is not implemented for SCSI.
> > > Will look into adding it.
> > 
> > Thanks. The bootindex on Sparc32 looks like this:
> > bootindex /e...@7880/d...@1,0
> > /ether...@/ethernet-...@0
> > 
> For arches other then x86 there is a lot of work left to be done :)
> For starter exotic sparc buses should get their own get_fw_dev_path()
> implementation.
> 
> > I don't think I got Lance setup right.
> > 
> > OF paths for the devices would be:
> > /io...@0,1000/s...@0,10001000/esp...@5,840/e...@5,880/s...@1,0
> > /io...@0,1000/s...@0,10001000/le...@5,8400010/l...@5,8c0
> If qdev hierarchy does not correspond to real HW there is no much we can
> do expect for fixing qdev.

That's bad.  This raises a concern: if these paths expose qdev
internals, any attempt to fix this will break migration.

> > 
> > The logic for ESP is that ESP (registers at 0x7880, slot offset
> > 0x88) is handled by the DMA controller (registers at 0x7840,
> > slot offset 0x84), they are in a SBus slot #5, and SBus (registers
> > at 0x10001000) is in turn handled by IOMMU (registers at 0x1000).
> > Lance should be handled the same way.
> > 
> > This hierarchy is partly known by QEMU because DMA accesses use this
> > flow, but not otherwise. There is no concept of SBus slots, DMA talks
> > to IOMMU directly. Though in this case both ESP, Lance and their DMA
> > controllers are on board devices in a MACIO chip. It may be possible
> > to add the hierarchy information at each stage.
> > 
> > It should also be possible for BIOS to determine the device just from
> > the physical address if we ignored OF compatibility.
> It would be nice to be OF compatible at least at some level. Of course OF
> spec is not strict enough to have two different implementations produce
> exactly same device path that can be compared by strcpy.  Can we apply
> the series now? At least for x86 it provides useful paths and work can
> be continue for other arches by interested parties.
> 
> --
>   Gleb.

Something I only now realized is that we commit
to never changing the paths for any architecture
that supports migration.

-- 
MST
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-18 Thread Gleb Natapov
On Wed, Nov 17, 2010 at 09:54:27PM +, Blue Swirl wrote:
> 2010/11/16 Gleb Natapov :
> > On Tue, Nov 16, 2010 at 06:30:19PM +, Blue Swirl wrote:
> >> >> Perhaps the FW path should use device class names if no name is 
> >> >> specified.
> >> > What do you mean by "device class name". We can do something like this:
> >> > if (dev->child_bus.lh_first)
> >> >        return dev->child_bus.lh_first->info->name;
> >> >
> >> > i.e if there is child bus use its bus name as fw name. This will make
> >> > all pci devices to have "pci" as fw name automatically. The problem is
> >> > that theoretically same device can provide different buses.
> >>
> >> I meant PCI class name, like "display" for display controllers,
> >> "network" for NICs etc.
> >>
> > That is what my pci bus related patch is doing already.
> >
> >> >> I'll try Sparc32 to see how this fits there.
> >>
> >> Except bootindex is not implemented for SCSI.
> > Will look into adding it.
> 
> Thanks. The bootindex on Sparc32 looks like this:
> bootindex /e...@7880/d...@1,0
> /ether...@/ethernet-...@0
> 
For arches other then x86 there is a lot of work left to be done :)
For starter exotic sparc buses should get their own get_fw_dev_path()
implementation.

> I don't think I got Lance setup right.
> 
> OF paths for the devices would be:
> /io...@0,1000/s...@0,10001000/esp...@5,840/e...@5,880/s...@1,0
> /io...@0,1000/s...@0,10001000/le...@5,8400010/l...@5,8c0
If qdev hierarchy does not correspond to real HW there is no much we can
do expect for fixing qdev.

> 
> The logic for ESP is that ESP (registers at 0x7880, slot offset
> 0x88) is handled by the DMA controller (registers at 0x7840,
> slot offset 0x84), they are in a SBus slot #5, and SBus (registers
> at 0x10001000) is in turn handled by IOMMU (registers at 0x1000).
> Lance should be handled the same way.
> 
> This hierarchy is partly known by QEMU because DMA accesses use this
> flow, but not otherwise. There is no concept of SBus slots, DMA talks
> to IOMMU directly. Though in this case both ESP, Lance and their DMA
> controllers are on board devices in a MACIO chip. It may be possible
> to add the hierarchy information at each stage.
> 
> It should also be possible for BIOS to determine the device just from
> the physical address if we ignored OF compatibility.
It would be nice to be OF compatible at least at some level. Of course OF
spec is not strict enough to have two different implementations produce
exactly same device path that can be compared by strcpy.  Can we apply
the series now? At least for x86 it provides useful paths and work can
be continue for other arches by interested parties.

--
Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-17 Thread Blue Swirl
2010/11/16 Gleb Natapov :
> On Tue, Nov 16, 2010 at 06:30:19PM +, Blue Swirl wrote:
>> >> Perhaps the FW path should use device class names if no name is specified.
>> > What do you mean by "device class name". We can do something like this:
>> > if (dev->child_bus.lh_first)
>> >        return dev->child_bus.lh_first->info->name;
>> >
>> > i.e if there is child bus use its bus name as fw name. This will make
>> > all pci devices to have "pci" as fw name automatically. The problem is
>> > that theoretically same device can provide different buses.
>>
>> I meant PCI class name, like "display" for display controllers,
>> "network" for NICs etc.
>>
> That is what my pci bus related patch is doing already.
>
>> >> I'll try Sparc32 to see how this fits there.
>>
>> Except bootindex is not implemented for SCSI.
> Will look into adding it.

Thanks. The bootindex on Sparc32 looks like this:
bootindex /e...@7880/d...@1,0
/ether...@/ethernet-...@0

I don't think I got Lance setup right.

OF paths for the devices would be:
/io...@0,1000/s...@0,10001000/esp...@5,840/e...@5,880/s...@1,0
/io...@0,1000/s...@0,10001000/le...@5,8400010/l...@5,8c0

The logic for ESP is that ESP (registers at 0x7880, slot offset
0x88) is handled by the DMA controller (registers at 0x7840,
slot offset 0x84), they are in a SBus slot #5, and SBus (registers
at 0x10001000) is in turn handled by IOMMU (registers at 0x1000).
Lance should be handled the same way.

This hierarchy is partly known by QEMU because DMA accesses use this
flow, but not otherwise. There is no concept of SBus slots, DMA talks
to IOMMU directly. Though in this case both ESP, Lance and their DMA
controllers are on board devices in a MACIO chip. It may be possible
to add the hierarchy information at each stage.

It should also be possible for BIOS to determine the device just from
the physical address if we ignored OF compatibility.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-16 Thread Gleb Natapov
On Tue, Nov 16, 2010 at 06:30:19PM +, Blue Swirl wrote:
> >> Perhaps the FW path should use device class names if no name is specified.
> > What do you mean by "device class name". We can do something like this:
> > if (dev->child_bus.lh_first)
> >        return dev->child_bus.lh_first->info->name;
> >
> > i.e if there is child bus use its bus name as fw name. This will make
> > all pci devices to have "pci" as fw name automatically. The problem is
> > that theoretically same device can provide different buses.
> 
> I meant PCI class name, like "display" for display controllers,
> "network" for NICs etc.
> 
That is what my pci bus related patch is doing already.

> >> I'll try Sparc32 to see how this fits there.
> 
> Except bootindex is not implemented for SCSI.
Will look into adding it.

--
Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-16 Thread Blue Swirl
2010/11/16 Gleb Natapov :
> On Mon, Nov 15, 2010 at 08:29:24PM +, Blue Swirl wrote:
>> 2010/11/15 Gleb Natapov :
>> > On Sun, Nov 14, 2010 at 10:50:13PM +, Blue Swirl wrote:
>> >> On Sun, Nov 14, 2010 at 3:39 PM, Gleb Natapov  wrote:
>> >> >
>> >> > Signed-off-by: Gleb Natapov 
>> >> > ---
>> >> >  hw/fw_cfg.c |   14 ++
>> >> >  hw/fw_cfg.h |    4 +++-
>> >> >  sysemu.h    |    1 +
>> >> >  vl.c        |   51 +++
>> >> >  4 files changed, 69 insertions(+), 1 deletions(-)
>> >> >
>> >> > diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c
>> >> > index 7b9434f..f6a67db 100644
>> >> > --- a/hw/fw_cfg.c
>> >> > +++ b/hw/fw_cfg.c
>> >> > @@ -53,6 +53,7 @@ struct FWCfgState {
>> >> >     FWCfgFiles *files;
>> >> >     uint16_t cur_entry;
>> >> >     uint32_t cur_offset;
>> >> > +    Notifier machine_ready;
>> >> >  };
>> >> >
>> >> >  static void fw_cfg_write(FWCfgState *s, uint8_t value)
>> >> > @@ -315,6 +316,15 @@ int fw_cfg_add_file(FWCfgState *s,  const char 
>> >> > *filename, uint8_t *data,
>> >> >     return 1;
>> >> >  }
>> >> >
>> >> > +static void fw_cfg_machine_ready(struct Notifier* n)
>> >> > +{
>> >> > +    uint32_t len;
>> >> > +    char *bootindex = get_boot_devices_list(&len);
>> >> > +
>> >> > +    fw_cfg_add_bytes(container_of(n, FWCfgState, machine_ready),
>> >> > +                     FW_CFG_BOOTINDEX, (uint8_t*)bootindex, len);
>> >> > +}
>> >> > +
>> >> >  FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t data_port,
>> >> >                         target_phys_addr_t ctl_addr, target_phys_addr_t 
>> >> > data_addr)
>> >> >  {
>> >> > @@ -343,6 +353,10 @@ FWCfgState *fw_cfg_init(uint32_t ctl_port, 
>> >> > uint32_t data_port,
>> >> >     fw_cfg_add_i16(s, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
>> >> >     fw_cfg_add_i16(s, FW_CFG_BOOT_MENU, (uint16_t)boot_menu);
>> >> >
>> >> > +
>> >> > +    s->machine_ready.notify = fw_cfg_machine_ready;
>> >> > +    qemu_add_machine_init_done_notifier(&s->machine_ready);
>> >> > +
>> >> >     return s;
>> >> >  }
>> >> >
>> >> > diff --git a/hw/fw_cfg.h b/hw/fw_cfg.h
>> >> > index 856bf91..4d61410 100644
>> >> > --- a/hw/fw_cfg.h
>> >> > +++ b/hw/fw_cfg.h
>> >> > @@ -30,7 +30,9 @@
>> >> >
>> >> >  #define FW_CFG_FILE_FIRST       0x20
>> >> >  #define FW_CFG_FILE_SLOTS       0x10
>> >> > -#define FW_CFG_MAX_ENTRY        (FW_CFG_FILE_FIRST+FW_CFG_FILE_SLOTS)
>> >> > +#define FW_CFG_FILE_LAST_SLOT   (FW_CFG_FILE_FIRST+FW_CFG_FILE_SLOTS)
>> >> > +#define FW_CFG_BOOTINDEX        (FW_CFG_FILE_LAST_SLOT + 1)
>> >> > +#define FW_CFG_MAX_ENTRY        FW_CFG_BOOTINDEX
>> >>
>> >> This should be
>> >> #define FW_CFG_MAX_ENTRY        (FW_CFG_BOOTINDEX + 1)
>> >> because the check is like this:
>> >>     if ((key & FW_CFG_ENTRY_MASK) >= FW_CFG_MAX_ENTRY) {
>> >>         s->cur_entry = FW_CFG_INVALID;
>> >>
>> > Yeah, will fix.
>> >
>> >> With that change, I got the bootindex passed to OpenBIOS:
>> >> OpenBIOS for Sparc64
>> >> Configuration device id QEMU version 1 machine id 0
>> >> kernel cmdline
>> >> CPUs: 1 x SUNW,UltraSPARC-IIi
>> >> UUID: ----
>> >> bootindex num_strings 1
>> >> bootindex /p...@01fe/i...@5/dr...@1/d...@0
>> >>
>> >> The device path does not match exactly, but it's close:
>> >> /p...@1fe,0/pci-...@5/i...@600/d...@0
>> >
>> > pbm->pci should be solvable by the patch at the end. Were in the spec
>> > it is allowed to abbreviate 1fe as 1fe,0? Spec allows to drop
>> > starting zeroes but TARGET_FMT_plx definition in targphys.h has 0 after
>> > %. I can define another one without leading zeroes. Can you suggest
>> > a name?
>>
>> I think OpenBIOS for Sparc64 is not correct here, so it may be a bad
>> reference architecture. OBP on a real Ultra-5 used a path like this:
>> /p...@1f,0/p...@1,1/i...@3/d...@0,0
>>
>> p...@1f,0 specifies the PCI host bridge at UPA bus port ID of 0x1f.
> According to device name qemu creates pci controller is memory mapped
> at address 1fe and by looking at the code I can see that this
> is indeed the case. How is UPA naming works?

No idea.

>> p...@1,1 specifies a PCI-PCI bridge.
>>
>> > TARGET_FMT_lx is poisoned. As of ATA there is no open firmware
>> > binding spec for ATA, so everyone does what he pleases. I based my
>> > implementation on what open firmware showing when running on qemu x86.
>> > "pci-ata" should be "ide" according to PCI binding spec :)
>>
>> Yes, for example there is no ATA in the Ultra-5 tree but in UltraAX it 
>> exists:
>> /p...@1f,4000/i...@3/a...@0,0/c...@0,0
>>
>> > diff --git a/hw/apb_pci.c b/hw/apb_pci.c
>> > index c619112..643aa49 100644
>> > --- a/hw/apb_pci.c
>> > +++ b/hw/apb_pci.c
>> > @@ -453,6 +453,7 @@ static PCIDeviceInfo pbm_pci_host_info = {
>> >
>> >  static SysBusDeviceInfo pbm_host_info = {
>> >     .qdev.name = "pbm",
>> > +    .qdev.fw_name = "pci",
>>
>> Perhaps the FW path should use device class names if no name is specified.
> What do you mean by "device cl

Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-16 Thread Blue Swirl
On Tue, Nov 16, 2010 at 7:22 AM, Gleb Natapov  wrote:
> On Mon, Nov 15, 2010 at 09:52:19PM -0500, Kevin O'Connor wrote:
>> On Mon, Nov 15, 2010 at 03:36:25PM +0200, Gleb Natapov wrote:
>> > On Mon, Nov 15, 2010 at 08:26:35AM -0500, Kevin O'Connor wrote:
>> > > On Mon, Nov 15, 2010 at 09:40:08AM +0200, Gleb Natapov wrote:
>> > > > On Sun, Nov 14, 2010 at 10:40:33PM -0500, Kevin O'Connor wrote:
>> > > > > Why not just return a newline separated list that is null terminated?
>> > > > >
>> > > > Doing it like this will needlessly complicate firmware side. How do you
>> > > > know how much memory to allocate before reading device list?
>> > >
>> > > My preference would be for the size to be exposed via the
>> > > QEMU_CFG_FILE_DIR selector.  (My preference would be for all objects
>> > > in fw_cfg to have entries in QEMU_CFG_FILE_DIR describing their size
>> > > in a reliable manner.)
>> > >
>> > Will interface suggested by Blue will be good for you? The one with two
>> > fw_cfg ids. BOOTINDEX_LEN for len and BOOTINDEX_DATA for device list. I
>>
>> I dislike how different fw_cfg objects pass the length in different
>> ways (eg, QEMU_CFG_E820_TABLE passes length as first 4 bytes).  This
>> is a common problem - I'd prefer if we could adopt one uniform way of
>> passing length.  I think QEMU_CFG_FILE_DIR solves this problem well.
>>
> Looking at available fw cfg option I see that _SIZE _DATA is also a
> common pattern. The problem with QEMU_CFG_FILE_DIR is that we have very
> little available slots right now. If we a going to require everything to
> use it we better grow number of available slots considerably now while
> it is easily done (no option defined above file slots yet).

FW_CFG_FILE_DIR seems to be a bit poorly designed. Maybe we should
deprecate it and design a more scalable model. There are also string
variables passed to BIOS (-prom-env for Sparc/PPC) which could then
use this new model instead of NVRAM.

> I personally do not have preferences one way or the other. Blue are you
> OK with using QEMU_CFG_FILE_DIR?

That would also work.

>> I also have an ulterior motive here.  If the boot order is exposed as
>> a newline separated list via an entry in QEMU_CFG_FILE_DIR, then this
>> becomes free for coreboot users as well.  (On coreboot, the boot order
>> could be placed in a "file" in flash with no change to the seabios
>> code.)
>>
> You can define get_boot_order() function and implement it differently
> for qemu and coreboot. For coreboot it will be one linear. Just call
> cbfs_copyfile("bootorder"). BTW why newline separation is important?

Newline and zero are both OK since neither can appear inside a valid boot path.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-15 Thread Gleb Natapov
On Mon, Nov 15, 2010 at 09:52:19PM -0500, Kevin O'Connor wrote:
> On Mon, Nov 15, 2010 at 03:36:25PM +0200, Gleb Natapov wrote:
> > On Mon, Nov 15, 2010 at 08:26:35AM -0500, Kevin O'Connor wrote:
> > > On Mon, Nov 15, 2010 at 09:40:08AM +0200, Gleb Natapov wrote:
> > > > On Sun, Nov 14, 2010 at 10:40:33PM -0500, Kevin O'Connor wrote:
> > > > > Why not just return a newline separated list that is null terminated?
> > > > > 
> > > > Doing it like this will needlessly complicate firmware side. How do you
> > > > know how much memory to allocate before reading device list?
> > > 
> > > My preference would be for the size to be exposed via the
> > > QEMU_CFG_FILE_DIR selector.  (My preference would be for all objects
> > > in fw_cfg to have entries in QEMU_CFG_FILE_DIR describing their size
> > > in a reliable manner.)
> > > 
> > Will interface suggested by Blue will be good for you? The one with two
> > fw_cfg ids. BOOTINDEX_LEN for len and BOOTINDEX_DATA for device list. I
> 
> I dislike how different fw_cfg objects pass the length in different
> ways (eg, QEMU_CFG_E820_TABLE passes length as first 4 bytes).  This
> is a common problem - I'd prefer if we could adopt one uniform way of
> passing length.  I think QEMU_CFG_FILE_DIR solves this problem well.
>
Looking at available fw cfg option I see that _SIZE _DATA is also a
common pattern. The problem with QEMU_CFG_FILE_DIR is that we have very
little available slots right now. If we a going to require everything to
use it we better grow number of available slots considerably now while
it is easily done (no option defined above file slots yet).

I personally do not have preferences one way or the other. Blue are you
OK with using QEMU_CFG_FILE_DIR?

> I also have an ulterior motive here.  If the boot order is exposed as
> a newline separated list via an entry in QEMU_CFG_FILE_DIR, then this
> becomes free for coreboot users as well.  (On coreboot, the boot order
> could be placed in a "file" in flash with no change to the seabios
> code.)
> 
You can define get_boot_order() function and implement it differently
for qemu and coreboot. For coreboot it will be one linear. Just call
cbfs_copyfile("bootorder"). BTW why newline separation is important? 

--
Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-15 Thread Kevin O'Connor
On Mon, Nov 15, 2010 at 03:36:25PM +0200, Gleb Natapov wrote:
> On Mon, Nov 15, 2010 at 08:26:35AM -0500, Kevin O'Connor wrote:
> > On Mon, Nov 15, 2010 at 09:40:08AM +0200, Gleb Natapov wrote:
> > > On Sun, Nov 14, 2010 at 10:40:33PM -0500, Kevin O'Connor wrote:
> > > > Why not just return a newline separated list that is null terminated?
> > > > 
> > > Doing it like this will needlessly complicate firmware side. How do you
> > > know how much memory to allocate before reading device list?
> > 
> > My preference would be for the size to be exposed via the
> > QEMU_CFG_FILE_DIR selector.  (My preference would be for all objects
> > in fw_cfg to have entries in QEMU_CFG_FILE_DIR describing their size
> > in a reliable manner.)
> > 
> Will interface suggested by Blue will be good for you? The one with two
> fw_cfg ids. BOOTINDEX_LEN for len and BOOTINDEX_DATA for device list. I

I dislike how different fw_cfg objects pass the length in different
ways (eg, QEMU_CFG_E820_TABLE passes length as first 4 bytes).  This
is a common problem - I'd prefer if we could adopt one uniform way of
passing length.  I think QEMU_CFG_FILE_DIR solves this problem well.

I also have an ulterior motive here.  If the boot order is exposed as
a newline separated list via an entry in QEMU_CFG_FILE_DIR, then this
becomes free for coreboot users as well.  (On coreboot, the boot order
could be placed in a "file" in flash with no change to the seabios
code.)

-Kevin
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-15 Thread Blue Swirl
2010/11/15 Gleb Natapov :
> On Sun, Nov 14, 2010 at 10:50:13PM +, Blue Swirl wrote:
>> On Sun, Nov 14, 2010 at 3:39 PM, Gleb Natapov  wrote:
>> >
>> > Signed-off-by: Gleb Natapov 
>> > ---
>> >  hw/fw_cfg.c |   14 ++
>> >  hw/fw_cfg.h |    4 +++-
>> >  sysemu.h    |    1 +
>> >  vl.c        |   51 +++
>> >  4 files changed, 69 insertions(+), 1 deletions(-)
>> >
>> > diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c
>> > index 7b9434f..f6a67db 100644
>> > --- a/hw/fw_cfg.c
>> > +++ b/hw/fw_cfg.c
>> > @@ -53,6 +53,7 @@ struct FWCfgState {
>> >     FWCfgFiles *files;
>> >     uint16_t cur_entry;
>> >     uint32_t cur_offset;
>> > +    Notifier machine_ready;
>> >  };
>> >
>> >  static void fw_cfg_write(FWCfgState *s, uint8_t value)
>> > @@ -315,6 +316,15 @@ int fw_cfg_add_file(FWCfgState *s,  const char 
>> > *filename, uint8_t *data,
>> >     return 1;
>> >  }
>> >
>> > +static void fw_cfg_machine_ready(struct Notifier* n)
>> > +{
>> > +    uint32_t len;
>> > +    char *bootindex = get_boot_devices_list(&len);
>> > +
>> > +    fw_cfg_add_bytes(container_of(n, FWCfgState, machine_ready),
>> > +                     FW_CFG_BOOTINDEX, (uint8_t*)bootindex, len);
>> > +}
>> > +
>> >  FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t data_port,
>> >                         target_phys_addr_t ctl_addr, target_phys_addr_t 
>> > data_addr)
>> >  {
>> > @@ -343,6 +353,10 @@ FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t 
>> > data_port,
>> >     fw_cfg_add_i16(s, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
>> >     fw_cfg_add_i16(s, FW_CFG_BOOT_MENU, (uint16_t)boot_menu);
>> >
>> > +
>> > +    s->machine_ready.notify = fw_cfg_machine_ready;
>> > +    qemu_add_machine_init_done_notifier(&s->machine_ready);
>> > +
>> >     return s;
>> >  }
>> >
>> > diff --git a/hw/fw_cfg.h b/hw/fw_cfg.h
>> > index 856bf91..4d61410 100644
>> > --- a/hw/fw_cfg.h
>> > +++ b/hw/fw_cfg.h
>> > @@ -30,7 +30,9 @@
>> >
>> >  #define FW_CFG_FILE_FIRST       0x20
>> >  #define FW_CFG_FILE_SLOTS       0x10
>> > -#define FW_CFG_MAX_ENTRY        (FW_CFG_FILE_FIRST+FW_CFG_FILE_SLOTS)
>> > +#define FW_CFG_FILE_LAST_SLOT   (FW_CFG_FILE_FIRST+FW_CFG_FILE_SLOTS)
>> > +#define FW_CFG_BOOTINDEX        (FW_CFG_FILE_LAST_SLOT + 1)
>> > +#define FW_CFG_MAX_ENTRY        FW_CFG_BOOTINDEX
>>
>> This should be
>> #define FW_CFG_MAX_ENTRY        (FW_CFG_BOOTINDEX + 1)
>> because the check is like this:
>>     if ((key & FW_CFG_ENTRY_MASK) >= FW_CFG_MAX_ENTRY) {
>>         s->cur_entry = FW_CFG_INVALID;
>>
> Yeah, will fix.
>
>> With that change, I got the bootindex passed to OpenBIOS:
>> OpenBIOS for Sparc64
>> Configuration device id QEMU version 1 machine id 0
>> kernel cmdline
>> CPUs: 1 x SUNW,UltraSPARC-IIi
>> UUID: ----
>> bootindex num_strings 1
>> bootindex /p...@01fe/i...@5/dr...@1/d...@0
>>
>> The device path does not match exactly, but it's close:
>> /p...@1fe,0/pci-...@5/i...@600/d...@0
>
> pbm->pci should be solvable by the patch at the end. Were in the spec
> it is allowed to abbreviate 1fe as 1fe,0? Spec allows to drop
> starting zeroes but TARGET_FMT_plx definition in targphys.h has 0 after
> %. I can define another one without leading zeroes. Can you suggest
> a name?

I think OpenBIOS for Sparc64 is not correct here, so it may be a bad
reference architecture. OBP on a real Ultra-5 used a path like this:
/p...@1f,0/p...@1,1/i...@3/d...@0,0

p...@1f,0 specifies the PCI host bridge at UPA bus port ID of 0x1f.
p...@1,1 specifies a PCI-PCI bridge.

> TARGET_FMT_lx is poisoned. As of ATA there is no open firmware
> binding spec for ATA, so everyone does what he pleases. I based my
> implementation on what open firmware showing when running on qemu x86.
> "pci-ata" should be "ide" according to PCI binding spec :)

Yes, for example there is no ATA in the Ultra-5 tree but in UltraAX it exists:
/p...@1f,4000/i...@3/a...@0,0/c...@0,0

> diff --git a/hw/apb_pci.c b/hw/apb_pci.c
> index c619112..643aa49 100644
> --- a/hw/apb_pci.c
> +++ b/hw/apb_pci.c
> @@ -453,6 +453,7 @@ static PCIDeviceInfo pbm_pci_host_info = {
>
>  static SysBusDeviceInfo pbm_host_info = {
>     .qdev.name = "pbm",
> +    .qdev.fw_name = "pci",

Perhaps the FW path should use device class names if no name is specified.

I'll try Sparc32 to see how this fits there.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-15 Thread Gleb Natapov
On Mon, Nov 15, 2010 at 03:36:25PM +0200, Gleb Natapov wrote:
>   Hmm BTW I do not see proper endianness
> handling in FILE_DIR.
> 
That's just me. Everything it OK there with endianness.

--
Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-15 Thread Gleb Natapov
On Mon, Nov 15, 2010 at 08:26:35AM -0500, Kevin O'Connor wrote:
> On Mon, Nov 15, 2010 at 09:40:08AM +0200, Gleb Natapov wrote:
> > On Sun, Nov 14, 2010 at 10:40:33PM -0500, Kevin O'Connor wrote:
> > > Why not just return a newline separated list that is null terminated?
> > > 
> > Doing it like this will needlessly complicate firmware side. How do you
> > know how much memory to allocate before reading device list?
> 
> My preference would be for the size to be exposed via the
> QEMU_CFG_FILE_DIR selector.  (My preference would be for all objects
> in fw_cfg to have entries in QEMU_CFG_FILE_DIR describing their size
> in a reliable manner.)
> 
Will interface suggested by Blue will be good for you? The one with two
fw_cfg ids. BOOTINDEX_LEN for len and BOOTINDEX_DATA for device list. I
already changed my implementation to this one. Using FILE_DIR requires
us to generate synthetic name. Hmm BTW I do not see proper endianness
handling in FILE_DIR.

--
Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-15 Thread Kevin O'Connor
On Mon, Nov 15, 2010 at 09:40:08AM +0200, Gleb Natapov wrote:
> On Sun, Nov 14, 2010 at 10:40:33PM -0500, Kevin O'Connor wrote:
> > Why not just return a newline separated list that is null terminated?
> > 
> Doing it like this will needlessly complicate firmware side. How do you
> know how much memory to allocate before reading device list?

My preference would be for the size to be exposed via the
QEMU_CFG_FILE_DIR selector.  (My preference would be for all objects
in fw_cfg to have entries in QEMU_CFG_FILE_DIR describing their size
in a reliable manner.)

-Kevin
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-15 Thread Gleb Natapov
On Sun, Nov 14, 2010 at 10:50:13PM +, Blue Swirl wrote:
> On Sun, Nov 14, 2010 at 3:39 PM, Gleb Natapov  wrote:
> >
> > Signed-off-by: Gleb Natapov 
> > ---
> >  hw/fw_cfg.c |   14 ++
> >  hw/fw_cfg.h |    4 +++-
> >  sysemu.h    |    1 +
> >  vl.c        |   51 +++
> >  4 files changed, 69 insertions(+), 1 deletions(-)
> >
> > diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c
> > index 7b9434f..f6a67db 100644
> > --- a/hw/fw_cfg.c
> > +++ b/hw/fw_cfg.c
> > @@ -53,6 +53,7 @@ struct FWCfgState {
> >     FWCfgFiles *files;
> >     uint16_t cur_entry;
> >     uint32_t cur_offset;
> > +    Notifier machine_ready;
> >  };
> >
> >  static void fw_cfg_write(FWCfgState *s, uint8_t value)
> > @@ -315,6 +316,15 @@ int fw_cfg_add_file(FWCfgState *s,  const char 
> > *filename, uint8_t *data,
> >     return 1;
> >  }
> >
> > +static void fw_cfg_machine_ready(struct Notifier* n)
> > +{
> > +    uint32_t len;
> > +    char *bootindex = get_boot_devices_list(&len);
> > +
> > +    fw_cfg_add_bytes(container_of(n, FWCfgState, machine_ready),
> > +                     FW_CFG_BOOTINDEX, (uint8_t*)bootindex, len);
> > +}
> > +
> >  FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t data_port,
> >                         target_phys_addr_t ctl_addr, target_phys_addr_t 
> > data_addr)
> >  {
> > @@ -343,6 +353,10 @@ FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t 
> > data_port,
> >     fw_cfg_add_i16(s, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
> >     fw_cfg_add_i16(s, FW_CFG_BOOT_MENU, (uint16_t)boot_menu);
> >
> > +
> > +    s->machine_ready.notify = fw_cfg_machine_ready;
> > +    qemu_add_machine_init_done_notifier(&s->machine_ready);
> > +
> >     return s;
> >  }
> >
> > diff --git a/hw/fw_cfg.h b/hw/fw_cfg.h
> > index 856bf91..4d61410 100644
> > --- a/hw/fw_cfg.h
> > +++ b/hw/fw_cfg.h
> > @@ -30,7 +30,9 @@
> >
> >  #define FW_CFG_FILE_FIRST       0x20
> >  #define FW_CFG_FILE_SLOTS       0x10
> > -#define FW_CFG_MAX_ENTRY        (FW_CFG_FILE_FIRST+FW_CFG_FILE_SLOTS)
> > +#define FW_CFG_FILE_LAST_SLOT   (FW_CFG_FILE_FIRST+FW_CFG_FILE_SLOTS)
> > +#define FW_CFG_BOOTINDEX        (FW_CFG_FILE_LAST_SLOT + 1)
> > +#define FW_CFG_MAX_ENTRY        FW_CFG_BOOTINDEX
> 
> This should be
> #define FW_CFG_MAX_ENTRY(FW_CFG_BOOTINDEX + 1)
> because the check is like this:
> if ((key & FW_CFG_ENTRY_MASK) >= FW_CFG_MAX_ENTRY) {
> s->cur_entry = FW_CFG_INVALID;
> 
Yeah, will fix.

> With that change, I got the bootindex passed to OpenBIOS:
> OpenBIOS for Sparc64
> Configuration device id QEMU version 1 machine id 0
> kernel cmdline
> CPUs: 1 x SUNW,UltraSPARC-IIi
> UUID: ----
> bootindex num_strings 1
> bootindex /p...@01fe/i...@5/dr...@1/d...@0
> 
> The device path does not match exactly, but it's close:
> /p...@1fe,0/pci-...@5/i...@600/d...@0

pbm->pci should be solvable by the patch at the end. Were in the spec
it is allowed to abbreviate 1fe as 1fe,0? Spec allows to drop
starting zeroes but TARGET_FMT_plx definition in targphys.h has 0 after
%. I can define another one without leading zeroes. Can you suggest
a name?  TARGET_FMT_lx is poisoned. As of ATA there is no open firmware
binding spec for ATA, so everyone does what he pleases. I based my
implementation on what open firmware showing when running on qemu x86.
"pci-ata" should be "ide" according to PCI binding spec :) 

diff --git a/hw/apb_pci.c b/hw/apb_pci.c
index c619112..643aa49 100644
--- a/hw/apb_pci.c
+++ b/hw/apb_pci.c
@@ -453,6 +453,7 @@ static PCIDeviceInfo pbm_pci_host_info = {
 
 static SysBusDeviceInfo pbm_host_info = {
 .qdev.name = "pbm",
+.qdev.fw_name = "pci",
 .qdev.size = sizeof(APBState),
 .qdev.reset = pci_pbm_reset,
 .init = pci_pbm_init_device,
--
Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-15 Thread Gleb Natapov
On Mon, Nov 15, 2010 at 09:53:50AM +0200, Michael S. Tsirkin wrote:
> On Mon, Nov 15, 2010 at 09:40:08AM +0200, Gleb Natapov wrote:
> > On Sun, Nov 14, 2010 at 10:40:33PM -0500, Kevin O'Connor wrote:
> > > On Sun, Nov 14, 2010 at 05:39:41PM +0200, Gleb Natapov wrote:
> > > > +/*
> > > > + * This function returns device list as an array in a below format:
> > > > + * +-+-+---+-+---+--
> > > > + * |  n  |  l1 |   devpath1|  l2 |  devpath2 | ...
> > > > + * +-+-+---+-+---+--
> > > > + * where:
> > > > + *   n - a number of devise pathes (one byte)
> > > > + *   l - length of following device path string (one byte)
> > > > + *   devpath - non-null terminated string of length l representing
> > > > + * one device path
> > > > + */
> > > 
> > > Why not just return a newline separated list that is null terminated?
> > > 
> > Doing it like this will needlessly complicate firmware side. How do you
> > know how much memory to allocate before reading device list?
> 
> Do a memory scan, count newlines until you reach 0?
> 
To do memory scan you need to read it into memory first. To read it into
memory you need to know how much memory to allocate to know how much
memory to allocate you meed to do memory scan... Notice pattern here :)
Of course you can scan IO space too discarding everything you read first
time, but why introduce broken interface in the first place?

> > Doing it
> > like Blue suggest (have BOOTINDEX_LEN and BOOTINDEX_STRING) solves this.
> > To create nice array from bootindex string you firmware will still have
> > to do additional pass on it though.
> 
> Why is this a problem? Pass over memory is cheap, isn't it?
> 
More code, each line of code potentially introduce bug. But I will go with
Blue suggestion anyway since we already use it for other things.

> > With format like above the code
> > would look like that:
> > 
> > qemu_cfg_read(&n, 1);
> > arr = alloc(n);
> > for (i=0; i >  qemu_cfg_read(&l, 1);
> >  arr[i] = zalloc(l+1);
> >  qemu_cfg_read(arr[i], l);
> > }
> >  
> > 
> > --
> > Gleb.
> 
> 
> At this point I don't care about format.
I do.

> But I would like one without 1-byte-length limitations,
> just so we can cover whatever pci can through at us.
> 
I agree. 1-byte for one device string may be to limiting. It is still
more then 15 PCI bridges on a PC and if you have your pci bus go that
deep you are doing something very wrong. But according to spec device
name can be 32 byte long and device address may be 64bit physical
address and that makes length of one device element to be 50 byte.

--
Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-14 Thread Michael S. Tsirkin
On Mon, Nov 15, 2010 at 09:40:08AM +0200, Gleb Natapov wrote:
> On Sun, Nov 14, 2010 at 10:40:33PM -0500, Kevin O'Connor wrote:
> > On Sun, Nov 14, 2010 at 05:39:41PM +0200, Gleb Natapov wrote:
> > > +/*
> > > + * This function returns device list as an array in a below format:
> > > + * +-+-+---+-+---+--
> > > + * |  n  |  l1 |   devpath1|  l2 |  devpath2 | ...
> > > + * +-+-+---+-+---+--
> > > + * where:
> > > + *   n - a number of devise pathes (one byte)
> > > + *   l - length of following device path string (one byte)
> > > + *   devpath - non-null terminated string of length l representing
> > > + * one device path
> > > + */
> > 
> > Why not just return a newline separated list that is null terminated?
> > 
> Doing it like this will needlessly complicate firmware side. How do you
> know how much memory to allocate before reading device list?

Do a memory scan, count newlines until you reach 0?

> Doing it
> like Blue suggest (have BOOTINDEX_LEN and BOOTINDEX_STRING) solves this.
> To create nice array from bootindex string you firmware will still have
> to do additional pass on it though.

Why is this a problem? Pass over memory is cheap, isn't it?

> With format like above the code
> would look like that:
> 
> qemu_cfg_read(&n, 1);
> arr = alloc(n);
> for (i=0; i  qemu_cfg_read(&l, 1);
>  arr[i] = zalloc(l+1);
>  qemu_cfg_read(arr[i], l);
> }
>  
> 
> --
>   Gleb.


At this point I don't care about format.
But I would like one without 1-byte-length limitations,
just so we can cover whatever pci can through at us.

-- 
MST
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-14 Thread Gleb Natapov
On Sun, Nov 14, 2010 at 10:40:33PM -0500, Kevin O'Connor wrote:
> On Sun, Nov 14, 2010 at 05:39:41PM +0200, Gleb Natapov wrote:
> > +/*
> > + * This function returns device list as an array in a below format:
> > + * +-+-+---+-+---+--
> > + * |  n  |  l1 |   devpath1|  l2 |  devpath2 | ...
> > + * +-+-+---+-+---+--
> > + * where:
> > + *   n - a number of devise pathes (one byte)
> > + *   l - length of following device path string (one byte)
> > + *   devpath - non-null terminated string of length l representing
> > + * one device path
> > + */
> 
> Why not just return a newline separated list that is null terminated?
> 
Doing it like this will needlessly complicate firmware side. How do you
know how much memory to allocate before reading device list? Doing it
like Blue suggest (have BOOTINDEX_LEN and BOOTINDEX_STRING) solves this.
To create nice array from bootindex string you firmware will still have
to do additional pass on it though. With format like above the code
would look like that:

qemu_cfg_read(&n, 1);
arr = alloc(n);
for (i=0; ihttp://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-14 Thread Kevin O'Connor
On Sun, Nov 14, 2010 at 05:39:41PM +0200, Gleb Natapov wrote:
> +/*
> + * This function returns device list as an array in a below format:
> + * +-+-+---+-+---+--
> + * |  n  |  l1 |   devpath1|  l2 |  devpath2 | ...
> + * +-+-+---+-+---+--
> + * where:
> + *   n - a number of devise pathes (one byte)
> + *   l - length of following device path string (one byte)
> + *   devpath - non-null terminated string of length l representing
> + * one device path
> + */

Why not just return a newline separated list that is null terminated?

-Kevin
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-14 Thread Blue Swirl
On Sun, Nov 14, 2010 at 3:39 PM, Gleb Natapov  wrote:
>
> Signed-off-by: Gleb Natapov 
> ---
>  hw/fw_cfg.c |   14 ++
>  hw/fw_cfg.h |    4 +++-
>  sysemu.h    |    1 +
>  vl.c        |   51 +++
>  4 files changed, 69 insertions(+), 1 deletions(-)
>
> diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c
> index 7b9434f..f6a67db 100644
> --- a/hw/fw_cfg.c
> +++ b/hw/fw_cfg.c
> @@ -53,6 +53,7 @@ struct FWCfgState {
>     FWCfgFiles *files;
>     uint16_t cur_entry;
>     uint32_t cur_offset;
> +    Notifier machine_ready;
>  };
>
>  static void fw_cfg_write(FWCfgState *s, uint8_t value)
> @@ -315,6 +316,15 @@ int fw_cfg_add_file(FWCfgState *s,  const char 
> *filename, uint8_t *data,
>     return 1;
>  }
>
> +static void fw_cfg_machine_ready(struct Notifier* n)
> +{
> +    uint32_t len;
> +    char *bootindex = get_boot_devices_list(&len);
> +
> +    fw_cfg_add_bytes(container_of(n, FWCfgState, machine_ready),
> +                     FW_CFG_BOOTINDEX, (uint8_t*)bootindex, len);
> +}
> +
>  FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t data_port,
>                         target_phys_addr_t ctl_addr, target_phys_addr_t 
> data_addr)
>  {
> @@ -343,6 +353,10 @@ FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t 
> data_port,
>     fw_cfg_add_i16(s, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
>     fw_cfg_add_i16(s, FW_CFG_BOOT_MENU, (uint16_t)boot_menu);
>
> +
> +    s->machine_ready.notify = fw_cfg_machine_ready;
> +    qemu_add_machine_init_done_notifier(&s->machine_ready);
> +
>     return s;
>  }
>
> diff --git a/hw/fw_cfg.h b/hw/fw_cfg.h
> index 856bf91..4d61410 100644
> --- a/hw/fw_cfg.h
> +++ b/hw/fw_cfg.h
> @@ -30,7 +30,9 @@
>
>  #define FW_CFG_FILE_FIRST       0x20
>  #define FW_CFG_FILE_SLOTS       0x10
> -#define FW_CFG_MAX_ENTRY        (FW_CFG_FILE_FIRST+FW_CFG_FILE_SLOTS)
> +#define FW_CFG_FILE_LAST_SLOT   (FW_CFG_FILE_FIRST+FW_CFG_FILE_SLOTS)
> +#define FW_CFG_BOOTINDEX        (FW_CFG_FILE_LAST_SLOT + 1)
> +#define FW_CFG_MAX_ENTRY        FW_CFG_BOOTINDEX

This should be
#define FW_CFG_MAX_ENTRY(FW_CFG_BOOTINDEX + 1)
because the check is like this:
if ((key & FW_CFG_ENTRY_MASK) >= FW_CFG_MAX_ENTRY) {
s->cur_entry = FW_CFG_INVALID;

With that change, I got the bootindex passed to OpenBIOS:
OpenBIOS for Sparc64
Configuration device id QEMU version 1 machine id 0
kernel cmdline
CPUs: 1 x SUNW,UltraSPARC-IIi
UUID: ----
bootindex num_strings 1
bootindex /p...@01fe/i...@5/dr...@1/d...@0

The device path does not match exactly, but it's close:
/p...@1fe,0/pci-...@5/i...@600/d...@0
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-14 Thread Michael S. Tsirkin
On Sun, Nov 14, 2010 at 09:13:03PM +, Blue Swirl wrote:
> On Sun, Nov 14, 2010 at 8:54 PM, Michael S. Tsirkin  wrote:
> > On Sun, Nov 14, 2010 at 08:49:59PM +, Blue Swirl wrote:
> >> On Sun, Nov 14, 2010 at 3:39 PM, Gleb Natapov  wrote:
> >> >
> >> > Signed-off-by: Gleb Natapov 
> >> > ---
> >> >  hw/fw_cfg.c |   14 ++
> >> >  hw/fw_cfg.h |    4 +++-
> >> >  sysemu.h    |    1 +
> >> >  vl.c        |   51 +++
> >> >  4 files changed, 69 insertions(+), 1 deletions(-)
> >> >
> >> > diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c
> >> > index 7b9434f..f6a67db 100644
> >> > --- a/hw/fw_cfg.c
> >> > +++ b/hw/fw_cfg.c
> >> > @@ -53,6 +53,7 @@ struct FWCfgState {
> >> >     FWCfgFiles *files;
> >> >     uint16_t cur_entry;
> >> >     uint32_t cur_offset;
> >> > +    Notifier machine_ready;
> >> >  };
> >> >
> >> >  static void fw_cfg_write(FWCfgState *s, uint8_t value)
> >> > @@ -315,6 +316,15 @@ int fw_cfg_add_file(FWCfgState *s,  const char 
> >> > *filename, uint8_t *data,
> >> >     return 1;
> >> >  }
> >> >
> >> > +static void fw_cfg_machine_ready(struct Notifier* n)
> >> > +{
> >> > +    uint32_t len;
> >> > +    char *bootindex = get_boot_devices_list(&len);
> >> > +
> >> > +    fw_cfg_add_bytes(container_of(n, FWCfgState, machine_ready),
> >> > +                     FW_CFG_BOOTINDEX, (uint8_t*)bootindex, len);
> >>
> >> I started to implement this to OpenBIOS but I noticed a small issue.
> >> First the first byte must be read to determine length. Then the read
> >> routine will be called again to read the correct amount of bytes. This
> >> would work, but since there is no shortage of IDs, I'd prefer a system
> >> where one ID is used to query the length and another ID is used to
> >> read the data, without the length byte. This is similar how command
> >> line, initrd etc. are handled.
> >>
> >> This would have the advantage that since fw_cfg uses little endian
> >> format, the length value would easily scale to for example 64 bits to
> >> support terabytes of boot device lists. ;-)
> >
> > Yea. Let's just print # of devices as a property, in ASCII.
> > No endian-ness, no nothing.
> > Also - can we just NULL-terminate each ID?
> 
> No, we should use LE numbers like other IDs. To be more specific, this
> is what I meant (instead of FW_CFG_BOOTINDEX):
> FW_CFG_BOOTINDEX_LEN: get LE integer length of the boot device data.
> FW_CFG_BOOTINDEX_DATA: get the boot device data as NUL terminated C
> strings, all strings back-to-back. The reader can determine number of
> strings.

Sounds good.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-14 Thread Blue Swirl
On Sun, Nov 14, 2010 at 8:54 PM, Michael S. Tsirkin  wrote:
> On Sun, Nov 14, 2010 at 08:49:59PM +, Blue Swirl wrote:
>> On Sun, Nov 14, 2010 at 3:39 PM, Gleb Natapov  wrote:
>> >
>> > Signed-off-by: Gleb Natapov 
>> > ---
>> >  hw/fw_cfg.c |   14 ++
>> >  hw/fw_cfg.h |    4 +++-
>> >  sysemu.h    |    1 +
>> >  vl.c        |   51 +++
>> >  4 files changed, 69 insertions(+), 1 deletions(-)
>> >
>> > diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c
>> > index 7b9434f..f6a67db 100644
>> > --- a/hw/fw_cfg.c
>> > +++ b/hw/fw_cfg.c
>> > @@ -53,6 +53,7 @@ struct FWCfgState {
>> >     FWCfgFiles *files;
>> >     uint16_t cur_entry;
>> >     uint32_t cur_offset;
>> > +    Notifier machine_ready;
>> >  };
>> >
>> >  static void fw_cfg_write(FWCfgState *s, uint8_t value)
>> > @@ -315,6 +316,15 @@ int fw_cfg_add_file(FWCfgState *s,  const char 
>> > *filename, uint8_t *data,
>> >     return 1;
>> >  }
>> >
>> > +static void fw_cfg_machine_ready(struct Notifier* n)
>> > +{
>> > +    uint32_t len;
>> > +    char *bootindex = get_boot_devices_list(&len);
>> > +
>> > +    fw_cfg_add_bytes(container_of(n, FWCfgState, machine_ready),
>> > +                     FW_CFG_BOOTINDEX, (uint8_t*)bootindex, len);
>>
>> I started to implement this to OpenBIOS but I noticed a small issue.
>> First the first byte must be read to determine length. Then the read
>> routine will be called again to read the correct amount of bytes. This
>> would work, but since there is no shortage of IDs, I'd prefer a system
>> where one ID is used to query the length and another ID is used to
>> read the data, without the length byte. This is similar how command
>> line, initrd etc. are handled.
>>
>> This would have the advantage that since fw_cfg uses little endian
>> format, the length value would easily scale to for example 64 bits to
>> support terabytes of boot device lists. ;-)
>
> Yea. Let's just print # of devices as a property, in ASCII.
> No endian-ness, no nothing.
> Also - can we just NULL-terminate each ID?

No, we should use LE numbers like other IDs. To be more specific, this
is what I meant (instead of FW_CFG_BOOTINDEX):
FW_CFG_BOOTINDEX_LEN: get LE integer length of the boot device data.
FW_CFG_BOOTINDEX_DATA: get the boot device data as NUL terminated C
strings, all strings back-to-back. The reader can determine number of
strings.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-14 Thread Michael S. Tsirkin
On Sun, Nov 14, 2010 at 08:52:37PM +0200, Gleb Natapov wrote:
> > > +
> > > +len = strlen(bootpath);
> > > +list = qemu_realloc(list, total + len + 1);
> > > +list[total++] = len;
> > > +memcpy(&list[total], bootpath, len);
> > > +total += len;
> > > +c++;
> > > +qemu_free(bootpath);
> > 
> > Man, is this tricky.
> > 
> Nah, not at all.

I think it will be easier if we don't try to do this in one pass.
1. pass: calculate total length and # of devices
2. allocate
2. pass fill in

> --
>   Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-14 Thread Michael S. Tsirkin
On Sun, Nov 14, 2010 at 08:52:37PM +0200, Gleb Natapov wrote:
> On Sun, Nov 14, 2010 at 08:41:37PM +0200, Michael S. Tsirkin wrote:
> > On Sun, Nov 14, 2010 at 05:39:41PM +0200, Gleb Natapov wrote:
> > > 
> > > Signed-off-by: Gleb Natapov 
> > > ---
> > >  hw/fw_cfg.c |   14 ++
> > >  hw/fw_cfg.h |4 +++-
> > >  sysemu.h|1 +
> > >  vl.c|   51 +++
> > >  4 files changed, 69 insertions(+), 1 deletions(-)
> > > 
> > > diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c
> > > index 7b9434f..f6a67db 100644
> > > --- a/hw/fw_cfg.c
> > > +++ b/hw/fw_cfg.c
> > > @@ -53,6 +53,7 @@ struct FWCfgState {
> > >  FWCfgFiles *files;
> > >  uint16_t cur_entry;
> > >  uint32_t cur_offset;
> > > +Notifier machine_ready;
> > >  };
> > >  
> > >  static void fw_cfg_write(FWCfgState *s, uint8_t value)
> > > @@ -315,6 +316,15 @@ int fw_cfg_add_file(FWCfgState *s,  const char 
> > > *filename, uint8_t *data,
> > >  return 1;
> > >  }
> > >  
> > > +static void fw_cfg_machine_ready(struct Notifier* n)
> > > +{
> > > +uint32_t len;
> > > +char *bootindex = get_boot_devices_list(&len);
> > > +
> > > +fw_cfg_add_bytes(container_of(n, FWCfgState, machine_ready),
> > > + FW_CFG_BOOTINDEX, (uint8_t*)bootindex, len);
> > > +}
> > > +
> > >  FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t data_port,
> > >  target_phys_addr_t ctl_addr, target_phys_addr_t 
> > > data_addr)
> > >  {
> > > @@ -343,6 +353,10 @@ FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t 
> > > data_port,
> > >  fw_cfg_add_i16(s, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
> > >  fw_cfg_add_i16(s, FW_CFG_BOOT_MENU, (uint16_t)boot_menu);
> > >  
> > > +
> > > +s->machine_ready.notify = fw_cfg_machine_ready;
> > > +qemu_add_machine_init_done_notifier(&s->machine_ready);
> > > +
> > >  return s;
> > >  }
> > >  
> > > diff --git a/hw/fw_cfg.h b/hw/fw_cfg.h
> > > index 856bf91..4d61410 100644
> > > --- a/hw/fw_cfg.h
> > > +++ b/hw/fw_cfg.h
> > > @@ -30,7 +30,9 @@
> > >  
> > >  #define FW_CFG_FILE_FIRST   0x20
> > >  #define FW_CFG_FILE_SLOTS   0x10
> > > -#define FW_CFG_MAX_ENTRY(FW_CFG_FILE_FIRST+FW_CFG_FILE_SLOTS)
> > > +#define FW_CFG_FILE_LAST_SLOT   (FW_CFG_FILE_FIRST+FW_CFG_FILE_SLOTS)
> > > +#define FW_CFG_BOOTINDEX(FW_CFG_FILE_LAST_SLOT + 1)
> > > +#define FW_CFG_MAX_ENTRYFW_CFG_BOOTINDEX 
> > >  
> > >  #define FW_CFG_WRITE_CHANNEL0x4000
> > >  #define FW_CFG_ARCH_LOCAL   0x8000
> > > diff --git a/sysemu.h b/sysemu.h
> > > index c42f33a..38a20a3 100644
> > > --- a/sysemu.h
> > > +++ b/sysemu.h
> > > @@ -196,4 +196,5 @@ void register_devices(void);
> > >  
> > >  void add_boot_device_path(int32_t bootindex, DeviceState *dev,
> > >const char *suffix);
> > > +char *get_boot_devices_list(uint32_t *size);
> > >  #endif
> > > diff --git a/vl.c b/vl.c
> > > index 918d988..cca1e76 100644
> > > --- a/vl.c
> > > +++ b/vl.c
> > > @@ -735,6 +735,57 @@ void add_boot_device_path(int32_t bootindex, 
> > > DeviceState *dev,
> > >  QTAILQ_INSERT_TAIL(&fw_boot_order, node, link);
> > >  }
> > >  
> > > +/*
> > > + * This function returns device list as an array in a below format:
> > > + * +-+-+---+-+---+--
> > > + * |  n  |  l1 |   devpath1|  l2 |  devpath2 | ...
> > > + * +-+-+---+-+---+--
> > 
> > No one will ever want > 256 devices? Let's make it 4 byte or something.
> > 
> More then 256 _boot_ devices. I think this is more then reasonable.
> 
> > > + * where:
> > > + *   n - a number of devise pathes (one byte)
> > > + *   l - length of following device path string (one byte)
> > 
> > Might not fit: with pci we can have 256 nested buses.
> Theoretically. But will it practically happen?

Why not? It's easy to specify this on qemu command line.
You do nothing to detect this and gracefully fail either, do you?

-- 
MST
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-14 Thread Michael S. Tsirkin
On Sun, Nov 14, 2010 at 08:49:59PM +, Blue Swirl wrote:
> On Sun, Nov 14, 2010 at 3:39 PM, Gleb Natapov  wrote:
> >
> > Signed-off-by: Gleb Natapov 
> > ---
> >  hw/fw_cfg.c |   14 ++
> >  hw/fw_cfg.h |    4 +++-
> >  sysemu.h    |    1 +
> >  vl.c        |   51 +++
> >  4 files changed, 69 insertions(+), 1 deletions(-)
> >
> > diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c
> > index 7b9434f..f6a67db 100644
> > --- a/hw/fw_cfg.c
> > +++ b/hw/fw_cfg.c
> > @@ -53,6 +53,7 @@ struct FWCfgState {
> >     FWCfgFiles *files;
> >     uint16_t cur_entry;
> >     uint32_t cur_offset;
> > +    Notifier machine_ready;
> >  };
> >
> >  static void fw_cfg_write(FWCfgState *s, uint8_t value)
> > @@ -315,6 +316,15 @@ int fw_cfg_add_file(FWCfgState *s,  const char 
> > *filename, uint8_t *data,
> >     return 1;
> >  }
> >
> > +static void fw_cfg_machine_ready(struct Notifier* n)
> > +{
> > +    uint32_t len;
> > +    char *bootindex = get_boot_devices_list(&len);
> > +
> > +    fw_cfg_add_bytes(container_of(n, FWCfgState, machine_ready),
> > +                     FW_CFG_BOOTINDEX, (uint8_t*)bootindex, len);
> 
> I started to implement this to OpenBIOS but I noticed a small issue.
> First the first byte must be read to determine length. Then the read
> routine will be called again to read the correct amount of bytes. This
> would work, but since there is no shortage of IDs, I'd prefer a system
> where one ID is used to query the length and another ID is used to
> read the data, without the length byte. This is similar how command
> line, initrd etc. are handled.
> 
> This would have the advantage that since fw_cfg uses little endian
> format, the length value would easily scale to for example 64 bits to
> support terabytes of boot device lists. ;-)

Yea. Let's just print # of devices as a property, in ASCII.
No endian-ness, no nothing.
Also - can we just NULL-terminate each ID?
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-14 Thread Blue Swirl
On Sun, Nov 14, 2010 at 3:39 PM, Gleb Natapov  wrote:
>
> Signed-off-by: Gleb Natapov 
> ---
>  hw/fw_cfg.c |   14 ++
>  hw/fw_cfg.h |    4 +++-
>  sysemu.h    |    1 +
>  vl.c        |   51 +++
>  4 files changed, 69 insertions(+), 1 deletions(-)
>
> diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c
> index 7b9434f..f6a67db 100644
> --- a/hw/fw_cfg.c
> +++ b/hw/fw_cfg.c
> @@ -53,6 +53,7 @@ struct FWCfgState {
>     FWCfgFiles *files;
>     uint16_t cur_entry;
>     uint32_t cur_offset;
> +    Notifier machine_ready;
>  };
>
>  static void fw_cfg_write(FWCfgState *s, uint8_t value)
> @@ -315,6 +316,15 @@ int fw_cfg_add_file(FWCfgState *s,  const char 
> *filename, uint8_t *data,
>     return 1;
>  }
>
> +static void fw_cfg_machine_ready(struct Notifier* n)
> +{
> +    uint32_t len;
> +    char *bootindex = get_boot_devices_list(&len);
> +
> +    fw_cfg_add_bytes(container_of(n, FWCfgState, machine_ready),
> +                     FW_CFG_BOOTINDEX, (uint8_t*)bootindex, len);

I started to implement this to OpenBIOS but I noticed a small issue.
First the first byte must be read to determine length. Then the read
routine will be called again to read the correct amount of bytes. This
would work, but since there is no shortage of IDs, I'd prefer a system
where one ID is used to query the length and another ID is used to
read the data, without the length byte. This is similar how command
line, initrd etc. are handled.

This would have the advantage that since fw_cfg uses little endian
format, the length value would easily scale to for example 64 bits to
support terabytes of boot device lists. ;-)
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-14 Thread Gleb Natapov
On Sun, Nov 14, 2010 at 08:41:37PM +0200, Michael S. Tsirkin wrote:
> On Sun, Nov 14, 2010 at 05:39:41PM +0200, Gleb Natapov wrote:
> > 
> > Signed-off-by: Gleb Natapov 
> > ---
> >  hw/fw_cfg.c |   14 ++
> >  hw/fw_cfg.h |4 +++-
> >  sysemu.h|1 +
> >  vl.c|   51 +++
> >  4 files changed, 69 insertions(+), 1 deletions(-)
> > 
> > diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c
> > index 7b9434f..f6a67db 100644
> > --- a/hw/fw_cfg.c
> > +++ b/hw/fw_cfg.c
> > @@ -53,6 +53,7 @@ struct FWCfgState {
> >  FWCfgFiles *files;
> >  uint16_t cur_entry;
> >  uint32_t cur_offset;
> > +Notifier machine_ready;
> >  };
> >  
> >  static void fw_cfg_write(FWCfgState *s, uint8_t value)
> > @@ -315,6 +316,15 @@ int fw_cfg_add_file(FWCfgState *s,  const char 
> > *filename, uint8_t *data,
> >  return 1;
> >  }
> >  
> > +static void fw_cfg_machine_ready(struct Notifier* n)
> > +{
> > +uint32_t len;
> > +char *bootindex = get_boot_devices_list(&len);
> > +
> > +fw_cfg_add_bytes(container_of(n, FWCfgState, machine_ready),
> > + FW_CFG_BOOTINDEX, (uint8_t*)bootindex, len);
> > +}
> > +
> >  FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t data_port,
> >  target_phys_addr_t ctl_addr, target_phys_addr_t 
> > data_addr)
> >  {
> > @@ -343,6 +353,10 @@ FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t 
> > data_port,
> >  fw_cfg_add_i16(s, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
> >  fw_cfg_add_i16(s, FW_CFG_BOOT_MENU, (uint16_t)boot_menu);
> >  
> > +
> > +s->machine_ready.notify = fw_cfg_machine_ready;
> > +qemu_add_machine_init_done_notifier(&s->machine_ready);
> > +
> >  return s;
> >  }
> >  
> > diff --git a/hw/fw_cfg.h b/hw/fw_cfg.h
> > index 856bf91..4d61410 100644
> > --- a/hw/fw_cfg.h
> > +++ b/hw/fw_cfg.h
> > @@ -30,7 +30,9 @@
> >  
> >  #define FW_CFG_FILE_FIRST   0x20
> >  #define FW_CFG_FILE_SLOTS   0x10
> > -#define FW_CFG_MAX_ENTRY(FW_CFG_FILE_FIRST+FW_CFG_FILE_SLOTS)
> > +#define FW_CFG_FILE_LAST_SLOT   (FW_CFG_FILE_FIRST+FW_CFG_FILE_SLOTS)
> > +#define FW_CFG_BOOTINDEX(FW_CFG_FILE_LAST_SLOT + 1)
> > +#define FW_CFG_MAX_ENTRYFW_CFG_BOOTINDEX 
> >  
> >  #define FW_CFG_WRITE_CHANNEL0x4000
> >  #define FW_CFG_ARCH_LOCAL   0x8000
> > diff --git a/sysemu.h b/sysemu.h
> > index c42f33a..38a20a3 100644
> > --- a/sysemu.h
> > +++ b/sysemu.h
> > @@ -196,4 +196,5 @@ void register_devices(void);
> >  
> >  void add_boot_device_path(int32_t bootindex, DeviceState *dev,
> >const char *suffix);
> > +char *get_boot_devices_list(uint32_t *size);
> >  #endif
> > diff --git a/vl.c b/vl.c
> > index 918d988..cca1e76 100644
> > --- a/vl.c
> > +++ b/vl.c
> > @@ -735,6 +735,57 @@ void add_boot_device_path(int32_t bootindex, 
> > DeviceState *dev,
> >  QTAILQ_INSERT_TAIL(&fw_boot_order, node, link);
> >  }
> >  
> > +/*
> > + * This function returns device list as an array in a below format:
> > + * +-+-+---+-+---+--
> > + * |  n  |  l1 |   devpath1|  l2 |  devpath2 | ...
> > + * +-+-+---+-+---+--
> 
> No one will ever want > 256 devices? Let's make it 4 byte or something.
> 
More then 256 _boot_ devices. I think this is more then reasonable.

> > + * where:
> > + *   n - a number of devise pathes (one byte)
> > + *   l - length of following device path string (one byte)
> 
> Might not fit: with pci we can have 256 nested buses.
Theoretically. But will it practically happen?

> How about simply null-terminating each path?
> 
Will be harder for Seabios. I can use more then one byte for length, but
I tried to avoid endianess handling.

> > + *   devpath - non-null terminated string of length l representing
> > + * one device path
> > + */
> 
> Document return value + parameters as well?
> 
Return value is documented above :)

> > +char *get_boot_devices_list(uint32_t *size)
> > +{
> > +FWBootEntry *i;
> > +uint32_t total = 1, c = 0;
> > +char *list = qemu_malloc(1);
> > +
> > +QTAILQ_FOREACH(i, &fw_boot_order, link) {
> > +char *devpath = NULL, *bootpath;
> > +int len;
> > +
> > +if (i->dev) {
> > +devpath = qdev_get_fw_dev_path(i->dev);
> > +assert(devpath);
> > +}
> > +
> > +if (i->suffix && devpath) {
> > +bootpath = qemu_malloc(strlen(devpath) + strlen(i->suffix) + 
> > 2);
> > +sprintf(bootpath, "%s/%s", devpath, i->suffix);
> > +qemu_free(devpath);
> 
> devpath is allocated with strdup, not qemu_malloc,
> so I guess it should be freed with free?
There is code that do it like this all over qemu.

> Alternatively, let's add qemu_strdup
> Might be a good idea: fix error handling here and elsewhere.
> 
> > +} else if (devpath) {
> > +bootpath = devpath;
> > +} else {
>

Re: [PATCHv4 15/15] Pass boot device list to firmware.

2010-11-14 Thread Michael S. Tsirkin
On Sun, Nov 14, 2010 at 05:39:41PM +0200, Gleb Natapov wrote:
> 
> Signed-off-by: Gleb Natapov 
> ---
>  hw/fw_cfg.c |   14 ++
>  hw/fw_cfg.h |4 +++-
>  sysemu.h|1 +
>  vl.c|   51 +++
>  4 files changed, 69 insertions(+), 1 deletions(-)
> 
> diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c
> index 7b9434f..f6a67db 100644
> --- a/hw/fw_cfg.c
> +++ b/hw/fw_cfg.c
> @@ -53,6 +53,7 @@ struct FWCfgState {
>  FWCfgFiles *files;
>  uint16_t cur_entry;
>  uint32_t cur_offset;
> +Notifier machine_ready;
>  };
>  
>  static void fw_cfg_write(FWCfgState *s, uint8_t value)
> @@ -315,6 +316,15 @@ int fw_cfg_add_file(FWCfgState *s,  const char 
> *filename, uint8_t *data,
>  return 1;
>  }
>  
> +static void fw_cfg_machine_ready(struct Notifier* n)
> +{
> +uint32_t len;
> +char *bootindex = get_boot_devices_list(&len);
> +
> +fw_cfg_add_bytes(container_of(n, FWCfgState, machine_ready),
> + FW_CFG_BOOTINDEX, (uint8_t*)bootindex, len);
> +}
> +
>  FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t data_port,
>  target_phys_addr_t ctl_addr, target_phys_addr_t 
> data_addr)
>  {
> @@ -343,6 +353,10 @@ FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t 
> data_port,
>  fw_cfg_add_i16(s, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
>  fw_cfg_add_i16(s, FW_CFG_BOOT_MENU, (uint16_t)boot_menu);
>  
> +
> +s->machine_ready.notify = fw_cfg_machine_ready;
> +qemu_add_machine_init_done_notifier(&s->machine_ready);
> +
>  return s;
>  }
>  
> diff --git a/hw/fw_cfg.h b/hw/fw_cfg.h
> index 856bf91..4d61410 100644
> --- a/hw/fw_cfg.h
> +++ b/hw/fw_cfg.h
> @@ -30,7 +30,9 @@
>  
>  #define FW_CFG_FILE_FIRST   0x20
>  #define FW_CFG_FILE_SLOTS   0x10
> -#define FW_CFG_MAX_ENTRY(FW_CFG_FILE_FIRST+FW_CFG_FILE_SLOTS)
> +#define FW_CFG_FILE_LAST_SLOT   (FW_CFG_FILE_FIRST+FW_CFG_FILE_SLOTS)
> +#define FW_CFG_BOOTINDEX(FW_CFG_FILE_LAST_SLOT + 1)
> +#define FW_CFG_MAX_ENTRYFW_CFG_BOOTINDEX 
>  
>  #define FW_CFG_WRITE_CHANNEL0x4000
>  #define FW_CFG_ARCH_LOCAL   0x8000
> diff --git a/sysemu.h b/sysemu.h
> index c42f33a..38a20a3 100644
> --- a/sysemu.h
> +++ b/sysemu.h
> @@ -196,4 +196,5 @@ void register_devices(void);
>  
>  void add_boot_device_path(int32_t bootindex, DeviceState *dev,
>const char *suffix);
> +char *get_boot_devices_list(uint32_t *size);
>  #endif
> diff --git a/vl.c b/vl.c
> index 918d988..cca1e76 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -735,6 +735,57 @@ void add_boot_device_path(int32_t bootindex, DeviceState 
> *dev,
>  QTAILQ_INSERT_TAIL(&fw_boot_order, node, link);
>  }
>  
> +/*
> + * This function returns device list as an array in a below format:
> + * +-+-+---+-+---+--
> + * |  n  |  l1 |   devpath1|  l2 |  devpath2 | ...
> + * +-+-+---+-+---+--

No one will ever want > 256 devices? Let's make it 4 byte or something.

> + * where:
> + *   n - a number of devise pathes (one byte)
> + *   l - length of following device path string (one byte)

Might not fit: with pci we can have 256 nested buses.
How about simply null-terminating each path?

> + *   devpath - non-null terminated string of length l representing
> + * one device path
> + */

Document return value + parameters as well?

> +char *get_boot_devices_list(uint32_t *size)
> +{
> +FWBootEntry *i;
> +uint32_t total = 1, c = 0;
> +char *list = qemu_malloc(1);
> +
> +QTAILQ_FOREACH(i, &fw_boot_order, link) {
> +char *devpath = NULL, *bootpath;
> +int len;
> +
> +if (i->dev) {
> +devpath = qdev_get_fw_dev_path(i->dev);
> +assert(devpath);
> +}
> +
> +if (i->suffix && devpath) {
> +bootpath = qemu_malloc(strlen(devpath) + strlen(i->suffix) + 2);
> +sprintf(bootpath, "%s/%s", devpath, i->suffix);
> +qemu_free(devpath);

devpath is allocated with strdup, not qemu_malloc,
so I guess it should be freed with free?
Alternatively, let's add qemu_strdup
Might be a good idea: fix error handling here and elsewhere.

> +} else if (devpath) {
> +bootpath = devpath;
> +} else {
> +bootpath = strdup(i->suffix);
> +}

assert(bootpath).

> +
> +len = strlen(bootpath);
> +list = qemu_realloc(list, total + len + 1);
> +list[total++] = len;
> +memcpy(&list[total], bootpath, len);
> +total += len;
> +c++;
> +qemu_free(bootpath);

Man, is this tricky.

> +}
> +
> +list[0] = c;
> +*size = total;
> +
> +return list;
> +}
> +
>  static void numa_add(const char *optarg)
>  {
>  char option[128];
> -- 
> 1.7.1
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo i

[PATCHv4 15/15] Pass boot device list to firmware.

2010-11-14 Thread Gleb Natapov

Signed-off-by: Gleb Natapov 
---
 hw/fw_cfg.c |   14 ++
 hw/fw_cfg.h |4 +++-
 sysemu.h|1 +
 vl.c|   51 +++
 4 files changed, 69 insertions(+), 1 deletions(-)

diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c
index 7b9434f..f6a67db 100644
--- a/hw/fw_cfg.c
+++ b/hw/fw_cfg.c
@@ -53,6 +53,7 @@ struct FWCfgState {
 FWCfgFiles *files;
 uint16_t cur_entry;
 uint32_t cur_offset;
+Notifier machine_ready;
 };
 
 static void fw_cfg_write(FWCfgState *s, uint8_t value)
@@ -315,6 +316,15 @@ int fw_cfg_add_file(FWCfgState *s,  const char *filename, 
uint8_t *data,
 return 1;
 }
 
+static void fw_cfg_machine_ready(struct Notifier* n)
+{
+uint32_t len;
+char *bootindex = get_boot_devices_list(&len);
+
+fw_cfg_add_bytes(container_of(n, FWCfgState, machine_ready),
+ FW_CFG_BOOTINDEX, (uint8_t*)bootindex, len);
+}
+
 FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t data_port,
 target_phys_addr_t ctl_addr, target_phys_addr_t 
data_addr)
 {
@@ -343,6 +353,10 @@ FWCfgState *fw_cfg_init(uint32_t ctl_port, uint32_t 
data_port,
 fw_cfg_add_i16(s, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
 fw_cfg_add_i16(s, FW_CFG_BOOT_MENU, (uint16_t)boot_menu);
 
+
+s->machine_ready.notify = fw_cfg_machine_ready;
+qemu_add_machine_init_done_notifier(&s->machine_ready);
+
 return s;
 }
 
diff --git a/hw/fw_cfg.h b/hw/fw_cfg.h
index 856bf91..4d61410 100644
--- a/hw/fw_cfg.h
+++ b/hw/fw_cfg.h
@@ -30,7 +30,9 @@
 
 #define FW_CFG_FILE_FIRST   0x20
 #define FW_CFG_FILE_SLOTS   0x10
-#define FW_CFG_MAX_ENTRY(FW_CFG_FILE_FIRST+FW_CFG_FILE_SLOTS)
+#define FW_CFG_FILE_LAST_SLOT   (FW_CFG_FILE_FIRST+FW_CFG_FILE_SLOTS)
+#define FW_CFG_BOOTINDEX(FW_CFG_FILE_LAST_SLOT + 1)
+#define FW_CFG_MAX_ENTRYFW_CFG_BOOTINDEX 
 
 #define FW_CFG_WRITE_CHANNEL0x4000
 #define FW_CFG_ARCH_LOCAL   0x8000
diff --git a/sysemu.h b/sysemu.h
index c42f33a..38a20a3 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -196,4 +196,5 @@ void register_devices(void);
 
 void add_boot_device_path(int32_t bootindex, DeviceState *dev,
   const char *suffix);
+char *get_boot_devices_list(uint32_t *size);
 #endif
diff --git a/vl.c b/vl.c
index 918d988..cca1e76 100644
--- a/vl.c
+++ b/vl.c
@@ -735,6 +735,57 @@ void add_boot_device_path(int32_t bootindex, DeviceState 
*dev,
 QTAILQ_INSERT_TAIL(&fw_boot_order, node, link);
 }
 
+/*
+ * This function returns device list as an array in a below format:
+ * +-+-+---+-+---+--
+ * |  n  |  l1 |   devpath1|  l2 |  devpath2 | ...
+ * +-+-+---+-+---+--
+ * where:
+ *   n - a number of devise pathes (one byte)
+ *   l - length of following device path string (one byte)
+ *   devpath - non-null terminated string of length l representing
+ * one device path
+ */
+char *get_boot_devices_list(uint32_t *size)
+{
+FWBootEntry *i;
+uint32_t total = 1, c = 0;
+char *list = qemu_malloc(1);
+
+QTAILQ_FOREACH(i, &fw_boot_order, link) {
+char *devpath = NULL, *bootpath;
+int len;
+
+if (i->dev) {
+devpath = qdev_get_fw_dev_path(i->dev);
+assert(devpath);
+}
+
+if (i->suffix && devpath) {
+bootpath = qemu_malloc(strlen(devpath) + strlen(i->suffix) + 2);
+sprintf(bootpath, "%s/%s", devpath, i->suffix);
+qemu_free(devpath);
+} else if (devpath) {
+bootpath = devpath;
+} else {
+bootpath = strdup(i->suffix);
+}
+
+len = strlen(bootpath);
+list = qemu_realloc(list, total + len + 1);
+list[total++] = len;
+memcpy(&list[total], bootpath, len);
+total += len;
+c++;
+qemu_free(bootpath);
+}
+
+list[0] = c;
+*size = total;
+
+return list;
+}
+
 static void numa_add(const char *optarg)
 {
 char option[128];
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html