RE: [PATCH v8 03/15] pcie_sriov: Reset SR-IOV extended capability

2024-02-28 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Wednesday, 28 February 2024 12:33
> To: Philippe Mathieu-Daudé ; Michael S. Tsirkin
> ; Marcel Apfelbaum ;
> Alex Williamson ; Cédric Le Goater
> ; Paolo Bonzini ; Daniel P.
> Berrangé ; Eduardo Habkost
> ; Sriram Yagnaraman
> ; Jason Wang ;
> Keith Busch ; Klaus Jensen ; Markus
> Armbruster 
> Cc: qemu-devel@nongnu.org; qemu-bl...@nongnu.org; Akihiko Odaki
> 
> Subject: [PATCH v8 03/15] pcie_sriov: Reset SR-IOV extended capability
> 
> pcie_sriov_pf_disable_vfs() is called when resetting the PF, but it only 
> disables
> VFs and does not reset SR-IOV extended capability, leaking the state and
> making the VF Enable register inconsistent with the actual state.
> 
> Replace pcie_sriov_pf_disable_vfs() with pcie_sriov_pf_reset(), which does
> not only disable VFs but also resets the capability.
> 
> Signed-off-by: Akihiko Odaki 
> ---
>  include/hw/pci/pcie_sriov.h |  4 ++--
>  hw/net/igb.c|  2 +-
>  hw/nvme/ctrl.c  |  2 +-
>  hw/pci/pcie_sriov.c | 26 ++
>  4 files changed, 22 insertions(+), 12 deletions(-)
> 
> diff --git a/include/hw/pci/pcie_sriov.h b/include/hw/pci/pcie_sriov.h index
> 095fb0c9edf9..b77eb7bf58ac 100644
> --- a/include/hw/pci/pcie_sriov.h
> +++ b/include/hw/pci/pcie_sriov.h
> @@ -58,8 +58,8 @@ void pcie_sriov_pf_add_sup_pgsize(PCIDevice *dev,
> uint16_t opt_sup_pgsize);  void pcie_sriov_config_write(PCIDevice *dev,
> uint32_t address,
>   uint32_t val, int len);
> 
> -/* Reset SR/IOV VF Enable bit to unregister all VFs */ -void
> pcie_sriov_pf_disable_vfs(PCIDevice *dev);
> +/* Reset SR/IOV */
> +void pcie_sriov_pf_reset(PCIDevice *dev);
> 
>  /* Get logical VF number of a VF - only valid for VFs */  uint16_t
> pcie_sriov_vf_number(PCIDevice *dev); diff --git a/hw/net/igb.c
> b/hw/net/igb.c index 0b5c31a58bba..9345506f81ec 100644
> --- a/hw/net/igb.c
> +++ b/hw/net/igb.c
> @@ -493,7 +493,7 @@ static void igb_qdev_reset_hold(Object *obj)
> 
>  trace_e1000e_cb_qdev_reset_hold();
> 
> -pcie_sriov_pf_disable_vfs(d);
> +pcie_sriov_pf_reset(d);
>  igb_core_reset(>core);
>  }
> 
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index
> 7a56e7b79b4d..7c0d3f108724 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -7116,7 +7116,7 @@ static void nvme_ctrl_reset(NvmeCtrl *n,
> NvmeResetType rst)
>  }
> 
>  if (rst != NVME_RESET_CONTROLLER) {
> -pcie_sriov_pf_disable_vfs(pci_dev);
> +pcie_sriov_pf_reset(pci_dev);
>  }
>  }
> 
> diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index
> da209b7f47fd..51b66d1bb342 100644
> --- a/hw/pci/pcie_sriov.c
> +++ b/hw/pci/pcie_sriov.c
> @@ -249,16 +249,26 @@ void pcie_sriov_config_write(PCIDevice *dev,
> uint32_t address,  }
> 
> 
> -/* Reset SR/IOV VF Enable bit to trigger an unregister of all VFs */ -void
> pcie_sriov_pf_disable_vfs(PCIDevice *dev)
> +/* Reset SR/IOV */
> +void pcie_sriov_pf_reset(PCIDevice *dev)
>  {
>  uint16_t sriov_cap = dev->exp.sriov_cap;
> -if (sriov_cap) {
> -uint32_t val = pci_get_byte(dev->config + sriov_cap + 
> PCI_SRIOV_CTRL);
> -if (val & PCI_SRIOV_CTRL_VFE) {
> -val &= ~PCI_SRIOV_CTRL_VFE;
> -pcie_sriov_config_write(dev, sriov_cap + PCI_SRIOV_CTRL, val, 1);
> -}
> +if (!sriov_cap) {
> +return;
> +}
> +
> +pci_set_word(dev->config + sriov_cap + PCI_SRIOV_CTRL, 0);
> +unregister_vfs(dev);
> +
> +/*
> + * Default is to use 4K pages, software can modify it
> + * to any of the supported bits
> + */
> +pci_set_word(dev->config + sriov_cap + PCI_SRIOV_SYS_PGSIZE, 0x1);
> +

Just curious, do we need this?
On Linux, I thought the PCI subsystem restores the page size after reset.

Otherwise, 
Assuming change of my mail address from sriram.yagnara...@est.tech to 
@ericsson.com is accepted,
Reviewed-by: Sriram Yagnaraman 

> +for (uint16_t i = 0; i < PCI_NUM_REGIONS; i++) {
> +pci_set_quad(dev->config + sriov_cap + PCI_SRIOV_BAR + i * 4,
> + dev->exp.sriov_pf.vf_bar_type[i]);
>  }
>  }
> 
> 
> --
> 2.43.2


RE: [PATCH v8 05/15] hw/pci: Always call pcie_sriov_pf_reset()

2024-02-28 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Wednesday, 28 February 2024 12:33
> To: Philippe Mathieu-Daudé ; Michael S. Tsirkin
> ; Marcel Apfelbaum ;
> Alex Williamson ; Cédric Le Goater
> ; Paolo Bonzini ; Daniel P.
> Berrangé ; Eduardo Habkost
> ; Sriram Yagnaraman
> ; Jason Wang ;
> Keith Busch ; Klaus Jensen ; Markus
> Armbruster 
> Cc: qemu-devel@nongnu.org; qemu-bl...@nongnu.org; Akihiko Odaki
> 
> Subject: [PATCH v8 05/15] hw/pci: Always call pcie_sriov_pf_reset()
> 
> Call pcie_sriov_pf_reset() from pci_do_device_reset() just as we do for
> msi_reset() and msix_reset() to prevent duplicating code for each SR-IOV PF.
> 
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/igb.c   | 2 --
>  hw/nvme/ctrl.c | 4 
>  hw/pci/pci.c   | 1 +
>  3 files changed, 1 insertion(+), 6 deletions(-)
> 
> diff --git a/hw/net/igb.c b/hw/net/igb.c index 9345506f81ec..9b37523d6df8
> 100644
> --- a/hw/net/igb.c
> +++ b/hw/net/igb.c
> @@ -488,12 +488,10 @@ static void igb_pci_uninit(PCIDevice *pci_dev)
> 
>  static void igb_qdev_reset_hold(Object *obj)  {
> -PCIDevice *d = PCI_DEVICE(obj);
>  IGBState *s = IGB(obj);
> 
>  trace_e1000e_cb_qdev_reset_hold();
> 
> -pcie_sriov_pf_reset(d);
>  igb_core_reset(>core);
>  }
> 
> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index
> 7c0d3f108724..c1af4b87b34a 100644
> --- a/hw/nvme/ctrl.c
> +++ b/hw/nvme/ctrl.c
> @@ -7114,10 +7114,6 @@ static void nvme_ctrl_reset(NvmeCtrl *n,
> NvmeResetType rst)
>  sctrl = >sec_ctrl_list.sec[i];
>  nvme_virt_set_state(n, le16_to_cpu(sctrl->scid), false);
>  }
> -
> -if (rst != NVME_RESET_CONTROLLER) {
> -pcie_sriov_pf_reset(pci_dev);
> -}
>  }
> 
>  if (rst != NVME_RESET_CONTROLLER) { diff --git a/hw/pci/pci.c
> b/hw/pci/pci.c index 6496d027ca61..e7a39cb203ae 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -409,6 +409,7 @@ static void pci_do_device_reset(PCIDevice *dev)
> 
>  msi_reset(dev);
>  msix_reset(dev);
> +pcie_sriov_pf_reset(dev);
>  }
> 
>  /*
> 
> --
> 2.43.2

Assuming change of my mail address from sriram.yagnara...@est.tech to 
@ericsson.com is accepted,
Reviewed-by: Sriram Yagnaraman 


RE: [PATCH v8 02/15] pcie_sriov: Validate NumVFs

2024-02-28 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Wednesday, 28 February 2024 12:33
> To: Philippe Mathieu-Daudé ; Michael S. Tsirkin
> ; Marcel Apfelbaum ;
> Alex Williamson ; Cédric Le Goater
> ; Paolo Bonzini ; Daniel P.
> Berrangé ; Eduardo Habkost
> ; Sriram Yagnaraman
> ; Jason Wang ;
> Keith Busch ; Klaus Jensen ; Markus
> Armbruster 
> Cc: qemu-devel@nongnu.org; qemu-bl...@nongnu.org; Akihiko Odaki
> ; qemu-sta...@nongnu.org
> Subject: [PATCH v8 02/15] pcie_sriov: Validate NumVFs
> 
> The guest may write NumVFs greater than TotalVFs and that can lead to buffer
> overflow in VF implementations.
> 
> Cc: qemu-sta...@nongnu.org
> Fixes: CVE-2024-26327
> Fixes: 7c0fa8dff811 ("pcie: Add support for Single Root I/O Virtualization
> (SR/IOV)")
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/pci/pcie_sriov.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index
> a1fe65f5d801..da209b7f47fd 100644
> --- a/hw/pci/pcie_sriov.c
> +++ b/hw/pci/pcie_sriov.c
> @@ -176,6 +176,9 @@ static void register_vfs(PCIDevice *dev)
> 
>  assert(sriov_cap > 0);
>  num_vfs = pci_get_word(dev->config + sriov_cap + PCI_SRIOV_NUM_VF);
> +if (num_vfs > pci_get_word(dev->config + sriov_cap +
> PCI_SRIOV_TOTAL_VF)) {
> +return;
> +}
> 
>  dev->exp.sriov_pf.vf = g_new(PCIDevice *, num_vfs);
> 
> 
> --
> 2.43.2

Assuming change of my mail address from sriram.yagnara...@est.tech to 
@ericsson.com is accepted,
Reviewed-by: Sriram Yagnaraman 


RE: [PATCH] MAINTAINERS: Update Sriram Yagnaraman mail address

2024-02-28 Thread Sriram Yagnaraman
Hi Philippe,

> -Original Message-
> From: Philippe Mathieu-Daudé 
> Sent: Wednesday, 28 February 2024 16:25
> To: Sriram Yagnaraman ; qemu-
> de...@nongnu.org
> Subject: Re: [PATCH] MAINTAINERS: Update Sriram Yagnaraman mail address
> 
> Hi Sriram,
> 
> On 28/2/24 09:06, Sriram Yagnaraman wrote:
> > Due to company policies, I have changed my mail address. Updating
> > MAINTAINERS and .mailmap to show my latest mail address.
> >
> > Signed-off-by: Sriram Yagnaraman 
> > ---
> >   .mailmap| 1 +
> >   MAINTAINERS | 2 +-
> >   2 files changed, 2 insertions(+), 1 deletion(-)
> 
> 
> > diff --git a/MAINTAINERS b/MAINTAINERS index 65dfdc9677..0a3294f698
> > 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -2474,7 +2474,7 @@ F: tests/qtest/libqos/e1000e.*
> >
> >   igb
> >   M: Akihiko Odaki 
> > -R: Sriram Yagnaraman 
> 
> > Could you confirm this from your @est.tech address?

Unfortunately, I don't have access to that mail address anymore.
It was a chicken-and-egg situation for me: I was not allowed to be a reviewer
with @ericsson.com while I still had @est.tech. :/

Is there any other way to prove I am the same person?

> 
> > +R: Sriram Yagnaraman 
> >   S: Maintained
> >   F: docs/system/devices/igb.rst
> >   F: hw/net/igb*


[PATCH] MAINTAINERS: Update Sriram Yagnaraman mail address

2024-02-28 Thread Sriram Yagnaraman
Due to company policies, I have changed my mail address. Updating
MAINTAINERS and .mailmap to show my latest mail address.

Signed-off-by: Sriram Yagnaraman 
---
 .mailmap| 1 +
 MAINTAINERS | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.mailmap b/.mailmap
index 88fb68143e..ef1b8a53f4 100644
--- a/.mailmap
+++ b/.mailmap
@@ -100,6 +100,7 @@ Philippe Mathieu-Daudé  
 Philippe Mathieu-Daudé  
 Philippe Mathieu-Daudé  
 Roman Bolshakov  
+Sriram Yagnaraman  
 Stefan Brankovic  
 Stefan Weil  Stefan Weil 
 Taylor Simpson  
diff --git a/MAINTAINERS b/MAINTAINERS
index 65dfdc9677..0a3294f698 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2474,7 +2474,7 @@ F: tests/qtest/libqos/e1000e.*
 
 igb
 M: Akihiko Odaki 
-R: Sriram Yagnaraman 
+R: Sriram Yagnaraman 
 S: Maintained
 F: docs/system/devices/igb.rst
 F: hw/net/igb*
-- 
2.31.1




RE: [PATCH 1/2] igb: Add a VF reset handler

2023-08-30 Thread Sriram Yagnaraman
> -Original Message-
> From: Cédric Le Goater 
> Sent: Tuesday, 29 August 2023 11:05
> To: qemu-devel@nongnu.org
> Cc: Akihiko Odaki ; Sriram Yagnaraman
> ; Jason Wang ; Cédric
> Le Goater 
> Subject: [PATCH 1/2] igb: Add a VF reset handler
> 
> From: Cédric Le Goater 
> 
> Export the igb_vf_reset() helper routine from the PF model to let the IGBVF
> model implement its own device reset.
> 
> Cc: Akihiko Odaki 
> Suggested-by: Sriram Yagnaraman 
> Signed-off-by: Cédric Le Goater 
> ---
>  hw/net/igb_common.h |  1 +
>  hw/net/igb_core.h   |  3 +++
>  hw/net/igb.c|  6 ++
>  hw/net/igb_core.c   |  6 --
>  hw/net/igbvf.c  | 10 ++
>  hw/net/trace-events |  1 +
>  6 files changed, 25 insertions(+), 2 deletions(-)
> 

Reviewed-by: Sriram Yagnaraman 


RE: [PATCH 2/2] igb: Add Function Level Reset to PF and VF

2023-08-30 Thread Sriram Yagnaraman


> -Original Message-
> From: Cédric Le Goater 
> Sent: Tuesday, 29 August 2023 11:05
> To: qemu-devel@nongnu.org
> Cc: Akihiko Odaki ; Sriram Yagnaraman
> ; Jason Wang ; Cédric
> Le Goater 
> Subject: [PATCH 2/2] igb: Add Function Level Reset to PF and VF
> 
> From: Cédric Le Goater 
> 
> The Intel 82576EB GbE Controller say that the Physical and Virtual Functions
> support Function Level Reset. Add the capability to each device model.
> 
> Cc:  Sriram Yagnaraman 
> Fixes: 3a977deebe6b ("Intrdocue igb device emulation")
> Reviewed-by: Akihiko Odaki 
> Tested-by: Akihiko Odaki 
> Signed-off-by: Cédric Le Goater 
> ---
>  hw/net/igb.c   | 3 +++
>  hw/net/igbvf.c | 3 +++
>  2 files changed, 6 insertions(+)
> 

Reviewed-by: Sriram Yagnaraman 


RE: [PATCH] igb: Add Function Level Reset to PF and VF

2023-05-30 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Tuesday, 30 May 2023 04:02
> To: Cédric Le Goater ; Sriram Yagnaraman
> ; qemu-devel@nongnu.org
> Cc: Jason Wang 
> Subject: Re: [PATCH] igb: Add Function Level Reset to PF and VF
> 
> On 2023/05/30 0:07, Cédric Le Goater wrote:
> > On 5/29/23 09:45, Akihiko Odaki wrote:
> >> On 2023/05/29 16:01, Cédric Le Goater wrote:
> >>> On 5/29/23 04:45, Akihiko Odaki wrote:
> >>>> On 2023/05/28 19:50, Sriram Yagnaraman wrote:
> >>>>>
> >>>>>> -Original Message-
> >>>>>> From: Cédric Le Goater 
> >>>>>> Sent: Friday, 26 May 2023 19:31
> >>>>>> To: qemu-devel@nongnu.org
> >>>>>> Cc: Akihiko Odaki ; Sriram Yagnaraman
> >>>>>> ; Jason Wang
> ;
> >>>>>> Cédric Le Goater 
> >>>>>> Subject: [PATCH] igb: Add Function Level Reset to PF and VF
> >>>>>>
> >>>>>> The Intel 82576EB GbE Controller say that the Physical and
> >>>>>> Virtual Functions support Function Level Reset. Add the
> >>>>>> capability to each device model.
> >>>>>>
> >>>>>> Cc: Akihiko Odaki 
> >>>>>> Fixes: 3a977deebe6b ("Intrdocue igb device emulation")
> >>>>>> Signed-off-by: Cédric Le Goater 
> >>>>>> ---
> >>>>>>   hw/net/igb.c   | 3 +++
> >>>>>>   hw/net/igbvf.c | 3 +++
> >>>>>>   2 files changed, 6 insertions(+)
> >>>>>>
> >>>>>> diff --git a/hw/net/igb.c b/hw/net/igb.c index
> >>>>>> 1c989d767725..08e389338dca
> >>>>>> 100644
> >>>>>> --- a/hw/net/igb.c
> >>>>>> +++ b/hw/net/igb.c
> >>>>>> @@ -101,6 +101,7 @@ static void igb_write_config(PCIDevice *dev,
> >>>>>> uint32_t addr,
> >>>>>>
> >>>>>>   trace_igb_write_config(addr, val, len);
> >>>>>>   pci_default_write_config(dev, addr, val, len);
> >>>>>> +    pcie_cap_flr_write_config(dev, addr, val, len);
> >>>>>>
> >>>>>>   if (range_covers_byte(addr, len, PCI_COMMAND) &&
> >>>>>>   (dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) {
> @@
> >>>>>> -427,6
> >>>>>> +428,8 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error
> >>>>>> **errp)
> >>>>>>   }
> >>>>>>
> >>>>>>   /* PCIe extended capabilities (in order) */
> >>>>>> +    pcie_cap_flr_init(pci_dev);
> >>>>>> +
> >>>>>>   if (pcie_aer_init(pci_dev, 1, 0x100, 0x40, errp) < 0) {
> >>>>>>   hw_error("Failed to initialize AER capability");
> >>>>>>   }
> >>>>>> diff --git a/hw/net/igbvf.c b/hw/net/igbvf.c index
> >>>>>> 284ea611848b..0a58dad06802 100644
> >>>>>> --- a/hw/net/igbvf.c
> >>>>>> +++ b/hw/net/igbvf.c
> >>>>>> @@ -204,6 +204,7 @@ static void igbvf_write_config(PCIDevice
> >>>>>> *dev, uint32_t addr, uint32_t val,  {
> >>>>>>   trace_igbvf_write_config(addr, val, len);
> >>>>>>   pci_default_write_config(dev, addr, val, len);
> >>>>>> +    pcie_cap_flr_write_config(dev, addr, val, len);
> >>>>>>   }
> >>>>>>
> >>>>>>   static uint64_t igbvf_mmio_read(void *opaque, hwaddr addr,
> >>>>>> unsigned size) @@ -266,6 +267,8 @@ static void
> >>>>>> igbvf_pci_realize(PCIDevice *dev, Error
> >>>>>> **errp)
> >>>>>>   hw_error("Failed to initialize PCIe capability");
> >>>>>>   }
> >>>>>>
> >>>>>> +    pcie_cap_flr_init(dev);
> >>>>>
> >>>>> Sorry for my naive question, and perhaps not related to your
> >>>>> patch, IGBVF device class doesn't seem to have any reset functions
> >>>>> registered via igbvf_class_init(). So, I am guessing an FLR will
> >>>>> not trigger igb_vf_reset(), which is probably what we want.
> >>>
> >>> I

RE: [PATCH] igb: Add Function Level Reset to PF and VF

2023-05-28 Thread Sriram Yagnaraman
> -Original Message-
> From: qemu-devel-bounces+sriram.yagnaraman=est.t...@nongnu.org
>  On Behalf
> Of Sriram Yagnaraman
> Sent: Sunday, 28 May 2023 12:51
> To: Cédric Le Goater ; qemu-devel@nongnu.org
> Cc: Akihiko Odaki ; Jason Wang
> 
> Subject: RE: [PATCH] igb: Add Function Level Reset to PF and VF
> 
> 
> > -Original Message-
> > From: Cédric Le Goater 
> > Sent: Friday, 26 May 2023 19:31
> > To: qemu-devel@nongnu.org
> > Cc: Akihiko Odaki ; Sriram Yagnaraman
> > ; Jason Wang ;
> Cédric
> > Le Goater 
> > Subject: [PATCH] igb: Add Function Level Reset to PF and VF
> >
> > The Intel 82576EB GbE Controller say that the Physical and Virtual
> > Functions support Function Level Reset. Add the capability to each device
> model.
> >
> > Cc: Akihiko Odaki 
> > Fixes: 3a977deebe6b ("Intrdocue igb device emulation")
> > Signed-off-by: Cédric Le Goater 
> > ---
> >  hw/net/igb.c   | 3 +++
> >  hw/net/igbvf.c | 3 +++
> >  2 files changed, 6 insertions(+)
> >
> > diff --git a/hw/net/igb.c b/hw/net/igb.c index
> > 1c989d767725..08e389338dca
> > 100644
> > --- a/hw/net/igb.c
> > +++ b/hw/net/igb.c
> > @@ -101,6 +101,7 @@ static void igb_write_config(PCIDevice *dev,
> > uint32_t addr,
> >
> >  trace_igb_write_config(addr, val, len);
> >  pci_default_write_config(dev, addr, val, len);
> > +pcie_cap_flr_write_config(dev, addr, val, len);
> >
> >  if (range_covers_byte(addr, len, PCI_COMMAND) &&
> >  (dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { @@ -
> 427,6
> > +428,8 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error
> > +**errp)
> >  }
> >
> >  /* PCIe extended capabilities (in order) */
> > +pcie_cap_flr_init(pci_dev);
> > +
> >  if (pcie_aer_init(pci_dev, 1, 0x100, 0x40, errp) < 0) {
> >  hw_error("Failed to initialize AER capability");
> >  }
> > diff --git a/hw/net/igbvf.c b/hw/net/igbvf.c index
> > 284ea611848b..0a58dad06802 100644
> > --- a/hw/net/igbvf.c
> > +++ b/hw/net/igbvf.c
> > @@ -204,6 +204,7 @@ static void igbvf_write_config(PCIDevice *dev,
> > uint32_t addr, uint32_t val,  {
> >  trace_igbvf_write_config(addr, val, len);
> >  pci_default_write_config(dev, addr, val, len);
> > +pcie_cap_flr_write_config(dev, addr, val, len);
> >  }
> >
> >  static uint64_t igbvf_mmio_read(void *opaque, hwaddr addr, unsigned
> > size) @@ -266,6 +267,8 @@ static void igbvf_pci_realize(PCIDevice
> > *dev, Error
> > **errp)
> >  hw_error("Failed to initialize PCIe capability");
> >  }
> >
> > +pcie_cap_flr_init(dev);
> 
> Sorry for my naive question, and perhaps not related to your patch, IGBVF
> device class doesn't seem to have any reset functions registered via
> igbvf_class_init(). So, I am guessing an FLR will not trigger igb_vf_reset(), 
> which
> is probably what we want.
> 

Something like this perhaps? Not compile tested, just an idea.
diff --git a/hw/net/igbvf.c b/hw/net/igbvf.c
index 284ea61184..9f07983bc9 100644
--- a/hw/net/igbvf.c
+++ b/hw/net/igbvf.c
@@ -283,9 +283,17 @@ static void igbvf_pci_uninit(PCIDevice *dev)
 msix_uninit(dev, >msix, >msix);
 }
 
+static void igbvf_qdev_reset_hold(Object *obj)
+{
+trace_e1000e_cb_qdev_reset_hold();
+
+igbvf_mmio_write(obj, E1000_CTRL, E1000_CTRL_RST, 0x4); /* Write to VTCTRL 
to trigger VF reset */
+}
+
 static void igbvf_class_init(ObjectClass *class, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(class);
+ResettableClass *rc = RESETTABLE_CLASS(class);
 PCIDeviceClass *c = PCI_DEVICE_CLASS(class);
 
 c->realize = igbvf_pci_realize;
@@ -295,6 +303,8 @@ static void igbvf_class_init(ObjectClass *class, void *data)
 c->revision = 1;
 c->class_id = PCI_CLASS_NETWORK_ETHERNET;
 
+rc->phases.hold = igbvf_qdev_reset_hold;
+
 dc->desc = "Intel 82576 Virtual Function";
 dc->user_creatable = false;

> > +
> >  if (pcie_aer_init(dev, 1, 0x100, 0x40, errp) < 0) {
> >  hw_error("Failed to initialize AER capability");
> >  }
> > --
> > 2.40.1



RE: [PATCH] igb: Add Function Level Reset to PF and VF

2023-05-28 Thread Sriram Yagnaraman

> -Original Message-
> From: Cédric Le Goater 
> Sent: Friday, 26 May 2023 19:31
> To: qemu-devel@nongnu.org
> Cc: Akihiko Odaki ; Sriram Yagnaraman
> ; Jason Wang ; Cédric
> Le Goater 
> Subject: [PATCH] igb: Add Function Level Reset to PF and VF
> 
> The Intel 82576EB GbE Controller say that the Physical and Virtual Functions
> support Function Level Reset. Add the capability to each device model.
> 
> Cc: Akihiko Odaki 
> Fixes: 3a977deebe6b ("Intrdocue igb device emulation")
> Signed-off-by: Cédric Le Goater 
> ---
>  hw/net/igb.c   | 3 +++
>  hw/net/igbvf.c | 3 +++
>  2 files changed, 6 insertions(+)
> 
> diff --git a/hw/net/igb.c b/hw/net/igb.c index 1c989d767725..08e389338dca
> 100644
> --- a/hw/net/igb.c
> +++ b/hw/net/igb.c
> @@ -101,6 +101,7 @@ static void igb_write_config(PCIDevice *dev, uint32_t
> addr,
> 
>  trace_igb_write_config(addr, val, len);
>  pci_default_write_config(dev, addr, val, len);
> +pcie_cap_flr_write_config(dev, addr, val, len);
> 
>  if (range_covers_byte(addr, len, PCI_COMMAND) &&
>  (dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { @@ -427,6
> +428,8 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error **errp)
>  }
> 
>  /* PCIe extended capabilities (in order) */
> +pcie_cap_flr_init(pci_dev);
> +
>  if (pcie_aer_init(pci_dev, 1, 0x100, 0x40, errp) < 0) {
>  hw_error("Failed to initialize AER capability");
>  }
> diff --git a/hw/net/igbvf.c b/hw/net/igbvf.c index
> 284ea611848b..0a58dad06802 100644
> --- a/hw/net/igbvf.c
> +++ b/hw/net/igbvf.c
> @@ -204,6 +204,7 @@ static void igbvf_write_config(PCIDevice *dev,
> uint32_t addr, uint32_t val,  {
>  trace_igbvf_write_config(addr, val, len);
>  pci_default_write_config(dev, addr, val, len);
> +pcie_cap_flr_write_config(dev, addr, val, len);
>  }
> 
>  static uint64_t igbvf_mmio_read(void *opaque, hwaddr addr, unsigned size)
> @@ -266,6 +267,8 @@ static void igbvf_pci_realize(PCIDevice *dev, Error
> **errp)
>  hw_error("Failed to initialize PCIe capability");
>  }
> 
> +pcie_cap_flr_init(dev);

Sorry for my naive question, which is perhaps not related to your patch: the
IGBVF device class doesn't seem to have any reset functions registered via
igbvf_class_init(). So, I am guessing an FLR will not trigger igb_vf_reset(),
even though triggering it is probably what we want.

> +
>  if (pcie_aer_init(dev, 1, 0x100, 0x40, errp) < 0) {
>  hw_error("Failed to initialize AER capability");
>  }
> --
> 2.40.1



RE: [PATCH v6 4/7] igb: RX payload guest writting refactoring

2023-05-15 Thread Sriram Yagnaraman


> -Original Message-
> From: Tomasz Dzieciol 
> Sent: Friday, 12 May 2023 17:44
> To: qemu-devel@nongnu.org; akihiko.od...@daynix.com
> Cc: Sriram Yagnaraman ;
> jasow...@redhat.com; k.kwiec...@samsung.com;
> m.socha...@samsung.com
> Subject: [PATCH v6 4/7] igb: RX payload guest writting refactoring
> 
> Refactoring is done in preparation for support of multiple advanced 
> descriptors
> RX modes, especially packet-split modes.
> 
> Signed-off-by: Tomasz Dzieciol 
> ---
>  hw/net/e1000e_core.c |  18 ++--
>  hw/net/igb_core.c| 216 +--
>  tests/qtest/libqos/igb.c |   5 +
>  3 files changed, 153 insertions(+), 86 deletions(-)
> 
> diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index
> b2e54fe802..f9ff31fd70 100644
> --- a/hw/net/e1000e_core.c
> +++ b/hw/net/e1000e_core.c
> @@ -1418,11 +1418,11 @@ e1000e_write_hdr_to_rx_buffers(E1000ECore
> *core,  }
> 
>  static void
> -e1000e_write_to_rx_buffers(E1000ECore *core,
> -   hwaddr ba[MAX_PS_BUFFERS],
> -   e1000e_ba_state *bastate,
> -   const char *data,
> -   dma_addr_t data_len)
> +e1000e_write_payload_frag_to_rx_buffers(E1000ECore *core,
> +hwaddr ba[MAX_PS_BUFFERS],
> +e1000e_ba_state *bastate,
> +const char *data,
> +dma_addr_t data_len)
>  {
>  while (data_len > 0) {
>  uint32_t cur_buf_len = core->rxbuf_sizes[bastate->cur_idx];
> @@ -1594,8 +1594,10 @@ e1000e_write_packet_to_guest(E1000ECore
> *core, struct NetRxPkt *pkt,
>  while (copy_size) {
>  iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
> 
> -e1000e_write_to_rx_buffers(core, ba, ,
> -iov->iov_base + iov_ofs, 
> iov_copy);
> +e1000e_write_payload_frag_to_rx_buffers(core, ba, 
> ,
> +iov->iov_base +
> +iov_ofs,
> +iov_copy);
> 
>  copy_size -= iov_copy;
>  iov_ofs += iov_copy; @@ -1607,7 +1609,7 @@
> e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
> 
>  if (desc_offset + desc_size >= total_size) {
>  /* Simulate FCS checksum presence in the last descriptor 
> */
> -e1000e_write_to_rx_buffers(core, ba, ,
> +e1000e_write_payload_frag_to_rx_buffers(core, ba,
> + ,
>(const char *) _pad, 
> e1000x_fcs_len(core->mac));
>  }
>  }
> diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index
> 774b34fc92..0eabe7106e 100644
> --- a/hw/net/igb_core.c
> +++ b/hw/net/igb_core.c
> @@ -941,6 +941,14 @@ igb_has_rxbufs(IGBCore *core, const E1000ERingInfo
> *r, size_t total_size)
>   bufsize;
>  }
> 
> +static uint32_t
> +igb_get_queue_rx_header_buf_size(IGBCore *core, const E1000ERingInfo
> +*r) {

It would be nice to have similar names for igb_rxbufsize and
igb_get_queue_rx_header_buf_size.
If we want to keep igb_rxbufsize due to its similarity with the e1000e
equivalent, how about igb_rxhdrbufsize() for this new function?

> +uint32_t srrctl = core->mac[E1000_SRRCTL(r->idx) >> 2];
> +return (srrctl & E1000_SRRCTL_BSIZEHDRSIZE_MASK) >>
> +   E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; }
> +
>  void
>  igb_start_recv(IGBCore *core)
>  {
> @@ -1231,6 +1239,21 @@ igb_read_adv_rx_descr(IGBCore *core, union
> e1000_adv_rx_desc *desc,
>  *buff_addr = le64_to_cpu(desc->read.pkt_addr);  }
> 
> +typedef struct IGBPacketRxDMAState {
> +size_t size;
> +size_t total_size;
> +size_t ps_hdr_len;
> +size_t desc_size;
> +size_t desc_offset;
> +uint32_t rx_desc_packet_buf_size;
> +uint32_t rx_desc_header_buf_size;
> +struct iovec *iov;
> +size_t iov_ofs;
> +bool is_first;
> +uint16_t written;
> +hwaddr ba;
> +} IGBPacketRxDMAState;
> +
>  static inline void
>  igb_read_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc,
>hwaddr *buff_addr)
> @@ -1512,19 +1535,6 @@ igb_pci_dma_write_rx_desc(IGBCore *core,
> PCIDevice *dev, dma_addr_t addr,
>  }
>  }
> 
> -static void
> -igb_write_to_rx_buffers(IGBCore *core

RE: [PATCH v6 5/7] igb: add IPv6 extended headers traffic detection

2023-05-15 Thread Sriram Yagnaraman


> -Original Message-
> From: Tomasz Dzieciol 
> Sent: Friday, 12 May 2023 17:44
> To: qemu-devel@nongnu.org; akihiko.od...@daynix.com
> Cc: Sriram Yagnaraman ;
> jasow...@redhat.com; k.kwiec...@samsung.com;
> m.socha...@samsung.com
> Subject: [PATCH v6 5/7] igb: add IPv6 extended headers traffic detection
> 
> Signed-off-by: Tomasz Dzieciol 
> ---
>  hw/net/igb_core.c | 4 +++-
>  hw/net/igb_regs.h | 1 +
>  2 files changed, 4 insertions(+), 1 deletion(-)
>

Reviewed-by: Sriram Yagnaraman 


RE: [PATCH v6 1/7] igb: remove TCP ACK detection

2023-05-15 Thread Sriram Yagnaraman


> -Original Message-
> From: Tomasz Dzieciol 
> Sent: Friday, 12 May 2023 17:43
> To: qemu-devel@nongnu.org; akihiko.od...@daynix.com
> Cc: Sriram Yagnaraman ;
> jasow...@redhat.com; k.kwiec...@samsung.com;
> m.socha...@samsung.com
> Subject: [PATCH v6 1/7] igb: remove TCP ACK detection
> 
> TCP ACK detection is no longer present in igb.
> 
> Signed-off-by: Tomasz Dzieciol 
> ---
>  hw/net/igb_core.c | 5 -
>  1 file changed, 5 deletions(-)
> 

Reviewed-by: Sriram Yagnaraman 


RE: [PATCH v6 2/7] igb: rename E1000E_RingInfo_st

2023-05-15 Thread Sriram Yagnaraman


> -Original Message-
> From: Tomasz Dzieciol 
> Sent: Friday, 12 May 2023 17:44
> To: qemu-devel@nongnu.org; akihiko.od...@daynix.com
> Cc: Sriram Yagnaraman ;
> jasow...@redhat.com; k.kwiec...@samsung.com;
> m.socha...@samsung.com
> Subject: [PATCH v6 2/7] igb: rename E1000E_RingInfo_st
> 
> Rename E1000E_RingInfo_st and E1000E_RingInfo according to qemu typdefs
> guide.
> 
> Signed-off-by: Tomasz Dzieciol 
> ---
>  hw/net/e1000e_core.c | 34 +-
>  hw/net/igb_core.c| 42 +-
>  2 files changed, 38 insertions(+), 38 deletions(-)
> 

Reviewed-by: Sriram Yagnaraman 


RE: [PATCH v3 1/2] igb: RX descriptors handling cleanup

2023-05-03 Thread Sriram Yagnaraman


> -Original Message-
> From: Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung
> Electronics 
> Sent: Tuesday, 2 May 2023 16:01
> To: Sriram Yagnaraman ; qemu-
> de...@nongnu.org; akihiko.od...@daynix.com
> Cc: jasow...@redhat.com; k.kwiec...@samsung.com;
> m.socha...@samsung.com
> Subject: RE: [PATCH v3 1/2] igb: RX descriptors handling cleanup
> 
> Not Linux/DPDK/FreeBSD for IGB.
> 
> Change here adds additional condition (RXCSUM.IPPCSE set) to enable putting
> IP ID into descriptor, besides clearing RXCSUM.PCSD (required according to
> Intel 82576 datasheet) that was not present in the e1000e code.
> 

Yes, we can't even use ethtool to set this field.
My suggestion is to not add/maintain code that we cannot test. I leave it up to
Akihiko to decide if we really need to implement IPPCSE.
The default value of RXCSUM.IPPCSE is unset, so we could as well ignore this 
field until there is a user who sets this.

Anyhow, I will hold off on further comments until you respin this after
splitting the changes as requested.


RE: [PATCH v3 1/2] igb: RX descriptors handling cleanup

2023-04-30 Thread Sriram Yagnaraman


> -Original Message-
> From: Tomasz Dzieciol 
> Sent: Thursday, 27 April 2023 12:48
> To: qemu-devel@nongnu.org; akihiko.od...@daynix.com
> Cc: Sriram Yagnaraman ;
> jasow...@redhat.com; k.kwiec...@samsung.com;
> m.socha...@samsung.com
> Subject: [PATCH v3 1/2] igb: RX descriptors handling cleanup
> 
> Format of Intel 82576 was changed in comparison to Intel 82574 extended
> descriptors. This change updates filling of advanced descriptors fields
> accordingly:
> * remove TCP ACK detection
> * add IPv6 with extensions traffic detection
> * fragment checksum and IP ID is filled only when RXCSUM.IPPCSE is set (in
>   addition to RXCSUM.PCSD bit cleared condition)

Just curious if any device driver still uses IP payload checksum enable 
(IPPCSE)?

> 
> Refactoring is done in preparation for support of multiple advanced 
> descriptors
> RX modes, especially packet-split modes.
> 
> Signed-off-by: Tomasz Dzieciol 
> ---
>  hw/net/e1000e_core.c |  18 +-
>  hw/net/e1000x_regs.h |   1 +
>  hw/net/igb_core.c| 478 ---
>  hw/net/igb_regs.h|  12 +-
>  hw/net/trace-events  |   6 +-
>  tests/qtest/libqos/igb.c |   3 +
>  6 files changed, 316 insertions(+), 202 deletions(-)
> 
> diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index
> 78373d7db7..0085ad53c2 100644
> --- a/hw/net/e1000e_core.c
> +++ b/hw/net/e1000e_core.c
> @@ -1418,11 +1418,11 @@ e1000e_write_hdr_to_rx_buffers(E1000ECore
> *core,  }
> 
>  static void
> -e1000e_write_to_rx_buffers(E1000ECore *core,
> -   hwaddr ba[MAX_PS_BUFFERS],
> -   e1000e_ba_state *bastate,
> -   const char *data,
> -   dma_addr_t data_len)
> +e1000e_write_payload_frag_to_rx_buffers(E1000ECore *core,
> +hwaddr ba[MAX_PS_BUFFERS],
> +e1000e_ba_state *bastate,
> +const char *data,
> +dma_addr_t data_len)
>  {
>  while (data_len > 0) {
>  uint32_t cur_buf_len = core->rxbuf_sizes[bastate->cur_idx];
> @@ -1594,8 +1594,10 @@ e1000e_write_packet_to_guest(E1000ECore
> *core, struct NetRxPkt *pkt,
>  while (copy_size) {
>  iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
> 
> -e1000e_write_to_rx_buffers(core, ba, ,
> -iov->iov_base + iov_ofs, 
> iov_copy);
> +e1000e_write_payload_frag_to_rx_buffers(core, ba, 
> ,
> +iov->iov_base +
> +iov_ofs,
> +iov_copy);
> 
>  copy_size -= iov_copy;
>  iov_ofs += iov_copy; @@ -1607,7 +1609,7 @@
> e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
> 
>  if (desc_offset + desc_size >= total_size) {
>  /* Simulate FCS checksum presence in the last descriptor 
> */
> -e1000e_write_to_rx_buffers(core, ba, ,
> +e1000e_write_payload_frag_to_rx_buffers(core, ba,
> + ,
>(const char *) _pad, 
> e1000x_fcs_len(core->mac));
>  }
>  }
> diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h index
> 13760c66d3..344fd10359 100644
> --- a/hw/net/e1000x_regs.h
> +++ b/hw/net/e1000x_regs.h
> @@ -827,6 +827,7 @@ union e1000_rx_desc_packet_split {
>  /* Receive Checksum Control bits */
>  #define E1000_RXCSUM_IPOFLD 0x100   /* IP Checksum Offload Enable */
>  #define E1000_RXCSUM_TUOFLD 0x200   /* TCP/UDP Checksum Offload
> Enable */
> +#define E1000_RXCSUM_IPPCSE 0x1000  /* IP Payload Checksum enable */
>  #define E1000_RXCSUM_PCSD   0x2000  /* Packet Checksum Disable */
> 
>  #define E1000_RING_DESC_LEN   (16)
> diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index
> 96b7335b31..1cb64402aa 100644
> --- a/hw/net/igb_core.c
> +++ b/hw/net/igb_core.c
> @@ -267,6 +267,21 @@ igb_rx_use_legacy_descriptor(IGBCore *core)
>  return false;
>  }
> 
> +typedef struct E1000E_RingInfo_st {
> +int dbah;
> +int dbal;
> +int dlen;
> +int dh;
> +int dt;
> +int idx;
> +} E1000E_RingInfo;
> +
> +static uint32_t
> +igb_rx_queue_desctyp_get(IGBCore *core, const E1000E_RingInfo *r) {
> +return core->mac[E1000_SRRCTL(r->idx) >> 2] &
> +

RE: [PATCH v4 27/48] igb: Clear EICR bits for delayed MSI-X interrupts

2023-04-26 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Wednesday, 26 April 2023 12:37
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S . Tsirkin ; Alex Bennée
> ; Philippe Mathieu-Daudé ;
> Thomas Huth ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Tomasz Dzieciol
> ; Akihiko Odaki
> 
> Subject: [PATCH v4 27/48] igb: Clear EICR bits for delayed MSI-X interrupts
> 
> Section 7.3.4.1 says:
> > When auto-clear is enabled for an interrupt cause, the EICR bit is set
> > when a cause event mapped to this vector occurs. When the EITR Counter
> > reaches zero, the MSI-X message is sent on PCIe. Then the EICR bit is
> > cleared and enabled to be set by a new cause event
> 
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/igb_core.c | 21 ++++-
>  1 file changed, 12 insertions(+), 9 deletions(-)
> 
Reviewed-by: Sriram Yagnaraman 


RE: [PATCH v4 45/48] igb: Clear-on-read ICR when ICR.INTA is set

2023-04-26 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Wednesday, 26 April 2023 12:37
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S . Tsirkin ; Alex Bennée
> ; Philippe Mathieu-Daudé ;
> Thomas Huth ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Tomasz Dzieciol
> ; Akihiko Odaki
> 
> Subject: [PATCH v4 45/48] igb: Clear-on-read ICR when ICR.INTA is set
> 
> For GPIE.NSICR, Section 7.3.2.1.2 says:
> > ICR bits are cleared on register read. If GPIE.NSICR = 0b, then the
> > clear on read occurs only if no bit is set in the IMS or at least one
> > bit is set in the IMS and there is a true interrupt as reflected in
> > ICR.INTA.
> 
> e1000e does similar though it checks for CTRL_EXT.IAME, which does not exist
> on igb.
> 
> Suggested-by: Sriram Yagnaraman 
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/igb_core.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index
> b68e24c9ee..29190054c6 100644
> --- a/hw/net/igb_core.c
> +++ b/hw/net/igb_core.c
> @@ -2598,6 +2598,8 @@ igb_mac_icr_read(IGBCore *core, int index)
>  } else if (core->mac[IMS] == 0) {
>  trace_e1000e_irq_icr_clear_zero_ims();
>  igb_lower_interrupts(core, ICR, 0x);
> +} else if (core->mac[ICR] & E1000_ICR_INT_ASSERTED) {
> +igb_lower_interrupts(core, ICR, 0x);
>  } else if (!msix_enabled(core->owner)) {
>      trace_e1000e_irq_icr_clear_nonmsix_icr_read();
>  igb_lower_interrupts(core, ICR, 0x);
> --
> 2.40.0

Reviewed-by: Sriram Yagnaraman 


RE: [PATCH v4 44/48] igb: Notify only new interrupts

2023-04-26 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Wednesday, 26 April 2023 12:37
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S . Tsirkin ; Alex Bennée
> ; Philippe Mathieu-Daudé ;
> Thomas Huth ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Tomasz Dzieciol
> ; Akihiko Odaki
> 
> Subject: [PATCH v4 44/48] igb: Notify only new interrupts
> 
> This follows the corresponding change for e1000e. This fixes:
> tests/avocado/netdev-ethtool.py:NetDevEthtool.test_igb
> 
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/igb_core.c | 201 --
>  hw/net/trace-events   |  11 +-
>  .../org.centos/stream/8/x86_64/test-avocado   |   1 +
>  tests/avocado/netdev-ethtool.py   |   4 -
>  4 files changed, 87 insertions(+), 130 deletions(-)
> 

LGTM
Tested-by: Sriram Yagnaraman 


RE: [PATCH v3 44/47] igb: Notify only new interrupts

2023-04-25 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Monday, 24 April 2023 13:50
> To: Sriram Yagnaraman 
> Cc: Jason Wang ; Dmitry Fleytman
> ; Michael S . Tsirkin ; Alex
> Bennée ; Philippe Mathieu-Daudé
> ; Thomas Huth ; Wainer dos Santos
> Moschetta ; Beraldo Leal ;
> Cleber Rosa ; Laurent Vivier ; Paolo
> Bonzini ; qemu-devel@nongnu.org; Tomasz Dzieciol
> 
> Subject: Re: [PATCH v3 44/47] igb: Notify only new interrupts
> 
> On 2023/04/24 20:41, Sriram Yagnaraman wrote:
> >
> >
> >> -Original Message-
> >> From: Akihiko Odaki 
> >> Sent: Sunday, 23 April 2023 06:19
> >> Cc: Sriram Yagnaraman ; Jason Wang
> >> ; Dmitry Fleytman ;
> >> Michael S . Tsirkin ; Alex Bennée
> >> ; Philippe Mathieu-Daudé ;
> >> Thomas Huth ; Wainer dos Santos Moschetta
> >> ; Beraldo Leal ; Cleber Rosa
> >> ; Laurent Vivier ; Paolo
> >> Bonzini ; qemu-devel@nongnu.org; Tomasz
> Dzieciol
> >> ; Akihiko Odaki
> >> 
> >> Subject: [PATCH v3 44/47] igb: Notify only new interrupts
> >>
> >> This follows the corresponding change for e1000e. This fixes:
> >> tests/avocado/netdev-ethtool.py:NetDevEthtool.test_igb
> >>
> >> Signed-off-by: Akihiko Odaki 
> >> ---
> >>   hw/net/igb_core.c | 201 --
> >>   hw/net/trace-events   |  11 +-
> >>   .../org.centos/stream/8/x86_64/test-avocado   |   1 +
> >>   tests/avocado/netdev-ethtool.py   |   4 -
> >>   4 files changed, 87 insertions(+), 130 deletions(-)
> >>
> >
> > This is a good change, makes a clear distinction on whether we are setting
> EICR or ICR or MBVFICR.
> >
> >> diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index
> >> 1519a90aa6..96b7335b31 100644
> >> --- a/hw/net/igb_core.c
> >> +++ b/hw/net/igb_core.c
> >> @@ -94,10 +94,7 @@ static ssize_t
> >>   igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
> >>bool has_vnet, bool *external_tx);
> >>
> >> -static inline void
> >> -igb_set_interrupt_cause(IGBCore *core, uint32_t val);
> >> -
> >> -static void igb_update_interrupt_state(IGBCore *core);
> >> +static void igb_raise_interrupts(IGBCore *core, size_t index,
> >> +uint32_t causes);
> >>   static void igb_reset(IGBCore *core, bool sw);
> >>
> >>   static inline void
> >> @@ -913,8 +910,8 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing
> *txr)
> >>   }
> >>
> >>   if (eic) {
> >> -core->mac[EICR] |= eic;
> >> -igb_set_interrupt_cause(core, E1000_ICR_TXDW);
> >> +igb_raise_interrupts(core, EICR, eic);
> >> +igb_raise_interrupts(core, ICR, E1000_ICR_TXDW);
> >>   }
> >>
> >>   net_tx_pkt_reset(txr->tx->tx_pkt, net_tx_pkt_unmap_frag_pci,
> >> d); @@ -
> >> 1686,6 +1683,7 @@ igb_receive_internal(IGBCore *core, const struct
> >> iovec *iov, int iovcnt,  {
> >>   uint16_t queues = 0;
> >>   uint32_t causes = 0;
> >> +uint32_t ecauses = 0;
> >>   union {
> >>   L2Header l2_header;
> >>   uint8_t octets[ETH_ZLEN];
> >> @@ -1788,13 +1786,14 @@ igb_receive_internal(IGBCore *core, const
> >> struct iovec *iov, int iovcnt,
> >>   causes |= E1000_ICS_RXDMT0;
> >>   }
> >>
> >> -core->mac[EICR] |= igb_rx_wb_eic(core, rxr.i->idx);
> >> +ecauses |= igb_rx_wb_eic(core, rxr.i->idx);
> >>
> >>   trace_e1000e_rx_written_to_guest(rxr.i->idx);
> >>   }
> >>
> >>   trace_e1000e_rx_interrupt_set(causes);
> >> -igb_set_interrupt_cause(core, causes);
> >> +igb_raise_interrupts(core, EICR, ecauses);
> >> +igb_raise_interrupts(core, ICR, causes);
> >>
> >>   return orig_size;
> >>   }
> >> @@ -1854,7 +1853,7 @@ void igb_core_set_link_status(IGBCore *core)
> >>   }
> >>
> >>   if (core->mac[STATUS] != old_status) {
> >> -igb_set_interrupt_cause(core, E1000_ICR_LSC);
> >> +igb_raise_interrupts(core, ICR, E1000_ICR_LSC);
> >>   }
> >>   }
> >>
> >> @@ -1934,13 +1933,6 @@ igb_set_rx_control(IGBCore *core, int index,
> >> uint32_t val)
> >>   }
> &

RE: [PATCH v3 44/47] igb: Notify only new interrupts

2023-04-24 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Sunday, 23 April 2023 06:19
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S . Tsirkin ; Alex Bennée
> ; Philippe Mathieu-Daudé ;
> Thomas Huth ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Tomasz Dzieciol
> ; Akihiko Odaki
> 
> Subject: [PATCH v3 44/47] igb: Notify only new interrupts
> 
> This follows the corresponding change for e1000e. This fixes:
> tests/avocado/netdev-ethtool.py:NetDevEthtool.test_igb
> 
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/igb_core.c | 201 --
>  hw/net/trace-events   |  11 +-
>  .../org.centos/stream/8/x86_64/test-avocado   |   1 +
>  tests/avocado/netdev-ethtool.py   |   4 -
>  4 files changed, 87 insertions(+), 130 deletions(-)
> 

This is a good change; it makes a clear distinction between setting EICR, ICR,
or MBVFICR.

> diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index
> 1519a90aa6..96b7335b31 100644
> --- a/hw/net/igb_core.c
> +++ b/hw/net/igb_core.c
> @@ -94,10 +94,7 @@ static ssize_t
>  igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
>   bool has_vnet, bool *external_tx);
> 
> -static inline void
> -igb_set_interrupt_cause(IGBCore *core, uint32_t val);
> -
> -static void igb_update_interrupt_state(IGBCore *core);
> +static void igb_raise_interrupts(IGBCore *core, size_t index, uint32_t
> +causes);
>  static void igb_reset(IGBCore *core, bool sw);
> 
>  static inline void
> @@ -913,8 +910,8 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
>  }
> 
>  if (eic) {
> -core->mac[EICR] |= eic;
> -igb_set_interrupt_cause(core, E1000_ICR_TXDW);
> +igb_raise_interrupts(core, EICR, eic);
> +igb_raise_interrupts(core, ICR, E1000_ICR_TXDW);
>  }
> 
>  net_tx_pkt_reset(txr->tx->tx_pkt, net_tx_pkt_unmap_frag_pci, d); @@ -
> 1686,6 +1683,7 @@ igb_receive_internal(IGBCore *core, const struct iovec
> *iov, int iovcnt,  {
>  uint16_t queues = 0;
>  uint32_t causes = 0;
> +uint32_t ecauses = 0;
>  union {
>  L2Header l2_header;
>  uint8_t octets[ETH_ZLEN];
> @@ -1788,13 +1786,14 @@ igb_receive_internal(IGBCore *core, const struct
> iovec *iov, int iovcnt,
>  causes |= E1000_ICS_RXDMT0;
>  }
> 
> -core->mac[EICR] |= igb_rx_wb_eic(core, rxr.i->idx);
> +ecauses |= igb_rx_wb_eic(core, rxr.i->idx);
> 
>  trace_e1000e_rx_written_to_guest(rxr.i->idx);
>  }
> 
>  trace_e1000e_rx_interrupt_set(causes);
> -igb_set_interrupt_cause(core, causes);
> +igb_raise_interrupts(core, EICR, ecauses);
> +igb_raise_interrupts(core, ICR, causes);
> 
>  return orig_size;
>  }
> @@ -1854,7 +1853,7 @@ void igb_core_set_link_status(IGBCore *core)
>  }
> 
>  if (core->mac[STATUS] != old_status) {
> -igb_set_interrupt_cause(core, E1000_ICR_LSC);
> +igb_raise_interrupts(core, ICR, E1000_ICR_LSC);
>  }
>  }
> 
> @@ -1934,13 +1933,6 @@ igb_set_rx_control(IGBCore *core, int index,
> uint32_t val)
>  }
>  }
> 
> -static inline void
> -igb_clear_ims_bits(IGBCore *core, uint32_t bits) -{
> -trace_e1000e_irq_clear_ims(bits, core->mac[IMS], core->mac[IMS] &
> ~bits);
> -core->mac[IMS] &= ~bits;
> -}
> -
>  static inline bool
>  igb_postpone_interrupt(IGBIntrDelayTimer *timer)  { @@ -1963,9 +1955,8
> @@ igb_eitr_should_postpone(IGBCore *core, int idx)
>  return igb_postpone_interrupt(>eitr[idx]);
>  }
> 
> -static void igb_send_msix(IGBCore *core)
> +static void igb_send_msix(IGBCore *core, uint32_t causes)
>  {
> -uint32_t causes = core->mac[EICR] & core->mac[EIMS];
>  int vector;
> 
>  for (vector = 0; vector < IGB_INTR_NUM; ++vector) { @@ -1988,124
> +1979,116 @@ igb_fix_icr_asserted(IGBCore *core)
>  trace_e1000e_irq_fix_icr_asserted(core->mac[ICR]);
>  }
> 
> -static void
> -igb_update_interrupt_state(IGBCore *core)
> +static void igb_raise_interrupts(IGBCore *core, size_t index, uint32_t
> +causes)
>  {
> -uint32_t icr;
> -uint32_t causes;
> +uint32_t old_causes = core->mac[ICR] & core->mac[IMS];
> +uint32_t old_ecauses = core->mac[EICR] & core->mac[EIMS];
> +uint32_t raised_causes;
> +uint32_t raised_ecauses;
>  uint32_t int_alloc;
> 
> -icr = core->mac[ICR] & cor

RE: [PATCH v3 09/47] igb: Always copy ethernet header

2023-04-24 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Sunday, 23 April 2023 06:18
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S . Tsirkin ; Alex Bennée
> ; Philippe Mathieu-Daudé ;
> Thomas Huth ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Tomasz Dzieciol
> ; Akihiko Odaki
> 
> Subject: [PATCH v3 09/47] igb: Always copy ethernet header
> 
> igb_receive_internal() used to check the iov length to determine whether to
> copy the iovs to a contiguous buffer, but the check is flawed in two
> ways:
> - It does not ensure that iovcnt > 0.
> - It does not take virtio-net header into consideration.
> 
> The size of this copy is just 22 octets, which can be even less than the code
> size required for the checks. This (wrong) optimization is probably not worth
> it, so just remove it. Removing this also allows igb to assume aligned
> accesses for the ethernet header.
> 
> Fixes: 3a977deebe ("Intrdocue igb device emulation")
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/igb_core.c | 39 +--
>  1 file changed, 21 insertions(+), 18 deletions(-)
> 
> diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index
> 21a8d9ada4..1d7f913e5a 100644
> --- a/hw/net/igb_core.c
> +++ b/hw/net/igb_core.c
> @@ -67,6 +67,11 @@ typedef struct IGBTxPktVmdqCallbackContext {
>  NetClientState *nc;
>  } IGBTxPktVmdqCallbackContext;
> 
> +typedef struct L2Header {
> +struct eth_header eth;
> +struct vlan_header vlan;
> +} L2Header;
> +
>  static ssize_t
>  igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
>   bool has_vnet, bool *external_tx); @@ -961,15 +966,16 @@
> igb_rx_is_oversized(IGBCore *core, uint16_t qn, size_t size)
>  return size > (lpe ? max_ethernet_lpe_size : max_ethernet_vlan_size);  }
> 
> -static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header
> *ehdr,
> +static uint16_t igb_receive_assign(IGBCore *core, const L2Header
> +*l2_header,
> size_t size, E1000E_RSSInfo *rss_info,
> bool *external_tx)  {
>  static const int ta_shift[] = { 4, 3, 2, 0 };
> +const struct eth_header *ehdr = _header->eth;
>  uint32_t f, ra[2], *macp, rctl = core->mac[RCTL];
>  uint16_t queues = 0;
>  uint16_t oversized = 0;
> -uint16_t vid = lduw_be_p(_GET_VLAN_HDR(ehdr)->h_tci) &
> VLAN_VID_MASK;
> +uint16_t vid = be16_to_cpu(l2_header->vlan.h_tci) & VLAN_VID_MASK;
>  bool accepted = false;
>  int i;
> 
> @@ -1590,14 +1596,13 @@ static ssize_t
>  igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
>   bool has_vnet, bool *external_tx)  {
> -static const int maximum_ethernet_hdr_len = (ETH_HLEN + 4);
> -
>  uint16_t queues = 0;
>  uint32_t n = 0;
> -uint8_t min_buf[ETH_ZLEN];
> +union {
> +L2Header l2_header;
> +uint8_t octets[ETH_ZLEN];
> +} min_buf;

I would call this^ buf/filter_buf instead of min_buf. But it is up to you to
decide.
In any case, 
Reviewed-by: Sriram Yagnaraman 

>  struct iovec min_iov;
> -struct eth_header *ehdr;
> -uint8_t *filter_buf;
>  size_t size, orig_size;
>  size_t iov_ofs = 0;
>  E1000E_RxRing rxr;
> @@ -1623,24 +1628,21 @@ igb_receive_internal(IGBCore *core, const struct
> iovec *iov, int iovcnt,
>  net_rx_pkt_unset_vhdr(core->rx_pkt);
>  }
> 
> -filter_buf = iov->iov_base + iov_ofs;
>  orig_size = iov_size(iov, iovcnt);
>  size = orig_size - iov_ofs;
> 
>  /* Pad to minimum Ethernet frame length */
>  if (size < sizeof(min_buf)) {
> -iov_to_buf(iov, iovcnt, iov_ofs, min_buf, size);
> -memset(_buf[size], 0, sizeof(min_buf) - size);
> +iov_to_buf(iov, iovcnt, iov_ofs, _buf, size);
> +memset(_buf.octets[size], 0, sizeof(min_buf) - size);
>  e1000x_inc_reg_if_not_full(core->mac, RUC);
> -min_iov.iov_base = filter_buf = min_buf;
> +min_iov.iov_base = _buf;
>  min_iov.iov_len = size = sizeof(min_buf);
>  iovcnt = 1;
>  iov = _iov;
>  iov_ofs = 0;
> -} else if (iov->iov_len < maximum_ethernet_hdr_len) {
> -/* This is very unlikely, but may happen. */
> -iov_to_buf(iov, iovcnt, iov_ofs, min_buf, maximum_ethernet_hdr_len);
> -filter_buf = min_buf;
> +} else {
> +iov_to_buf(iov, iovcnt, iov_ofs, _buf,
> + sizeof(min_buf.l2_header));
>  }
> 
&g

RE: [PATCH v3 06/47] igb: Clear IMS bits when committing ICR access

2023-04-24 Thread Sriram Yagnaraman

> -Original Message-
> From: Akihiko Odaki 
> Sent: Monday, 24 April 2023 12:52
> To: Sriram Yagnaraman 
> Cc: Jason Wang ; Dmitry Fleytman
> ; Michael S . Tsirkin ; Alex
> Bennée ; Philippe Mathieu-Daudé
> ; Thomas Huth ; Wainer dos Santos
> Moschetta ; Beraldo Leal ;
> Cleber Rosa ; Laurent Vivier ; Paolo
> Bonzini ; qemu-devel@nongnu.org; Tomasz Dzieciol
> 
> Subject: Re: [PATCH v3 06/47] igb: Clear IMS bits when committing ICR access
> 
> On 2023/04/24 18:29, Sriram Yagnaraman wrote:
> >> -Original Message-
> >> From: Akihiko Odaki 
> >> Sent: Sunday, 23 April 2023 06:18
> >> Cc: Sriram Yagnaraman ; Jason Wang
> >> ; Dmitry Fleytman ;
> >> Michael S . Tsirkin ; Alex Bennée
> >> ; Philippe Mathieu-Daudé ;
> >> Thomas Huth ; Wainer dos Santos Moschetta
> >> ; Beraldo Leal ; Cleber Rosa
> >> ; Laurent Vivier ; Paolo
> >> Bonzini ; qemu-devel@nongnu.org; Tomasz
> Dzieciol
> >> ; Akihiko Odaki
> >> 
> >> Subject: [PATCH v3 06/47] igb: Clear IMS bits when committing ICR
> >> access
> >>
> >> The datasheet says contradicting statements regarding ICR accesses so
> >> it is not reliable to determine the behavior of ICR accesses.
> >> However, e1000e does clear IMS bits when reading ICR accesses and
> >> Linux also expects ICR accesses will clear IMS bits according to:
> >> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tr
> >> ee/drivers/
> >> net/ethernet/intel/igb/igb_main.c?h=v6.2#n8048
> >>
> >> Fixes: 3a977deebe ("Intrdocue igb device emulation")
> >> Signed-off-by: Akihiko Odaki 
> >> ---
> >>   hw/net/igb_core.c | 8 
> >>   1 file changed, 4 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index
> >> 96a118b6c1..eaca5bd2b6 100644
> >> --- a/hw/net/igb_core.c
> >> +++ b/hw/net/igb_core.c
> >> @@ -2452,16 +2452,16 @@ igb_set_ims(IGBCore *core, int index,
> >> uint32_t
> >> val)  static void igb_commit_icr(IGBCore *core)  {
> >>   /*
> >> - * If GPIE.NSICR = 0, then the copy of IAM to IMS will occur only if 
> >> at
> >> + * If GPIE.NSICR = 0, then the clear of IMS will occur only if
> >> + at
> >>* least one bit is set in the IMS and there is a true interrupt as
> >>* reflected in ICR.INTA.
> >>*/
> >>   if ((core->mac[GPIE] & E1000_GPIE_NSICR) ||
> >>   (core->mac[IMS] && (core->mac[ICR] & E1000_ICR_INT_ASSERTED))) {
> >> -igb_set_ims(core, IMS, core->mac[IAM]);
> >> -} else {
> >> -igb_update_interrupt_state(core);
> >> +igb_clear_ims_bits(core, core->mac[IAM]);
> >>   }
> >> +
> >> +igb_update_interrupt_state(core);
> >>   }
> >>
> >>   static void igb_set_icr(IGBCore *core, int index, uint32_t val)
> >> --
> >> 2.40.0
> >
> > Reviewed-by: Sriram Yagnaraman 
> >
> > An additional question, should ICR be cleared if an actual interrupt was
> asserted?
> > (According to 7.3.2.11 GPIE: Non Selective Interrupt clear on read:
> > When set, every read of ICR clears it. When this bit is cleared, an ICR read
> causes it to be cleared only if an actual interrupt was asserted or IMS = 0b.)
> Something like this?
> 
> That is handled in igb_commit_icr(), which is renamed to igb_nsicr() in patch
> "igb: Notify only new interrupts".
> 

Mm, I must be missing something, but I still don't see the ICR bits being
cleared in igb_commit_icr()/igb_nsicr().
For example, e1000e_mac_icr_read() does this explicitly:
if ((core->mac[ICR] & E1000_ICR_ASSERTED) &&
(core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) {
trace_e1000e_irq_icr_clear_iame();
core->mac[ICR] = 0;
trace_e1000e_irq_icr_process_iame();
e1000e_clear_ims_bits(core, core->mac[IAM]);
}


> >
> > diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index
> > eaca5bd2b6..aaaf80e4a3 100644
> > --- a/hw/net/igb_core.c
> > +++ b/hw/net/igb_core.c
> > @@ -2529,6 +2529,9 @@ igb_mac_icr_read(IGBCore *core, int index)
> >   } else if (core->mac[IMS] == 0) {
> >   trace_e1000e_irq_icr_clear_zero_ims();
> >   core->mac[ICR] = 0;
> > +} else if (core->mac[ICR] & E1000_ICR_INT_ASSERTED) {
> > +e1000e_irq_icr_clear_iame();
> > +core->mac[ICR] = 0;
> >   } else if (!msix_enabled(core->owner)) {
> >   trace_e1000e_irq_icr_clear_nonmsix_icr_read();
> >   core->mac[ICR] = 0;


RE: [PATCH v3 40/47] igb: Implement igb-specific oversize check

2023-04-24 Thread Sriram Yagnaraman

> -Original Message-
> From: Akihiko Odaki 
> Sent: Sunday, 23 April 2023 06:18
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S . Tsirkin ; Alex Bennée
> ; Philippe Mathieu-Daudé ;
> Thomas Huth ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Tomasz Dzieciol
> ; Akihiko Odaki
> 
> Subject: [PATCH v3 40/47] igb: Implement igb-specific oversize check
> 
> igb has a configurable size limit for LPE, and uses different limits 
> depending on
> whether the packet is treated as a VLAN packet.
> 
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/igb_core.c | 36 +---
>  1 file changed, 21 insertions(+), 15 deletions(-)
> 
Reviewed-by: Sriram Yagnaraman 


RE: [PATCH v3 06/47] igb: Clear IMS bits when committing ICR access

2023-04-24 Thread Sriram Yagnaraman
> -Original Message-
> From: Akihiko Odaki 
> Sent: Sunday, 23 April 2023 06:18
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S . Tsirkin ; Alex Bennée
> ; Philippe Mathieu-Daudé ;
> Thomas Huth ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Tomasz Dzieciol
> ; Akihiko Odaki
> 
> Subject: [PATCH v3 06/47] igb: Clear IMS bits when committing ICR access
> 
> The datasheet makes contradictory statements regarding ICR accesses, so it is
> not a reliable basis for determining the behavior of ICR accesses. However,
> e1000e does clear IMS bits when reading ICR, and Linux also expects ICR
> accesses to clear IMS bits according to:
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/
> net/ethernet/intel/igb/igb_main.c?h=v6.2#n8048
> 
> Fixes: 3a977deebe ("Intrdocue igb device emulation")
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/igb_core.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index
> 96a118b6c1..eaca5bd2b6 100644
> --- a/hw/net/igb_core.c
> +++ b/hw/net/igb_core.c
> @@ -2452,16 +2452,16 @@ igb_set_ims(IGBCore *core, int index, uint32_t
> val)  static void igb_commit_icr(IGBCore *core)  {
>  /*
> - * If GPIE.NSICR = 0, then the copy of IAM to IMS will occur only if at
> + * If GPIE.NSICR = 0, then the clear of IMS will occur only if at
>   * least one bit is set in the IMS and there is a true interrupt as
>   * reflected in ICR.INTA.
>   */
>  if ((core->mac[GPIE] & E1000_GPIE_NSICR) ||
>  (core->mac[IMS] && (core->mac[ICR] & E1000_ICR_INT_ASSERTED))) {
> -igb_set_ims(core, IMS, core->mac[IAM]);
> -} else {
> -igb_update_interrupt_state(core);
> +igb_clear_ims_bits(core, core->mac[IAM]);
>  }
> +
> +igb_update_interrupt_state(core);
>  }
> 
>  static void igb_set_icr(IGBCore *core, int index, uint32_t val)
> --
> 2.40.0

Reviewed-by: Sriram Yagnaraman 

An additional question: should ICR be cleared if an actual interrupt was
asserted?
(According to 7.3.2.11 GPIE: Non Selective Interrupt clear on read: When set, 
every read of ICR clears it. When this bit is cleared, an ICR read causes it to 
be cleared only if an actual interrupt was asserted or IMS = 0b.)
Something like this?

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index eaca5bd2b6..aaaf80e4a3 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -2529,6 +2529,9 @@ igb_mac_icr_read(IGBCore *core, int index)
 } else if (core->mac[IMS] == 0) {
 trace_e1000e_irq_icr_clear_zero_ims();
 core->mac[ICR] = 0;
+} else if (core->mac[ICR] & E1000_ICR_INT_ASSERTED) {
+e1000e_irq_icr_clear_iame();
+core->mac[ICR] = 0;
 } else if (!msix_enabled(core->owner)) {
 trace_e1000e_irq_icr_clear_nonmsix_icr_read();
 core->mac[ICR] = 0;


RE: [PATCH v3 37/47] igb: Implement Tx SCTP CSO

2023-04-24 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Sunday, 23 April 2023 06:18
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S . Tsirkin ; Alex Bennée
> ; Philippe Mathieu-Daudé ;
> Thomas Huth ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Tomasz Dzieciol
> ; Akihiko Odaki
> 
> Subject: [PATCH v3 37/47] igb: Implement Tx SCTP CSO
> 
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/net_tx_pkt.h |  8 
>  hw/net/igb_core.c   | 12 +++-
>  hw/net/net_tx_pkt.c | 18 ++
>  3 files changed, 33 insertions(+), 5 deletions(-)
> 
Reviewed-by: Sriram Yagnaraman 


RE: [PATCH v3 29/47] igb: Rename a variable in igb_receive_internal()

2023-04-24 Thread Sriram Yagnaraman

> -Original Message-
> From: Akihiko Odaki 
> Sent: Sunday, 23 April 2023 06:18
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S . Tsirkin ; Alex Bennée
> ; Philippe Mathieu-Daudé ;
> Thomas Huth ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Tomasz Dzieciol
> ; Akihiko Odaki
> 
> Subject: [PATCH v3 29/47] igb: Rename a variable in igb_receive_internal()
> 
> Rename variable "n" to "causes", which properly represents the content of the
> variable.
> 
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/igb_core.c | 12 ++++++--
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
Reviewed-by: Sriram Yagnaraman 


RE: [PATCH v3 36/47] igb: Implement Rx SCTP CSO

2023-04-24 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Sunday, 23 April 2023 06:18
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S . Tsirkin ; Alex Bennée
> ; Philippe Mathieu-Daudé ;
> Thomas Huth ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Tomasz Dzieciol
> ; Akihiko Odaki
> 
> Subject: [PATCH v3 36/47] igb: Implement Rx SCTP CSO
> 
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/igb_regs.h |  1 +
>  include/net/eth.h |  4 ++-
>  include/qemu/crc32c.h |  1 +
>  hw/net/e1000e_core.c  |  5 
>  hw/net/igb_core.c | 15 +-
>  hw/net/net_rx_pkt.c   | 64 +++
>  net/eth.c |  4 +++
>  util/crc32c.c |  8 ++
>  8 files changed, 89 insertions(+), 13 deletions(-)
> 

LGTM.
Reviewed-by: Sriram Yagnaraman 


RE: [PATCH v2 03/41] e1000x: Fix BPRC and MPRC

2023-04-20 Thread Sriram Yagnaraman

> -Original Message-
> From: Akihiko Odaki 
> Sent: Thursday, 20 April 2023 07:46
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S . Tsirkin ; Alex Bennée
> ; Philippe Mathieu-Daudé ;
> Thomas Huth ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Tomasz Dzieciol
> ; Akihiko Odaki
> 
> Subject: [PATCH v2 03/41] e1000x: Fix BPRC and MPRC
> 
> Before this change, e1000 and the common code updated BPRC and MPRC
> depending on the matched filter, but e1000e and igb decided to update those
> counters by deriving the packet type independently. This inconsistency caused
> a multicast packet to be counted twice.
> 
> Updating BPRC and MPRC depending on the matched filter is fundamentally
> flawed anyway, as a filter can be used for different types of packets. For
> example, it is possible to filter broadcast packets with MTA.
> 
> Always determine what counters to update by inspecting the packets.
> 
> Fixes: 3b27430177 ("e1000: Implementing various counters")
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/e1000x_common.h |  5 +++--
>  hw/net/e1000.c |  6 +++---
>  hw/net/e1000e_core.c   | 20 +++-
>  hw/net/e1000x_common.c | 25 +++--
>  hw/net/igb_core.c  | 22 +-
>  5 files changed, 33 insertions(+), 45 deletions(-)
> 

Reviewed-by: Sriram Yagnaraman 


RE: [PATCH v2 28/41] hw/net/net_rx_pkt: Enforce alignment for eth_header

2023-04-20 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Thursday, 20 April 2023 07:47
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S . Tsirkin ; Alex Bennée
> ; Philippe Mathieu-Daudé ;
> Thomas Huth ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Tomasz Dzieciol
> ; Akihiko Odaki
> 
> Subject: [PATCH v2 28/41] hw/net/net_rx_pkt: Enforce alignment for
> eth_header
> 
> eth_strip_vlan and eth_strip_vlan_ex refer to ehdr_buf as struct eth_header.
> Enforce alignment for the structure.
> 
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/net_rx_pkt.c | 11 +++
>  1 file changed, 7 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c index
> 6125a063d7..1de42b4f51 100644
> --- a/hw/net/net_rx_pkt.c
> +++ b/hw/net/net_rx_pkt.c
> @@ -23,7 +23,10 @@
> 
>  struct NetRxPkt {
>  struct virtio_net_hdr virt_hdr;
> -uint8_t ehdr_buf[sizeof(struct eth_header) + sizeof(struct vlan_header)];
> +struct {
> +struct eth_header eth;
> +struct vlan_header vlan;
> +} ehdr_buf;
>  struct iovec *vec;
>  uint16_t vec_len_total;
>  uint16_t vec_len;
> @@ -89,7 +92,7 @@ net_rx_pkt_pull_data(struct NetRxPkt *pkt,
>  if (pkt->ehdr_buf_len) {
>  net_rx_pkt_iovec_realloc(pkt, iovcnt + 1);
> 
> -pkt->vec[0].iov_base = pkt->ehdr_buf;
> +pkt->vec[0].iov_base = >ehdr_buf;
>  pkt->vec[0].iov_len = pkt->ehdr_buf_len;
> 
>  pkt->tot_len = pllen + pkt->ehdr_buf_len; @@ -120,7 +123,7 @@ void
> net_rx_pkt_attach_iovec(struct NetRxPkt *pkt,
>  assert(pkt);
> 
>  if (strip_vlan) {
> -pkt->ehdr_buf_len = eth_strip_vlan(iov, iovcnt, iovoff, 
> pkt->ehdr_buf,
> +pkt->ehdr_buf_len = eth_strip_vlan(iov, iovcnt, iovoff,
> + >ehdr_buf,
> , );
>  } else {
>  pkt->ehdr_buf_len = 0;
> @@ -142,7 +145,7 @@ void net_rx_pkt_attach_iovec_ex(struct NetRxPkt
> *pkt,
> 
>  if (strip_vlan) {
>  pkt->ehdr_buf_len = eth_strip_vlan_ex(iov, iovcnt, iovoff, vet,
> -  pkt->ehdr_buf,
> +  >ehdr_buf,
>, );
>  } else {
>  pkt->ehdr_buf_len = 0;
> --
> 2.40.0

LGTM


RE: [PATCH v2 27/41] net/eth: Always add VLAN tag

2023-04-20 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Thursday, 20 April 2023 07:47
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S . Tsirkin ; Alex Bennée
> ; Philippe Mathieu-Daudé ;
> Thomas Huth ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Tomasz Dzieciol
> ; Akihiko Odaki
> 
> Subject: [PATCH v2 27/41] net/eth: Always add VLAN tag
> 
> It is possible to have another VLAN tag even if the packet is already tagged.
> 
> Signed-off-by: Akihiko Odaki 
> ---
>  include/net/eth.h   |  4 ++--
>  hw/net/net_tx_pkt.c | 16 +++-
>  net/eth.c   | 22 ++
>  3 files changed, 15 insertions(+), 27 deletions(-)
> 
> diff --git a/include/net/eth.h b/include/net/eth.h index
> 95ff24d6b8..048e434685 100644
> --- a/include/net/eth.h
> +++ b/include/net/eth.h
> @@ -353,8 +353,8 @@ eth_strip_vlan_ex(const struct iovec *iov, int iovcnt,
> size_t iovoff,  uint16_t  eth_get_l3_proto(const struct iovec *l2hdr_iov, int
> iovcnt, size_t l2hdr_len);
> 
> -void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag,
> -uint16_t vlan_ethtype, bool *is_new);
> +void eth_setup_vlan_headers(struct eth_header *ehdr, size_t *ehdr_size,
> +uint16_t vlan_tag, uint16_t vlan_ethtype);
> 
> 
>  uint8_t eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t 
> l4proto);
> diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c index
> ce6b102391..af8f77a3f0 100644
> --- a/hw/net/net_tx_pkt.c
> +++ b/hw/net/net_tx_pkt.c
> @@ -40,7 +40,10 @@ struct NetTxPkt {
> 
>  struct iovec *vec;
> 
> -uint8_t l2_hdr[ETH_MAX_L2_HDR_LEN];
> +struct {
> +struct eth_header eth;
> +struct vlan_header vlan[3];
> +} l2_hdr;
>  union {
>  struct ip_header ip;
>  struct ip6_header ip6;
> @@ -365,18 +368,13 @@ bool net_tx_pkt_build_vheader(struct NetTxPkt
> *pkt, bool tso_enable,  void net_tx_pkt_setup_vlan_header_ex(struct NetTxPkt
> *pkt,
>  uint16_t vlan, uint16_t vlan_ethtype)  {
> -bool is_new;
>  assert(pkt);
> 
>  eth_setup_vlan_headers(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base,
> -vlan, vlan_ethtype, &is_new);
> +   &pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len,
> +   vlan, vlan_ethtype);
> 
> -/* update l2hdrlen */
> -if (is_new) {
> -pkt->hdr_len += sizeof(struct vlan_header);
> -pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len +=
> -sizeof(struct vlan_header);
> -}
> +pkt->hdr_len += sizeof(struct vlan_header);
>  }
> 
>  bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, void *base, size_t
> len) diff --git a/net/eth.c b/net/eth.c index f7ffbda600..5307978486 100644
> --- a/net/eth.c
> +++ b/net/eth.c
> @@ -21,26 +21,16 @@
>  #include "net/checksum.h"
>  #include "net/tap.h"
> 
> -void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag,
> -uint16_t vlan_ethtype, bool *is_new)
> +void eth_setup_vlan_headers(struct eth_header *ehdr, size_t *ehdr_size,
> +uint16_t vlan_tag, uint16_t vlan_ethtype)
>  {
>  struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);
> 
> -switch (be16_to_cpu(ehdr->h_proto)) {
> -case ETH_P_VLAN:
> -case ETH_P_DVLAN:
> -/* vlan hdr exists */
> -*is_new = false;
> -break;
> -
> -default:
> -/* No VLAN header, put a new one */
> -vhdr->h_proto = ehdr->h_proto;
> -ehdr->h_proto = cpu_to_be16(vlan_ethtype);
> -*is_new = true;
> -break;
> -}
> +memmove(vhdr + 1, vhdr, *ehdr_size - ETH_HLEN);

Do we need a check that we are not overflowing the vlan array size?

>  vhdr->h_tci = cpu_to_be16(vlan_tag);
> +vhdr->h_proto = ehdr->h_proto;
> +ehdr->h_proto = cpu_to_be16(vlan_ethtype);
> +*ehdr_size += sizeof(*vhdr);
>  }
> 
>  uint8_t
> --
> 2.40.0



RE: [PATCH v2 32/41] igb: Implement Rx SCTP CSO

2023-04-20 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Thursday, 20 April 2023 07:47
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S . Tsirkin ; Alex Bennée
> ; Philippe Mathieu-Daudé ;
> Thomas Huth ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Tomasz Dzieciol
> ; Akihiko Odaki
> 
> Subject: [PATCH v2 32/41] igb: Implement Rx SCTP CSO
> 
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/igb_regs.h |  1 +
>  include/net/eth.h |  4 ++-
>  include/qemu/crc32c.h |  1 +
>  hw/net/e1000e_core.c  |  5 
>  hw/net/igb_core.c | 15 +-
>  hw/net/net_rx_pkt.c   | 64 +++
>  net/eth.c |  4 +++
>  util/crc32c.c |  8 ++
>  8 files changed, 89 insertions(+), 13 deletions(-)
> 
> diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h index
> e6ac26dc0e..4b4ebd3369 100644
> --- a/hw/net/igb_regs.h
> +++ b/hw/net/igb_regs.h
> @@ -670,6 +670,7 @@ union e1000_adv_rx_desc {  #define
> E1000_ADVRXD_PKT_IP6 BIT(6)  #define E1000_ADVRXD_PKT_TCP BIT(8)
> #define E1000_ADVRXD_PKT_UDP BIT(9)
> +#define E1000_ADVRXD_PKT_SCTP BIT(10)
> 
>  static inline uint8_t igb_ivar_entry_rx(uint8_t i)  { diff --git 
> a/include/net/eth.h
> b/include/net/eth.h index 048e434685..75e7f1551c 100644
> --- a/include/net/eth.h
> +++ b/include/net/eth.h
> @@ -224,6 +224,7 @@ struct tcp_hdr {
>  #define IP_HEADER_VERSION_6   (6)
>  #define IP_PROTO_TCP  (6)
>  #define IP_PROTO_UDP  (17)
> +#define IP_PROTO_SCTP (132)
>  #define IPTOS_ECN_MASK0x03
>  #define IPTOS_ECN(x)  ((x) & IPTOS_ECN_MASK)
>  #define IPTOS_ECN_CE  0x03
> @@ -379,7 +380,8 @@ typedef struct eth_ip4_hdr_info_st {  typedef enum
> EthL4HdrProto {
>  ETH_L4_HDR_PROTO_INVALID,
>  ETH_L4_HDR_PROTO_TCP,
> -ETH_L4_HDR_PROTO_UDP
> +ETH_L4_HDR_PROTO_UDP,
> +ETH_L4_HDR_PROTO_SCTP
>  } EthL4HdrProto;
> 
>  typedef struct eth_l4_hdr_info_st {
> diff --git a/include/qemu/crc32c.h b/include/qemu/crc32c.h index
> 5b78884c38..88b4d2b3b3 100644
> --- a/include/qemu/crc32c.h
> +++ b/include/qemu/crc32c.h
> @@ -30,5 +30,6 @@
> 
> 
>  uint32_t crc32c(uint32_t crc, const uint8_t *data, unsigned int length);
> +uint32_t iov_crc32c(uint32_t crc, const struct iovec *iov, size_t
> +iov_cnt);
> 
>  #endif
> diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index
> 27124bba07..8b35735799 100644
> --- a/hw/net/e1000e_core.c
> +++ b/hw/net/e1000e_core.c
> @@ -1114,6 +1114,11 @@ e1000e_verify_csum_in_sw(E1000ECore *core,
>  return;
>  }
> 
> +if (l4hdr_proto != ETH_L4_HDR_PROTO_TCP &&
> +l4hdr_proto != ETH_L4_HDR_PROTO_UDP) {
> +return;
> +}
> +
>  if (!net_rx_pkt_validate_l4_csum(pkt, &csum_valid)) {
>  trace_e1000e_rx_metadata_l4_csum_validation_failed();
>  return;
> diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index
> 4dc8e3ae7b..b7f7e765a5 100644
> --- a/hw/net/igb_core.c
> +++ b/hw/net/igb_core.c
> @@ -1212,7 +1212,7 @@ igb_build_rx_metadata(IGBCore *core,
>uint16_t *vlan_tag)  {
>  struct virtio_net_hdr *vhdr;
> -bool hasip4, hasip6;
> +bool hasip4, hasip6, csum_valid;
>  EthL4HdrProto l4hdr_proto;
> 
>  *status_flags = E1000_RXD_STAT_DD;
> @@ -1272,6 +1272,10 @@ igb_build_rx_metadata(IGBCore *core,
>  *pkt_info |= E1000_ADVRXD_PKT_UDP;
>  break;
> 
> +case ETH_L4_HDR_PROTO_SCTP:
> +*pkt_info |= E1000_ADVRXD_PKT_SCTP;
> +break;
> +
>  default:
>  break;
>  }
> @@ -1304,6 +1308,15 @@ igb_build_rx_metadata(IGBCore *core,
> 
>  if (igb_rx_l4_cso_enabled(core)) {
>  switch (l4hdr_proto) {
> +case ETH_L4_HDR_PROTO_SCTP:
> +if (!net_rx_pkt_validate_l4_csum(pkt, &csum_valid)) {

Forgive my naive question: doesn't the tap device validate the SCTP checksum?
Is that something we can improve? I can help with adding it to the Linux tap
device if you think that would make this code simpler, just like for UDP/TCP.

> +trace_e1000e_rx_metadata_l4_csum_validation_failed();
> +goto func_exit;
> +}
> +if (!csum_valid) {
> +*status_flags |= E1000_RXDEXT_STATERR_TCPE;
> +}
> +/* fall through */
>  case ETH_L4_HDR_PROTO_TCP:
>  *status_flags |= E1000_RXD_STAT_TCPCS;
>  break;
> diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c index

RE: [PATCH v2 04/41] igb: Fix Rx packet type encoding

2023-04-20 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Thursday, 20 April 2023 07:46
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S . Tsirkin ; Alex Bennée
> ; Philippe Mathieu-Daudé ;
> Thomas Huth ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Tomasz Dzieciol
> ; Akihiko Odaki
> 
> Subject: [PATCH v2 04/41] igb: Fix Rx packet type encoding
> 
> igb's advanced descriptor uses a packet type encoding different from one used
> in e1000e's extended descriptor. Fix the logic to encode Rx packet type
> accordingly.
> 
> Fixes: 3a977deebe ("Intrdocue igb device emulation")
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/igb_regs.h |  5 +
>  hw/net/igb_core.c | 38 +++-------
>  2 files changed, 24 insertions(+), 19 deletions(-)
> 

Reviewed-by: Sriram Yagnaraman 


RE: [PATCH v2 38/41] igb: Implement Tx timestamp

2023-04-20 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Thursday, 20 April 2023 07:47
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S . Tsirkin ; Alex Bennée
> ; Philippe Mathieu-Daudé ;
> Thomas Huth ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Tomasz Dzieciol
> ; Akihiko Odaki
> 
> Subject: [PATCH v2 38/41] igb: Implement Tx timestamp
> 
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/igb_regs.h | 3 +++
>  hw/net/igb_core.c | 7 +++
>  2 files changed, 10 insertions(+)
> 

Reviewed-by: Sriram Yagnaraman 



RE: [PATCH v2 23/41] igb: Add more definitions for Tx descriptor

2023-04-20 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Thursday, 20 April 2023 07:47
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S . Tsirkin ; Alex Bennée
> ; Philippe Mathieu-Daudé ;
> Thomas Huth ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Tomasz Dzieciol
> ; Akihiko Odaki
> 
> Subject: [PATCH v2 23/41] igb: Add more definitions for Tx descriptor
> 
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/igb_regs.h | 32 +++-
> hw/net/igb_core.c |  4 ++--
>  2 files changed, 29 insertions(+), 7 deletions(-)
> 

Reviewed-by: Sriram Yagnaraman 


RE: [PATCH v2 16/41] e1000x: Rename TcpIpv6 into TcpIpv6Ex

2023-04-20 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Thursday, 20 April 2023 07:47
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S . Tsirkin ; Alex Bennée
> ; Philippe Mathieu-Daudé ;
> Thomas Huth ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Tomasz Dzieciol
> ; Akihiko Odaki
> 
> Subject: [PATCH v2 16/41] e1000x: Rename TcpIpv6 into TcpIpv6Ex
> 
> e1000e and igb employs NetPktRssIpV6TcpEx for RSS hash if TcpIpv6 MRQC bit
> is set. Moreover, igb also has a MRQC bit for NetPktRssIpV6Tcp though it is 
> not
> implemented yet. Rename it to TcpIpv6Ex to avoid confusion.
> 
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/e1000x_regs.h | 24   hw/net/e1000e_core.c |
> 8 
>  hw/net/igb_core.c|  8 
>  hw/net/trace-events  |  2 +-
>  4 files changed, 21 insertions(+), 21 deletions(-)
> 
> diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h index
> 6d3c4c6d3a..13760c66d3 100644
> --- a/hw/net/e1000x_regs.h
> +++ b/hw/net/e1000x_regs.h
> @@ -290,18 +290,18 @@
>  #define E1000_RETA_IDX(hash)((hash) & (BIT(7) - 1))
>  #define E1000_RETA_VAL(reta, hash)  (((uint8_t
> *)(reta))[E1000_RETA_IDX(hash)])
> 
> -#define E1000_MRQC_EN_TCPIPV4(mrqc) ((mrqc) & BIT(16))
> -#define E1000_MRQC_EN_IPV4(mrqc)((mrqc) & BIT(17))
> -#define E1000_MRQC_EN_TCPIPV6(mrqc) ((mrqc) & BIT(18)) -#define
> E1000_MRQC_EN_IPV6EX(mrqc)  ((mrqc) & BIT(19))
> -#define E1000_MRQC_EN_IPV6(mrqc)((mrqc) & BIT(20))
> -
> -#define E1000_MRQ_RSS_TYPE_NONE (0)
> -#define E1000_MRQ_RSS_TYPE_IPV4TCP  (1)
> -#define E1000_MRQ_RSS_TYPE_IPV4 (2)
> -#define E1000_MRQ_RSS_TYPE_IPV6TCP  (3)
> -#define E1000_MRQ_RSS_TYPE_IPV6EX   (4)
> -#define E1000_MRQ_RSS_TYPE_IPV6 (5)
> +#define E1000_MRQC_EN_TCPIPV4(mrqc)   ((mrqc) & BIT(16))
> +#define E1000_MRQC_EN_IPV4(mrqc)  ((mrqc) & BIT(17))
> +#define E1000_MRQC_EN_TCPIPV6EX(mrqc) ((mrqc) & BIT(18))
> +#define E1000_MRQC_EN_IPV6EX(mrqc)((mrqc) & BIT(19))
> +#define E1000_MRQC_EN_IPV6(mrqc)  ((mrqc) & BIT(20))
> +
> +#define E1000_MRQ_RSS_TYPE_NONE   (0)
> +#define E1000_MRQ_RSS_TYPE_IPV4TCP(1)
> +#define E1000_MRQ_RSS_TYPE_IPV4   (2)
> +#define E1000_MRQ_RSS_TYPE_IPV6TCPEX  (3)
> +#define E1000_MRQ_RSS_TYPE_IPV6EX (4)
> +#define E1000_MRQ_RSS_TYPE_IPV6   (5)
> 
>  #define E1000_ICR_ASSERTED BIT(31)
>  #define E1000_EIAC_MASK0x01F0
> diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index
> 743b36ddfb..481db41931 100644
> --- a/hw/net/e1000e_core.c
> +++ b/hw/net/e1000e_core.c
> @@ -537,7 +537,7 @@ e1000e_rss_get_hash_type(E1000ECore *core, struct
> NetRxPkt *pkt)
>  ip6info->rss_ex_dst_valid,
>  ip6info->rss_ex_src_valid,
>  core->mac[MRQC],
> -E1000_MRQC_EN_TCPIPV6(core->mac[MRQC]),
> +
> + E1000_MRQC_EN_TCPIPV6EX(core->mac[MRQC]),

Some formatting gone wrong here?  Otherwise,
Reviewed-by: Sriram Yagnaraman 

>  E1000_MRQC_EN_IPV6EX(core->mac[MRQC]),
>  E1000_MRQC_EN_IPV6(core->mac[MRQC]));
> 
> @@ -546,8 +546,8 @@ e1000e_rss_get_hash_type(E1000ECore *core, struct
> NetRxPkt *pkt)
>ip6info->rss_ex_src_valid))) {
> 
>  if (l4hdr_proto == ETH_L4_HDR_PROTO_TCP &&
> -E1000_MRQC_EN_TCPIPV6(core->mac[MRQC])) {
> -return E1000_MRQ_RSS_TYPE_IPV6TCP;
> +E1000_MRQC_EN_TCPIPV6EX(core->mac[MRQC])) {
> +return E1000_MRQ_RSS_TYPE_IPV6TCPEX;
>  }
> 
>  if (E1000_MRQC_EN_IPV6EX(core->mac[MRQC])) { @@ -581,7 +581,7
> @@ e1000e_rss_calc_hash(E1000ECore *core,
>  case E1000_MRQ_RSS_TYPE_IPV4TCP:
>  type = NetPktRssIpV4Tcp;
>  break;
> -case E1000_MRQ_RSS_TYPE_IPV6TCP:
> +case E1000_MRQ_RSS_TYPE_IPV6TCPEX:
>  type = NetPktRssIpV6TcpEx;
>  break;
>  case E1000_MRQ_RSS_TYPE_IPV6:
> diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index
> 4b9131e566..0182880adf 100644
> --- a/hw/net/igb_core.c
> +++ b/hw/net/igb_core.c
> @@ -301,7 +301,7 @@ igb_rss_get_hash_type(IGBCore *core, struct NetRxPkt
> *pkt)
>  ip6info->rss_ex_dst_valid,
>  ip6info->rss_ex_src_valid,
>  core->mac[MRQC],
> -E1000_MRQC_EN_TCPIPV

RE: [PATCH v2 31/41] igb: Use UDP for RSS hash

2023-04-20 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Thursday, 20 April 2023 07:47
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S . Tsirkin ; Alex Bennée
> ; Philippe Mathieu-Daudé ;
> Thomas Huth ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Tomasz Dzieciol
> ; Akihiko Odaki
> 
> Subject: [PATCH v2 31/41] igb: Use UDP for RSS hash
> 
> e1000e does not support using UDP for RSS hash, but igb does.
> 
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/igb_regs.h |  3 +++
>  hw/net/igb_core.c | 16 ++++
>  2 files changed, 19 insertions(+)
> 

Reviewed-by: Sriram Yagnaraman 


RE: [PATCH v2 29/41] tests/qtest/libqos/igb: Set GPIE.Multiple_MSIX

2023-04-20 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Thursday, 20 April 2023 07:47
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S . Tsirkin ; Alex Bennée
> ; Philippe Mathieu-Daudé ;
> Thomas Huth ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Tomasz Dzieciol
> ; Akihiko Odaki
> 
> Subject: [PATCH v2 29/41] tests/qtest/libqos/igb: Set GPIE.Multiple_MSIX
> 
> GPIE.Multiple_MSIX is not set by default, and needs to be set to get 
> interrupts
> from multiple MSI-X vectors.
> 
> Signed-off-by: Akihiko Odaki 
> ---
>  tests/qtest/libqos/igb.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/tests/qtest/libqos/igb.c b/tests/qtest/libqos/igb.c index
> 12fb531bf0..a603468beb 100644
> --- a/tests/qtest/libqos/igb.c
> +++ b/tests/qtest/libqos/igb.c
> @@ -114,6 +114,7 @@ static void igb_pci_start_hw(QOSGraphObject *obj)
>  e1000e_macreg_write(>e1000e, E1000_RCTL, E1000_RCTL_EN);
> 
>  /* Enable all interrupts */
> +e1000e_macreg_write(>e1000e, E1000_GPIE,
> E1000_GPIE_MSIX_MODE);
>  e1000e_macreg_write(>e1000e, E1000_IMS,  0xFFFF);
>  e1000e_macreg_write(>e1000e, E1000_EIMS, 0x);
> 
> --
> 2.40.0

Reviewed-by: Sriram Yagnaraman 


RE: [PATCH 30/40] igb: Implement igb-specific oversize check

2023-04-16 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Friday, 14 April 2023 13:37
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S. Tsirkin ; Alex Bennée ;
> Philippe Mathieu-Daudé ; Thomas Huth
> ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Akihiko Odaki
> 
> Subject: [PATCH 30/40] igb: Implement igb-specific oversize check
> 
> igb has a configurable size limit for LPE, and uses different limits 
> depending on
> whether the packet is treated as a VLAN packet.
> 
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/igb_core.c | 41 +++--
>  1 file changed, 27 insertions(+), 14 deletions(-)
> 
> diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index
> 2013a9a53d..569897fb99 100644
> --- a/hw/net/igb_core.c
> +++ b/hw/net/igb_core.c
> @@ -954,16 +954,21 @@ igb_rx_l4_cso_enabled(IGBCore *core)
>  return !!(core->mac[RXCSUM] & E1000_RXCSUM_TUOFLD);  }
> 
> -static bool

The convention in this file seems to be to declare the return type on the first
line and the function name on the next line.

> -igb_rx_is_oversized(IGBCore *core, uint16_t qn, size_t size)
> +static bool igb_rx_is_oversized(IGBCore *core, const struct eth_header *ehdr,
> +size_t size, bool lpe, uint16_t rlpml)
>  {
> -uint16_t pool = qn % IGB_NUM_VM_POOLS;
> -bool lpe = !!(core->mac[VMOLR0 + pool] & E1000_VMOLR_LPE);
> -int max_ethernet_lpe_size =
> -core->mac[VMOLR0 + pool] & E1000_VMOLR_RLPML_MASK;
> -int max_ethernet_vlan_size = 1522;
> +size += 4;

Are the 4 bytes added above for the CRC?

> +
> +if (lpe) {
> +return size > rlpml;
> +}
> +
> +if (e1000x_is_vlan_packet(ehdr, core->mac[VET] & 0xffff) &&
> +e1000x_vlan_rx_filter_enabled(core->mac)) {
> +return size > 1522;
> +}

Should a check against 1526 bytes be added for when an extended VLAN tag is
present? Maybe in "igb: Strip the second VLAN tag for extended VLAN"?

> 
> -return size > (lpe ? max_ethernet_lpe_size : max_ethernet_vlan_size);
> +return size > 1518;
>  }
> 
>  static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header,
> @@ -976,6 +981,8 @@ static uint16_t igb_receive_assign(IGBCore *core,
> const L2Header *l2_header,
>  uint16_t queues = 0;
>  uint16_t oversized = 0;
>  uint16_t vid = be16_to_cpu(l2_header->vlan[0].h_tci) & VLAN_VID_MASK;
> +bool lpe;
> +uint16_t rlpml;
>  int i;
> 
>  memset(rss_info, 0, sizeof(E1000E_RSSInfo)); @@ -984,6 +991,14 @@
> static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header,
>  *external_tx = true;
>  }
> 
> +lpe = !!(core->mac[RCTL] & E1000_RCTL_LPE);
> +rlpml = core->mac[RLPML];
> +if (!(core->mac[RCTL] & E1000_RCTL_SBP) &&
> +igb_rx_is_oversized(core, ehdr, size, lpe, rlpml)) {
> +trace_e1000x_rx_oversized(size);
> +return queues;
> +}
> +
>  if (e1000x_is_vlan_packet(ehdr, core->mac[VET] & 0xffff) &&
>  !e1000x_rx_vlan_filter(core->mac, PKT_GET_VLAN_HDR(ehdr))) {
>  return queues;
> @@ -1067,7 +1082,10 @@ static uint16_t igb_receive_assign(IGBCore *core,
> const L2Header *l2_header,
>  queues &= core->mac[VFRE];
>  if (queues) {
>  for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
> -if ((queues & BIT(i)) && igb_rx_is_oversized(core, i, size)) 
> {
> +lpe = !!(core->mac[VMOLR0 + i] & E1000_VMOLR_LPE);
> +rlpml = core->mac[VMOLR0 + i] & E1000_VMOLR_RLPML_MASK;
> +if ((queues & BIT(i)) &&
> +igb_rx_is_oversized(core, ehdr, size, lpe, rlpml))
> + {
>  oversized |= BIT(i);
>  }
>  }
> @@ -1609,11 +1627,6 @@ igb_receive_internal(IGBCore *core, const struct
> iovec *iov, int iovcnt,
>  iov_to_buf(iov, iovcnt, iov_ofs, _buf, 
> sizeof(min_buf.l2_header));
>  }
> 
> -/* Discard oversized packets if !LPE and !SBP. */
> -if (e1000x_is_oversized(core->mac, size)) {
> -return orig_size;
> -}
> -
>  net_rx_pkt_set_packet_type(core->rx_pkt,
> get_eth_packet_type(_buf.l2_header.eth));
>  net_rx_pkt_set_protocols(core->rx_pkt, iov, iovcnt, iov_ofs);
> --
> 2.40.0



RE: [PATCH 37/40] igb: Implement Tx timestamp

2023-04-15 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Friday, 14 April 2023 13:38
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S. Tsirkin ; Alex Bennée ;
> Philippe Mathieu-Daudé ; Thomas Huth
> ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Akihiko Odaki
> 
> Subject: [PATCH 37/40] igb: Implement Tx timestamp
> 
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/igb_core.c | 7 +++
>  hw/net/igb_regs.h | 3 +++
>  2 files changed, 10 insertions(+)
> 
> diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index
> c716f400fd..38b53676d4 100644
> --- a/hw/net/igb_core.c
> +++ b/hw/net/igb_core.c
> @@ -614,6 +614,13 @@ igb_process_tx_desc(IGBCore *core,
>  tx->first_olinfo_status = 
> le32_to_cpu(tx_desc->read.olinfo_status);
>  tx->first = false;
>  }
> +
> +if ((cmd_type_len & E1000_ADVTXD_MAC_TSTAMP) &&

Should ^ be tx->first_cmd_type_len?
Otherwise, Reviewed-by: Sriram Yagnaraman 

> +(core->mac[TSYNCTXCTL] & E1000_TSYNCTXCTL_ENABLED) &&
> +!(core->mac[TSYNCTXCTL] & E1000_TSYNCTXCTL_VALID)) {
> +core->mac[TSYNCTXCTL] |= E1000_TSYNCTXCTL_VALID;
> +e1000x_timestamp(core->mac, core->timadj, TXSTMPL, TXSTMPH);
> +}
>  } else if ((cmd_type_len & E1000_ADVTXD_DTYP_CTXT) ==
> E1000_ADVTXD_DTYP_CTXT) {
>  /* advanced transmit context descriptor */ diff --git
> a/hw/net/igb_regs.h b/hw/net/igb_regs.h index b88dc9f1f1..808b587a36
> 100644
> --- a/hw/net/igb_regs.h
> +++ b/hw/net/igb_regs.h
> @@ -322,6 +322,9 @@ union e1000_adv_rx_desc {
>  /* E1000_EITR_CNT_IGNR is only for 82576 and newer */
>  #define E1000_EITR_CNT_IGNR 0x8000 /* Don't reset counters on
> write */
> 
> +#define E1000_TSYNCTXCTL_VALID0x0001 /* tx timestamp valid */
> +#define E1000_TSYNCTXCTL_ENABLED  0x0010 /* enable tx
> timestampping
> +*/
> +
>  /* PCI Express Control */
>  #define E1000_GCR_CMPL_TMOUT_MASK   0xF000
>  #define E1000_GCR_CMPL_TMOUT_10ms   0x1000
> --
> 2.40.0



RE: [PATCH 31/40] igb: Use UDP for RSS hash

2023-04-15 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Friday, 14 April 2023 13:37
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S. Tsirkin ; Alex Bennée ;
> Philippe Mathieu-Daudé ; Thomas Huth
> ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Akihiko Odaki
> 
> Subject: [PATCH 31/40] igb: Use UDP for RSS hash
> 
> e1000e does not support using UDP for RSS hash, but igb does.
> 
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/igb_core.c | 16   hw/net/igb_regs.h |  3 +++
>  2 files changed, 19 insertions(+)

Reviewed-by: Sriram Yagnaraman 

UDP hash types look good to me, but while reviewing this patch I realized MRQC 
bit 18 is different between igb and e1000e.
igb: MRQC BIT(18) -> TcpIPv6Ex
igb: MRQC BIT(21) -> TcpIPv6
e1000e: MRQC BIT(18) -> TcpIPv6


> diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index
> 569897fb99..3ad81b15d0 100644
> --- a/hw/net/igb_core.c
> +++ b/hw/net/igb_core.c
> @@ -279,6 +279,11 @@ igb_rss_get_hash_type(IGBCore *core, struct
> NetRxPkt *pkt)
>  return E1000_MRQ_RSS_TYPE_IPV4TCP;
>  }
> 
> +if (l4hdr_proto == ETH_L4_HDR_PROTO_UDP &&
> +(core->mac[MRQC] & E1000_MRQC_RSS_FIELD_IPV4_UDP)) {
> +return E1000_MRQ_RSS_TYPE_IPV4UDP;
> +}
> +
>  if (E1000_MRQC_EN_IPV4(core->mac[MRQC])) {
>  return E1000_MRQ_RSS_TYPE_IPV4;
>  }
> @@ -314,6 +319,11 @@ igb_rss_get_hash_type(IGBCore *core, struct
> NetRxPkt *pkt)
>  return E1000_MRQ_RSS_TYPE_IPV6TCP;
>  }
> 
> +if (l4hdr_proto == ETH_L4_HDR_PROTO_UDP &&
> +(core->mac[MRQC] & E1000_MRQC_RSS_FIELD_IPV6_UDP)) {
> +return E1000_MRQ_RSS_TYPE_IPV6UDP;
> +}
> +
>  if (E1000_MRQC_EN_IPV6EX(core->mac[MRQC])) {
>  return E1000_MRQ_RSS_TYPE_IPV6EX;
>  }
> @@ -352,6 +362,12 @@ igb_rss_calc_hash(IGBCore *core, struct NetRxPkt
> *pkt, E1000E_RSSInfo *info)
>  case E1000_MRQ_RSS_TYPE_IPV6EX:
>  type = NetPktRssIpV6Ex;
>  break;
> +case E1000_MRQ_RSS_TYPE_IPV4UDP:
> +type = NetPktRssIpV4Udp;
> +break;
> +case E1000_MRQ_RSS_TYPE_IPV6UDP:
> +type = NetPktRssIpV6Udp;
> +break;
>  default:
>  assert(false);
>  return 0;
> diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h index
> 22ce909173..03486edb2e 100644
> --- a/hw/net/igb_regs.h
> +++ b/hw/net/igb_regs.h
> @@ -659,6 +659,9 @@ union e1000_adv_rx_desc {
> 
>  #define E1000_RSS_QUEUE(reta, hash) (E1000_RETA_VAL(reta, hash) & 0x0F)
> 
> +#define E1000_MRQ_RSS_TYPE_IPV4UDP 7
> +#define E1000_MRQ_RSS_TYPE_IPV6UDP 8
> +
>  #define E1000_STATUS_IOV_MODE 0x0004
> 
>  #define E1000_STATUS_NUM_VFS_SHIFT 14
> --
> 2.40.0



RE: [PATCH 05/40] igb: Do not require CTRL.VME for tx VLAN tagging

2023-04-15 Thread Sriram Yagnaraman

> -Original Message-
> From: Akihiko Odaki 
> Sent: Friday, 14 April 2023 13:37
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S. Tsirkin ; Alex Bennée ;
> Philippe Mathieu-Daudé ; Thomas Huth
> ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Akihiko Odaki
> 
> Subject: [PATCH 05/40] igb: Do not require CTRL.VME for tx VLAN tagging
> 
> While the datasheet of e1000e says it checks CTRL.VME for tx VLAN tagging,
> igb's datasheet has no such statements. It also says for
> "CTRL.VLE":
> > This register only affects the VLAN Strip in Rx it does not have any
> > influence in the Tx path in the 82576.
> (Appendix A. Changes from the 82575)
> 
> There is no "CTRL.VLE" so it is more likely that it is a mistake of CTRL.VME.
> 
> Fixes: fba7c3b788 ("igb: respect VMVIR and VMOLR for VLAN")
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/igb_core.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 

Reviewed-by: Sriram Yagnaraman 


RE: [PATCH 29/40] igb: Implement MSI-X single vector mode

2023-04-15 Thread Sriram Yagnaraman

> -Original Message-
> From: Akihiko Odaki 
> Sent: Friday, 14 April 2023 13:37
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S. Tsirkin ; Alex Bennée ;
> Philippe Mathieu-Daudé ; Thomas Huth
> ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Akihiko Odaki
> 
> Subject: [PATCH 29/40] igb: Implement MSI-X single vector mode
> 
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/igb_core.c | 9 +++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index
> 429b0ebc03..2013a9a53d 100644
> --- a/hw/net/igb_core.c
> +++ b/hw/net/igb_core.c
> @@ -1870,7 +1870,7 @@ igb_update_interrupt_state(IGBCore *core)
> 
>  icr = core->mac[ICR] & core->mac[IMS];
> 
> -if (msix_enabled(core->owner)) {
> +if (core->mac[GPIE] & E1000_GPIE_MSIX_MODE) {
>  if (icr) {
>  causes = 0;
>  if (icr & E1000_ICR_DRSTA) { @@ -1905,7 +1905,12 @@
> igb_update_interrupt_state(IGBCore *core)
>  trace_e1000e_irq_pending_interrupts(core->mac[ICR] & core->mac[IMS],
>  core->mac[ICR], core->mac[IMS]);
> 
> -if (msi_enabled(core->owner)) {
> +if (msix_enabled(core->owner)) {
> +if (icr) {
> +trace_e1000e_irq_msix_notify_vec(0);
> +msix_notify(core->owner, 0);
> +    }
> +} else if (msi_enabled(core->owner)) {
>  if (icr) {
>  msi_notify(core->owner, 0);
>  }
> --
> 2.40.0

Reviewed-by: Sriram Yagnaraman 



RE: [PATCH 14/40] e1000x: Share more Rx filtering logic

2023-04-15 Thread Sriram Yagnaraman

> -Original Message-
> From: Akihiko Odaki 
> Sent: Friday, 14 April 2023 13:37
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S. Tsirkin ; Alex Bennée ;
> Philippe Mathieu-Daudé ; Thomas Huth
> ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Akihiko Odaki
> 
> Subject: [PATCH 14/40] e1000x: Share more Rx filtering logic
> 
> This saves some code and enables tracepoint for e1000's VLAN filtering.
> 
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/e1000.c | 35 +--
>  hw/net/e1000e_core.c   | 47 +-
>  hw/net/e1000x_common.c | 44 +
> --
>  hw/net/e1000x_common.h |  4 +++-
>  hw/net/igb_core.c  | 41 +++-
>  hw/net/trace-events|  4 ++--
>  6 files changed, 56 insertions(+), 119 deletions(-)

Reviewed-by: Sriram Yagnaraman 


RE: [PATCH 18/40] igb: Remove goto

2023-04-15 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Friday, 14 April 2023 13:37
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S. Tsirkin ; Alex Bennée ;
> Philippe Mathieu-Daudé ; Thomas Huth
> ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Akihiko Odaki
> 
> Subject: [PATCH 18/40] igb: Remove goto
> 
> The goto is a bit confusing as it changes the control flow only if L4 
> protocol is
> not recognized. It is also different from e1000e, and noisy when comparing
> e1000e and igb.
> 
> Signed-off-by: Akihiko Odaki 

Reviewed-by: Sriram Yagnaraman 



RE: [PATCH 06/40] net/net_rx_pkt: Use iovec for net_rx_pkt_set_protocols()

2023-04-15 Thread Sriram Yagnaraman

> -Original Message-
> From: Akihiko Odaki 
> Sent: Friday, 14 April 2023 13:37
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S. Tsirkin ; Alex Bennée ;
> Philippe Mathieu-Daudé ; Thomas Huth
> ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Akihiko Odaki
> 
> Subject: [PATCH 06/40] net/net_rx_pkt: Use iovec for
> net_rx_pkt_set_protocols()
> 
> igb does not properly ensure the buffer passed to
> net_rx_pkt_set_protocols() is contiguous for the entire L2/L3/L4 header.
> Allow it to pass scattered data to net_rx_pkt_set_protocols().
> 
> Fixes: 3a977deebe ("Intrdocue igb device emulation")
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/igb_core.c   |  2 +-
>  hw/net/net_rx_pkt.c | 14 +-  hw/net/net_rx_pkt.h | 10 ++
> hw/net/virtio-net.c |  7 +--
>  hw/net/vmxnet3.c|  7 ++-
>  include/net/eth.h   |  6 +++---
>  net/eth.c   | 18 ------
>  7 files changed, 34 insertions(+), 30 deletions(-)
> 

Very nice. 
Reviewed-by: Sriram Yagnaraman 

> diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index
> 5d4884b834..53f60fc3d3 100644
> --- a/hw/net/igb_core.c
> +++ b/hw/net/igb_core.c
> @@ -1650,7 +1650,7 @@ igb_receive_internal(IGBCore *core, const struct
> iovec *iov, int iovcnt,
> 
>  ehdr = PKT_GET_ETH_HDR(filter_buf);
>  net_rx_pkt_set_packet_type(core->rx_pkt, get_eth_packet_type(ehdr));
> -net_rx_pkt_set_protocols(core->rx_pkt, filter_buf, size);
> +net_rx_pkt_set_protocols(core->rx_pkt, iov, iovcnt, iov_ofs);
> 
>  queues = igb_receive_assign(core, ehdr, size, _info, external_tx);
>  if (!queues) {
> diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c index
> 39cdea06de..63be6e05ad 100644
> --- a/hw/net/net_rx_pkt.c
> +++ b/hw/net/net_rx_pkt.c
> @@ -103,7 +103,7 @@ net_rx_pkt_pull_data(struct NetRxPkt *pkt,
>  iov, iovcnt, ploff, pkt->tot_len);
>  }
> 
> -eth_get_protocols(pkt->vec, pkt->vec_len, >hasip4, >hasip6,
> +eth_get_protocols(pkt->vec, pkt->vec_len, 0, >hasip4,
> + >hasip6,
>>l3hdr_off, >l4hdr_off, >l5hdr_off,
>>ip6hdr_info, >ip4hdr_info, 
> >l4hdr_info);
> 
> @@ -186,17 +186,13 @@ size_t net_rx_pkt_get_total_len(struct NetRxPkt
> *pkt)
>  return pkt->tot_len;
>  }
> 
> -void net_rx_pkt_set_protocols(struct NetRxPkt *pkt, const void *data,
> -  size_t len)
> +void net_rx_pkt_set_protocols(struct NetRxPkt *pkt,
> +  const struct iovec *iov, size_t iovcnt,
> +  size_t iovoff)
>  {
> -const struct iovec iov = {
> -.iov_base = (void *)data,
> -.iov_len = len
> -};
> -
>  assert(pkt);
> 
> -eth_get_protocols(, 1, >hasip4, >hasip6,
> +eth_get_protocols(iov, iovcnt, iovoff, >hasip4, >hasip6,
>>l3hdr_off, >l4hdr_off, >l5hdr_off,
>>ip6hdr_info, >ip4hdr_info, 
> >l4hdr_info);  } diff --
> git a/hw/net/net_rx_pkt.h b/hw/net/net_rx_pkt.h index
> d00b484900..a06f5c2675 100644
> --- a/hw/net/net_rx_pkt.h
> +++ b/hw/net/net_rx_pkt.h
> @@ -55,12 +55,14 @@ size_t net_rx_pkt_get_total_len(struct NetRxPkt *pkt);
>   * parse and set packet analysis results
>   *
>   * @pkt:packet
> - * @data:   pointer to the data buffer to be parsed
> - * @len:data length
> + * @iov:received data scatter-gather list
> + * @iovcnt: number of elements in iov
> + * @iovoff: data start offset in the iov
>   *
>   */
> -void net_rx_pkt_set_protocols(struct NetRxPkt *pkt, const void *data,
> -  size_t len);
> +void net_rx_pkt_set_protocols(struct NetRxPkt *pkt,
> +  const struct iovec *iov, size_t iovcnt,
> +  size_t iovoff);
> 
>  /**
>   * fetches packet analysis results
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index
> 53e1c32643..37551fd854 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -1835,9 +1835,12 @@ static int virtio_net_process_rss(NetClientState
> *nc, const uint8_t *buf,
>  VIRTIO_NET_HASH_REPORT_UDPv6,
>  VIRTIO_NET_HASH_REPORT_UDPv6_EX
>  };
> +struct iovec iov = {
> +.iov_base = (void *)buf,
> +.iov_len = size
> +};
> 
> -net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
> -  

RE: [PATCH 03/40] igb: Fix Rx packet type encoding

2023-04-15 Thread Sriram Yagnaraman
> -Original Message-
> From: Akihiko Odaki 
> Sent: Friday, 14 April 2023 13:37
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S. Tsirkin ; Alex Bennée ;
> Philippe Mathieu-Daudé ; Thomas Huth
> ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Akihiko Odaki
> 
> Subject: [PATCH 03/40] igb: Fix Rx packet type encoding
> 
> igb's advanced descriptor uses a packet type encoding different from one used
> in e1000e's extended descriptor. Fix the logic to encode Rx packet type
> accordingly.
> 
> Fixes: 3a977deebe ("Intrdocue igb device emulation")
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/igb_core.c | 38 +++---
>  1 file changed, 19 insertions(+), 19 deletions(-)
> 
> diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index
> 464a41d0aa..55de212447 100644
> --- a/hw/net/igb_core.c
> +++ b/hw/net/igb_core.c
> @@ -1227,7 +1227,6 @@ igb_build_rx_metadata(IGBCore *core,
>  struct virtio_net_hdr *vhdr;
>  bool hasip4, hasip6;
>  EthL4HdrProto l4hdr_proto;
> -uint32_t pkt_type;
> 
>  *status_flags = E1000_RXD_STAT_DD;
> 
> @@ -1266,28 +1265,29 @@ igb_build_rx_metadata(IGBCore *core,
>  trace_e1000e_rx_metadata_ack();
>  }
> 
> -if (hasip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_DIS)) {
> -trace_e1000e_rx_metadata_ipv6_filtering_disabled();
> -pkt_type = E1000_RXD_PKT_MAC;
> -} else if (l4hdr_proto == ETH_L4_HDR_PROTO_TCP ||
> -   l4hdr_proto == ETH_L4_HDR_PROTO_UDP) {
> -pkt_type = hasip4 ? E1000_RXD_PKT_IP4_XDP :
> E1000_RXD_PKT_IP6_XDP;
> -} else if (hasip4 || hasip6) {
> -pkt_type = hasip4 ? E1000_RXD_PKT_IP4 : E1000_RXD_PKT_IP6;
> -} else {
> -pkt_type = E1000_RXD_PKT_MAC;
> -}
> +if (pkt_info) {
> +*pkt_info = rss_info->enabled ? rss_info->type : 0;
> 
> -trace_e1000e_rx_metadata_pkt_type(pkt_type);
> +if (hasip4) {
> +*pkt_info |= BIT(4);

DPDK seems to care about the packet type. 
Would it make sense to introduce a new set of macros similar to E1000_RXD_PKT* 
for igb instead of these magic numbers?
In any case, 
Reviewed-by: Sriram Yagnaraman 

> +}
> 
> -if (pkt_info) {
> -if (rss_info->enabled) {
> -*pkt_info = rss_info->type;
> +if (hasip6) {
> +*pkt_info |= BIT(6);
>  }
> 
> -*pkt_info |= (pkt_type << 4);
> -} else {
> -*status_flags |= E1000_RXD_PKT_TYPE(pkt_type);
> +switch (l4hdr_proto) {
> +case ETH_L4_HDR_PROTO_TCP:
> +*pkt_info |= BIT(8);
> +break;
> +
> +case ETH_L4_HDR_PROTO_UDP:
> +*pkt_info |= BIT(9);
> +break;
> +
> +default:
> +break;
> +}
>  }
> 
>  if (hdr_info) {
> --
> 2.40.0



RE: [PATCH 22/40] igb: Add more definitions for Tx descriptor

2023-04-15 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Friday, 14 April 2023 13:37
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S. Tsirkin ; Alex Bennée ;
> Philippe Mathieu-Daudé ; Thomas Huth
> ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Akihiko Odaki
> 
> Subject: [PATCH 22/40] igb: Add more definitions for Tx descriptor
> 
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/igb_core.c |  2 +-
>  hw/net/igb_regs.h | 32 +++-
>  2 files changed, 28 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index
> e5a7021c0e..350462c40c 100644
> --- a/hw/net/igb_core.c
> +++ b/hw/net/igb_core.c
> @@ -418,7 +418,7 @@ igb_setup_tx_offloads(IGBCore *core, struct igb_tx
> *tx)  {
>  if (tx->first_cmd_type_len & E1000_ADVTXD_DCMD_TSE) {
>  uint32_t idx = (tx->first_olinfo_status >> 4) & 1;
> -uint32_t mss = tx->ctx[idx].mss_l4len_idx >> 16;
> +uint32_t mss = tx->ctx[idx].mss_l4len_idx >>
> + E1000_ADVTXD_MSS_SHIFT;
>  if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true, mss)) {
>  return false;
>  }
> diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h index
> c5c5b3c3b8..22ce909173 100644
> --- a/hw/net/igb_regs.h
> +++ b/hw/net/igb_regs.h
> @@ -42,11 +42,6 @@ union e1000_adv_tx_desc {
>  } wb;
>  };
> 
> -#define E1000_ADVTXD_DTYP_CTXT  0x0020 /* Advanced Context
> Descriptor */ -#define E1000_ADVTXD_DTYP_DATA  0x0030 /* Advanced
> Data Descriptor */ -#define E1000_ADVTXD_DCMD_DEXT  0x2000 /*
> Descriptor Extension (1=Adv) */
> -#define E1000_ADVTXD_DCMD_TSE   0x8000 /* TCP/UDP Segmentation
> Enable */
> -
>  #define E1000_ADVTXD_POTS_IXSM  0x0100 /* Insert TCP/UDP
> Checksum */  #define E1000_ADVTXD_POTS_TXSM  0x0200 /* Insert
> TCP/UDP Checksum */
> 
> @@ -151,6 +146,10 @@ union e1000_adv_rx_desc {
>  #define IGB_82576_VF_DEV_ID0x10CA
>  #define IGB_I350_VF_DEV_ID 0x1520
> 
> +/* VLAN info */
> +#define IGB_TX_FLAGS_VLAN_MASK 0x
> +#define IGB_TX_FLAGS_VLAN_SHIFT16
> +

Doesn't seem to be used anywhere, added by mistake? 

>  /* from igb/e1000_82575.h */
> 
>  #define E1000_MRQC_ENABLE_RSS_MQ0x0002
> @@ -160,6 +159,29 @@ union e1000_adv_rx_desc {
>  #define E1000_MRQC_RSS_FIELD_IPV6_UDP   0x0080
>  #define E1000_MRQC_RSS_FIELD_IPV6_UDP_EX0x0100
> 
> +/* Adv Transmit Descriptor Config Masks */
> +#define E1000_ADVTXD_MAC_TSTAMP   0x0008 /* IEEE1588
> Timestamp packet */
> +#define E1000_ADVTXD_DTYP_CTXT0x0020 /* Advanced Context
> Descriptor */
> +#define E1000_ADVTXD_DTYP_DATA0x0030 /* Advanced Data
> Descriptor */
> +#define E1000_ADVTXD_DCMD_EOP 0x0100 /* End of Packet */
> +#define E1000_ADVTXD_DCMD_IFCS0x0200 /* Insert FCS (Ethernet
> CRC) */
> +#define E1000_ADVTXD_DCMD_RS  0x0800 /* Report Status */
> +#define E1000_ADVTXD_DCMD_DEXT0x2000 /* Descriptor extension
> (1=Adv) */
> +#define E1000_ADVTXD_DCMD_VLE 0x4000 /* VLAN pkt enable */

nit: you could use the above definition instead of E1000_TXD_CMD_VLE in
igb_tx_insert_vlan()?

> +#define E1000_ADVTXD_DCMD_TSE 0x8000 /* TCP Seg enable */
> +#define E1000_ADVTXD_PAYLEN_SHIFT14 /* Adv desc PAYLEN shift */
> +
> +#define E1000_ADVTXD_MACLEN_SHIFT9  /* Adv ctxt desc mac len shift */
> +#define E1000_ADVTXD_TUCMD_L4T_UDP 0x  /* L4 Packet TYPE
> of UDP */
> +#define E1000_ADVTXD_TUCMD_IPV40x0400  /* IP Packet Type:
> 1=IPv4 */
> +#define E1000_ADVTXD_TUCMD_L4T_TCP 0x0800  /* L4 Packet TYPE of
> TCP
> +*/ #define E1000_ADVTXD_TUCMD_L4T_SCTP 0x1000 /* L4 packet
> TYPE of
> +SCTP */
> +/* IPSec Encrypt Enable for ESP */
> +#define E1000_ADVTXD_L4LEN_SHIFT 8  /* Adv ctxt L4LEN shift */
> +#define E1000_ADVTXD_MSS_SHIFT  16  /* Adv ctxt MSS shift */
> +/* Adv ctxt IPSec SA IDX mask */
> +/* Adv ctxt IPSec ESP len mask */
> +
>  /* Additional Transmit Descriptor Control definitions */  #define
> E1000_TXDCTL_QUEUE_ENABLE  0x0200 /* Enable specific Tx Queue */
> 
> --
> 2.40.0



RE: [PATCH 23/40] igb: Share common VF constants

2023-04-15 Thread Sriram Yagnaraman

> -Original Message-
> From: Philippe Mathieu-Daudé 
> Sent: Friday, 14 April 2023 17:09
> To: Akihiko Odaki 
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S. Tsirkin ; Alex Bennée ;
> Thomas Huth ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org
> Subject: Re: [PATCH 23/40] igb: Share common VF constants
> 
> On 14/4/23 13:37, Akihiko Odaki wrote:
> > The constants need to be consistent between the PF and VF.
> >
> > Signed-off-by: Akihiko Odaki 
> > ---
> >   hw/net/igb.c| 10 +-
> >   hw/net/igb_common.h |  8 
> >   hw/net/igbvf.c  |  7 ---
> >   3 files changed, 13 insertions(+), 12 deletions(-)
> 
> Reviewed-by: Philippe Mathieu-Daudé 

Reviewed-by: Sriram Yagnaraman 


RE: [PATCH 19/40] igb: Read DCMD.VLE of the first Tx descriptor

2023-04-15 Thread Sriram Yagnaraman

> -Original Message-
> From: Akihiko Odaki 
> Sent: Friday, 14 April 2023 13:37
> Cc: Sriram Yagnaraman ; Jason Wang
> ; Dmitry Fleytman ;
> Michael S. Tsirkin ; Alex Bennée ;
> Philippe Mathieu-Daudé ; Thomas Huth
> ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; qemu-devel@nongnu.org; Akihiko Odaki
> 
> Subject: [PATCH 19/40] igb: Read DCMD.VLE of the first Tx descriptor
> 
> Section 7.2.2.3 Advanced Transmit Data Descriptor says:
> > For frames that spans multiple descriptors, all fields apart from
> > DCMD.EOP, DCMD.RS, DCMD.DEXT, DTALEN, Address and DTYP are valid only
> > in the first descriptors and are ignored in the subsequent ones.
> 
> Signed-off-by: Akihiko Odaki 
> ---
>  hw/net/igb_core.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index
> cca71611fe..e5a7021c0e 100644
> --- a/hw/net/igb_core.c
> +++ b/hw/net/igb_core.c
> @@ -613,7 +613,7 @@ igb_process_tx_desc(IGBCore *core,
>  idx = (tx->first_olinfo_status >> 4) & 1;
>  igb_tx_insert_vlan(core, queue_index, tx,
>  tx->ctx[idx].vlan_macip_lens >> 16,
> -!!(cmd_type_len & E1000_TXD_CMD_VLE));
> +!!(tx->first_cmd_type_len & E1000_TXD_CMD_VLE));
> 
>  if (igb_tx_pkt_send(core, tx, queue_index)) {
>  igb_on_tx_done_update_stats(core, tx->tx_pkt, queue_index);
> --
> 2.40.0

Reviewed-by: Sriram Yagnaraman 



[PATCH v10 1/8] MAINTAINERS: Add Sriram Yagnaraman as a igb reviewer

2023-03-24 Thread Sriram Yagnaraman
I would like to review and be informed of changes to the igb device.

Signed-off-by: Sriram Yagnaraman 
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 9b56ccdd92..a9ed6143f5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2252,6 +2252,7 @@ F: tests/qtest/libqos/e1000e.*
 
 igb
 M: Akihiko Odaki 
+R: Sriram Yagnaraman 
 S: Maintained
 F: docs/system/devices/igb.rst
 F: hw/net/igb*
-- 
2.34.1




[PATCH v10 5/8] igb: check oversized packets for VMDq

2023-03-24 Thread Sriram Yagnaraman
Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c | 41 -
 1 file changed, 36 insertions(+), 5 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 753f17b40c..38aa4596b1 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -921,12 +921,26 @@ igb_rx_l4_cso_enabled(IGBCore *core)
 return !!(core->mac[RXCSUM] & E1000_RXCSUM_TUOFLD);
 }
 
+static bool
+igb_rx_is_oversized(IGBCore *core, uint16_t qn, size_t size)
+{
+uint16_t pool = qn % IGB_NUM_VM_POOLS;
+bool lpe = !!(core->mac[VMOLR0 + pool] & E1000_VMOLR_LPE);
+int max_ethernet_lpe_size =
+core->mac[VMOLR0 + pool] & E1000_VMOLR_RLPML_MASK;
+int max_ethernet_vlan_size = 1522;
+
+return size > (lpe ? max_ethernet_lpe_size : max_ethernet_vlan_size);
+}
+
 static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header 
*ehdr,
-   E1000E_RSSInfo *rss_info, bool *external_tx)
+   size_t size, E1000E_RSSInfo *rss_info,
+   bool *external_tx)
 {
 static const int ta_shift[] = { 4, 3, 2, 0 };
 uint32_t f, ra[2], *macp, rctl = core->mac[RCTL];
 uint16_t queues = 0;
+uint16_t oversized = 0;
 uint16_t vid = lduw_be_p(_GET_VLAN_HDR(ehdr)->h_tci) & VLAN_VID_MASK;
 bool accepted = false;
 int i;
@@ -1026,9 +1040,26 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 }
 
 queues &= core->mac[VFRE];
-igb_rss_parse_packet(core, core->rx_pkt, external_tx != NULL, 
rss_info);
-if (rss_info->queue & 1) {
-queues <<= 8;
+if (queues) {
+for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
+if ((queues & BIT(i)) && igb_rx_is_oversized(core, i, size)) {
+oversized |= BIT(i);
+}
+}
+/* 8.19.37 increment ROC if packet is oversized for all queues */
+if (oversized == queues) {
+trace_e1000x_rx_oversized(size);
+e1000x_inc_reg_if_not_full(core->mac, ROC);
+}
+queues &= ~oversized;
+}
+
+if (queues) {
+igb_rss_parse_packet(core, core->rx_pkt,
+ external_tx != NULL, rss_info);
+if (rss_info->queue & 1) {
+queues <<= 8;
+}
 }
 } else {
 switch (net_rx_pkt_get_packet_type(core->rx_pkt)) {
@@ -1576,7 +1607,7 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
e1000x_vlan_enabled(core->mac),
core->mac[VET] & 0x);
 
-queues = igb_receive_assign(core, ehdr, _info, external_tx);
+queues = igb_receive_assign(core, ehdr, size, _info, external_tx);
 if (!queues) {
 trace_e1000e_rx_flt_dropped();
 return orig_size;
-- 
2.34.1




[PATCH v10 3/8] igb: add ICR_RXDW

2023-03-24 Thread Sriram Yagnaraman
IGB uses RXDW ICR bit to indicate that rx descriptor has been written
back. This is the same as RXT0 bit in older HW.

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/e1000x_regs.h | 4 
 hw/net/igb_core.c| 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h
index c0832fa23d..6d3c4c6d3a 100644
--- a/hw/net/e1000x_regs.h
+++ b/hw/net/e1000x_regs.h
@@ -335,6 +335,7 @@
 #define E1000_ICR_RXDMT00x0010 /* rx desc min. threshold (0) */
 #define E1000_ICR_RXO   0x0040 /* rx overrun */
 #define E1000_ICR_RXT0  0x0080 /* rx timer intr (ring 0) */
+#define E1000_ICR_RXDW  0x0080 /* rx desc written back */
 #define E1000_ICR_MDAC  0x0200 /* MDIO access complete */
 #define E1000_ICR_RXCFG 0x0400 /* RX /c/ ordered set */
 #define E1000_ICR_GPI_EN0   0x0800 /* GP Int 0 */
@@ -378,6 +379,7 @@
 #define E1000_ICS_RXDMT0E1000_ICR_RXDMT0/* rx desc min. threshold */
 #define E1000_ICS_RXO   E1000_ICR_RXO   /* rx overrun */
 #define E1000_ICS_RXT0  E1000_ICR_RXT0  /* rx timer intr */
+#define E1000_ICS_RXDW  E1000_ICR_RXDW  /* rx desc written back */
 #define E1000_ICS_MDAC  E1000_ICR_MDAC  /* MDIO access complete */
 #define E1000_ICS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
 #define E1000_ICS_GPI_EN0   E1000_ICR_GPI_EN0   /* GP Int 0 */
@@ -407,6 +409,7 @@
 #define E1000_IMS_RXDMT0E1000_ICR_RXDMT0/* rx desc min. threshold */
 #define E1000_IMS_RXO   E1000_ICR_RXO   /* rx overrun */
 #define E1000_IMS_RXT0  E1000_ICR_RXT0  /* rx timer intr */
+#define E1000_IMS_RXDW  E1000_ICR_RXDW  /* rx desc written back */
 #define E1000_IMS_MDAC  E1000_ICR_MDAC  /* MDIO access complete */
 #define E1000_IMS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
 #define E1000_IMS_GPI_EN0   E1000_ICR_GPI_EN0   /* GP Int 0 */
@@ -441,6 +444,7 @@
 #define E1000_IMC_RXDMT0E1000_ICR_RXDMT0/* rx desc min. threshold */
 #define E1000_IMC_RXO   E1000_ICR_RXO   /* rx overrun */
 #define E1000_IMC_RXT0  E1000_ICR_RXT0  /* rx timer intr */
+#define E1000_IMC_RXDW  E1000_ICR_RXDW  /* rx desc written back */
 #define E1000_IMC_MDAC  E1000_ICR_MDAC  /* MDIO access complete */
 #define E1000_IMC_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
 #define E1000_IMC_GPI_EN0   E1000_ICR_GPI_EN0   /* GP Int 0 */
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 6ba9696637..9ab90e8576 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -1583,7 +1583,7 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
 continue;
 }
 
-n |= E1000_ICR_RXT0;
+n |= E1000_ICR_RXDW;
 
 igb_rx_fix_l4_csum(core, core->rx_pkt);
 igb_write_packet_to_guest(core, core->rx_pkt, , _info);
-- 
2.34.1




[PATCH v10 6/8] igb: respect E1000_VMOLR_RSSE

2023-03-24 Thread Sriram Yagnaraman
RSS for VFs is only enabled if VMOLR[n].RSSE is set.

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 38aa4596b1..fd61c6c550 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -1057,8 +1057,15 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 if (queues) {
 igb_rss_parse_packet(core, core->rx_pkt,
  external_tx != NULL, rss_info);
+/* Sec 8.26.1: PQn = VFn + VQn*8 */
 if (rss_info->queue & 1) {
-queues <<= 8;
+for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
+if ((queues & BIT(i)) &&
+(core->mac[VMOLR0 + i] & E1000_VMOLR_RSSE)) {
+queues |= BIT(i + IGB_NUM_VM_POOLS);
+queues &= ~BIT(i);
+}
+}
 }
 }
 } else {
-- 
2.34.1




[PATCH v10 2/8] igb: handle PF/VF reset properly

2023-03-24 Thread Sriram Yagnaraman
Use PFRSTD to reset RSTI bit for VFs, and raise VFLRE interrupt when VF
is reset.

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c   | 38 ++
 hw/net/igb_regs.h   |  3 +++
 hw/net/trace-events |  2 ++
 3 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 78d30738e6..6ba9696637 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -1898,14 +1898,6 @@ static void igb_set_eims(IGBCore *core, int index, 
uint32_t val)
 igb_update_interrupt_state(core);
 }
 
-static void igb_vf_reset(IGBCore *core, uint16_t vfn)
-{
-/* TODO: Reset of the queue enable and the interrupt registers of the VF. 
*/
-
-core->mac[V2PMAILBOX0 + vfn] &= ~E1000_V2PMAILBOX_RSTI;
-core->mac[V2PMAILBOX0 + vfn] = E1000_V2PMAILBOX_RSTD;
-}
-
 static void mailbox_interrupt_to_vf(IGBCore *core, uint16_t vfn)
 {
 uint32_t ent = core->mac[VTIVAR_MISC + vfn];
@@ -1983,6 +1975,17 @@ static void igb_set_vfmailbox(IGBCore *core, int index, 
uint32_t val)
 }
 }
 
+static void igb_vf_reset(IGBCore *core, uint16_t vfn)
+{
+/* disable Rx and Tx for the VF*/
+core->mac[VFTE] &= ~BIT(vfn);
+core->mac[VFRE] &= ~BIT(vfn);
+/* indicate VF reset to PF */
+core->mac[VFLRE] |= BIT(vfn);
+/* VFLRE and mailbox use the same interrupt cause */
+mailbox_interrupt_to_pf(core);
+}
+
 static void igb_w1c(IGBCore *core, int index, uint32_t val)
 {
 core->mac[index] &= ~val;
@@ -2237,14 +2240,20 @@ igb_set_status(IGBCore *core, int index, uint32_t val)
 static void
 igb_set_ctrlext(IGBCore *core, int index, uint32_t val)
 {
-trace_e1000e_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK),
- !!(val & E1000_CTRL_EXT_SPD_BYPS));
-
-/* TODO: PFRSTD */
+trace_igb_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK),
+  !!(val & E1000_CTRL_EXT_SPD_BYPS),
+  !!(val & E1000_CTRL_EXT_PFRSTD));
 
 /* Zero self-clearing bits */
 val &= ~(E1000_CTRL_EXT_ASDCHK | E1000_CTRL_EXT_EE_RST);
 core->mac[CTRL_EXT] = val;
+
+if (core->mac[CTRL_EXT] & E1000_CTRL_EXT_PFRSTD) {
+for (int vfn = 0; vfn < IGB_MAX_VF_FUNCTIONS; vfn++) {
+core->mac[V2PMAILBOX0 + vfn] &= ~E1000_V2PMAILBOX_RSTI;
+core->mac[V2PMAILBOX0 + vfn] |= E1000_V2PMAILBOX_RSTD;
+}
+}
 }
 
 static void
@@ -4027,6 +4036,11 @@ static void igb_reset(IGBCore *core, bool sw)
 
 e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac);
 
+for (int vfn = 0; vfn < IGB_MAX_VF_FUNCTIONS; vfn++) {
+/* Set RSTI, so VF can identify a PF reset is in progress */
+core->mac[V2PMAILBOX0 + vfn] |= E1000_V2PMAILBOX_RSTI;
+}
+
 for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
 tx = >tx[i];
 net_tx_pkt_reset(tx->tx_pkt, NULL);
diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h
index 00934d4f20..a658f9b53f 100644
--- a/hw/net/igb_regs.h
+++ b/hw/net/igb_regs.h
@@ -240,6 +240,9 @@ union e1000_adv_rx_desc {
 
 /* from igb/e1000_defines.h */
 
+/* Physical Func Reset Done Indication */
+#define E1000_CTRL_EXT_PFRSTD   0x4000
+
 #define E1000_IVAR_VALID 0x80
 #define E1000_GPIE_NSICR 0x0001
 #define E1000_GPIE_MSIX_MODE 0x0010
diff --git a/hw/net/trace-events b/hw/net/trace-events
index 65753411fc..d35554fce8 100644
--- a/hw/net/trace-events
+++ b/hw/net/trace-events
@@ -280,6 +280,8 @@ igb_core_mdic_read_unhandled(uint32_t addr) "MDIC READ: 
PHY[%u] UNHANDLED"
 igb_core_mdic_write(uint32_t addr, uint32_t data) "MDIC WRITE: PHY[%u] = 0x%x"
 igb_core_mdic_write_unhandled(uint32_t addr) "MDIC WRITE: PHY[%u] UNHANDLED"
 
+igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) 
"Set extended link params: ASD check: %d, Speed select bypass: %d, PF reset 
done: %d"
+
 igb_rx_desc_buff_size(uint32_t b) "buffer size: %u"
 igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, 
uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u"
 
-- 
2.34.1




[PATCH v10 7/8] igb: implement VF Tx and Rx stats

2023-03-24 Thread Sriram Yagnaraman
Please note that the loopback counters for VM-to-VM traffic are not
implemented yet: VFGOTLBC, VFGPTLBC, VFGORLBC and VFGPRLBC.

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c | 26 ++
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index fd61c6c550..162ba8becf 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -492,7 +492,7 @@ igb_tx_pkt_send(IGBCore *core, struct igb_tx *tx, int 
queue_index)
 }
 
 static void
-igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt)
+igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt, int qn)
 {
 static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
 PTC1023, PTC1522 };
@@ -519,6 +519,13 @@ igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt 
*tx_pkt)
 core->mac[GPTC] = core->mac[TPT];
 core->mac[GOTCL] = core->mac[TOTL];
 core->mac[GOTCH] = core->mac[TOTH];
+
+if (core->mac[MRQC] & 1) {
+uint16_t pool = qn % IGB_NUM_VM_POOLS;
+
+core->mac[PVFGOTC0 + (pool * 64)] += tot_len;
+core->mac[PVFGPTC0 + (pool * 64)]++;
+}
 }
 
 static void
@@ -583,7 +590,7 @@ igb_process_tx_desc(IGBCore *core,
 net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan, vet);
 }
 if (igb_tx_pkt_send(core, tx, queue_index)) {
-igb_on_tx_done_update_stats(core, tx->tx_pkt);
+igb_on_tx_done_update_stats(core, tx->tx_pkt, queue_index);
 }
 }
 
@@ -1409,7 +1416,8 @@ igb_write_to_rx_buffers(IGBCore *core,
 }
 
 static void
-igb_update_rx_stats(IGBCore *core, size_t data_size, size_t data_fcs_size)
+igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi,
+size_t data_size, size_t data_fcs_size)
 {
 e1000x_update_rx_total_stats(core->mac, data_size, data_fcs_size);
 
@@ -1425,6 +1433,16 @@ igb_update_rx_stats(IGBCore *core, size_t data_size, 
size_t data_fcs_size)
 default:
 break;
 }
+
+if (core->mac[MRQC] & 1) {
+uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
+
+core->mac[PVFGORC0 + (pool * 64)] += data_size + 4;
+core->mac[PVFGPRC0 + (pool * 64)]++;
+if (net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) {
+core->mac[PVFMPRC0 + (pool * 64)]++;
+}
+}
 }
 
 static inline bool
@@ -1526,7 +1544,7 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt 
*pkt,
 
 } while (desc_offset < total_size);
 
-igb_update_rx_stats(core, size, total_size);
+igb_update_rx_stats(core, rxi, size, total_size);
 }
 
 static inline void
-- 
2.34.1




[PATCH v10 8/8] igb: respect VMVIR and VMOLR for VLAN

2023-03-24 Thread Sriram Yagnaraman
Add support for stripping/inserting VLAN for VFs.

Had to move CSUM calculation back into the for loop, since packet data
is pulled inside the loop based on strip VLAN decision for every VF.

net_rx_pkt_fix_l4_csum should be extended to accept a buffer instead for
igb. Work for a future patch.

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c | 62 +--
 1 file changed, 49 insertions(+), 13 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 162ba8becf..d733fed6cf 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -386,6 +386,28 @@ igb_rss_parse_packet(IGBCore *core, struct NetRxPkt *pkt, 
bool tx,
 info->queue = E1000_RSS_QUEUE(>mac[RETA], info->hash);
 }
 
+static void
+igb_tx_insert_vlan(IGBCore *core, uint16_t qn, struct igb_tx *tx,
+uint16_t vlan, bool insert_vlan)
+{
+if (core->mac[MRQC] & 1) {
+uint16_t pool = qn % IGB_NUM_VM_POOLS;
+
+if (core->mac[VMVIR0 + pool] & E1000_VMVIR_VLANA_DEFAULT) {
+/* always insert default VLAN */
+insert_vlan = true;
+vlan = core->mac[VMVIR0 + pool] & 0x;
+} else if (core->mac[VMVIR0 + pool] & E1000_VMVIR_VLANA_NEVER) {
+insert_vlan = false;
+}
+}
+
+if (insert_vlan && e1000x_vlan_enabled(core->mac)) {
+net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan,
+core->mac[VET] & 0x);
+}
+}
+
 static bool
 igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx)
 {
@@ -583,12 +605,11 @@ igb_process_tx_desc(IGBCore *core,
 
 if (cmd_type_len & E1000_TXD_CMD_EOP) {
 if (!tx->skip_cp && net_tx_pkt_parse(tx->tx_pkt)) {
-if (cmd_type_len & E1000_TXD_CMD_VLE) {
-idx = (tx->first_olinfo_status >> 4) & 1;
-uint16_t vlan = tx->ctx[idx].vlan_macip_lens >> 16;
-uint16_t vet = core->mac[VET] & 0x;
-net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan, vet);
-}
+idx = (tx->first_olinfo_status >> 4) & 1;
+igb_tx_insert_vlan(core, queue_index, tx,
+tx->ctx[idx].vlan_macip_lens >> 16,
+!!(cmd_type_len & E1000_TXD_CMD_VLE));
+
 if (igb_tx_pkt_send(core, tx, queue_index)) {
 igb_on_tx_done_update_stats(core, tx->tx_pkt, queue_index);
 }
@@ -1547,6 +1568,20 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt 
*pkt,
 igb_update_rx_stats(core, rxi, size, total_size);
 }
 
+static bool
+igb_rx_strip_vlan(IGBCore *core, const E1000E_RingInfo *rxi)
+{
+if (core->mac[MRQC] & 1) {
+uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
+/* Sec 7.10.3.8: CTRL.VME is ignored, only VMOLR/RPLOLR is used */
+return (net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) ?
+core->mac[RPLOLR] & E1000_RPLOLR_STRVLAN :
+core->mac[VMOLR0 + pool] & E1000_VMOLR_STRVLAN;
+}
+
+return e1000x_vlan_enabled(core->mac);
+}
+
 static inline void
 igb_rx_fix_l4_csum(IGBCore *core, struct NetRxPkt *pkt)
 {
@@ -1627,10 +1662,7 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
 
 ehdr = PKT_GET_ETH_HDR(filter_buf);
 net_rx_pkt_set_packet_type(core->rx_pkt, get_eth_packet_type(ehdr));
-
-net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs,
-   e1000x_vlan_enabled(core->mac),
-   core->mac[VET] & 0x);
+net_rx_pkt_set_protocols(core->rx_pkt, filter_buf, size);
 
 queues = igb_receive_assign(core, ehdr, size, _info, external_tx);
 if (!queues) {
@@ -1638,9 +1670,6 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
 return orig_size;
 }
 
-total_size = net_rx_pkt_get_total_len(core->rx_pkt) +
-e1000x_fcs_len(core->mac);
-
 for (i = 0; i < IGB_NUM_QUEUES; i++) {
 if (!(queues & BIT(i)) ||
 !(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
@@ -1649,6 +1678,13 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
 
 igb_rx_ring_init(core, , i);
 
+net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs,
+   igb_rx_strip_vlan(core, rxr.i),
+   core->mac[VET] & 0x);
+
+total_size = net_rx_pkt_get_total_len(core->rx_pkt) +
+e1000x_fcs_len(core->mac);
+
 if (!igb_has_rxbufs(core, rxr.i, total_size)) {
 n |= E1000_ICS_RXO;
 trace_e1000e_rx_not_written_to_guest(rxr.i->idx);
-- 
2.34.1




[PATCH v10 4/8] igb: implement VFRE and VFTE registers

2023-03-24 Thread Sriram Yagnaraman
Also introduce:
- Checks for RXDCTL/TXDCTL queue enable bits
- IGB_NUM_VM_POOLS enum (Sec 1.5: Table 1-7)

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c | 38 +++---
 hw/net/igb_core.h |  1 +
 hw/net/igb_regs.h |  3 +++
 3 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 9ab90e8576..753f17b40c 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -784,6 +784,18 @@ igb_txdesc_writeback(IGBCore *core, dma_addr_t base,
 return igb_tx_wb_eic(core, txi->idx);
 }
 
+static inline bool
+igb_tx_enabled(IGBCore *core, const E1000E_RingInfo *txi)
+{
+bool vmdq = core->mac[MRQC] & 1;
+uint16_t qn = txi->idx;
+uint16_t pool = qn % IGB_NUM_VM_POOLS;
+
+return (core->mac[TCTL] & E1000_TCTL_EN) &&
+(!vmdq || core->mac[VFTE] & BIT(pool)) &&
+(core->mac[TXDCTL0 + (qn * 16)] & E1000_TXDCTL_QUEUE_ENABLE);
+}
+
 static void
 igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
 {
@@ -793,8 +805,7 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
 const E1000E_RingInfo *txi = txr->i;
 uint32_t eic = 0;
 
-/* TODO: check if the queue itself is enabled too. */
-if (!(core->mac[TCTL] & E1000_TCTL_EN)) {
+if (!igb_tx_enabled(core, txi)) {
 trace_e1000e_tx_disabled();
 return;
 }
@@ -872,6 +883,9 @@ igb_can_receive(IGBCore *core)
 
 for (i = 0; i < IGB_NUM_QUEUES; i++) {
 E1000E_RxRing rxr;
+if (!(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
+continue;
+}
 
 igb_rx_ring_init(core, , i);
 if (igb_ring_enabled(core, rxr.i) && igb_has_rxbufs(core, rxr.i, 1)) {
@@ -938,7 +952,7 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 
 if (core->mac[MRQC] & 1) {
 if (is_broadcast_ether_addr(ehdr->h_dest)) {
-for (i = 0; i < 8; i++) {
+for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
 if (core->mac[VMOLR0 + i] & E1000_VMOLR_BAM) {
 queues |= BIT(i);
 }
@@ -972,7 +986,7 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 f = ta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
 f = (((ehdr->h_dest[5] << 8) | ehdr->h_dest[4]) >> f) & 0xfff;
 if (macp[f >> 5] & (1 << (f & 0x1f))) {
-for (i = 0; i < 8; i++) {
+for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
 if (core->mac[VMOLR0 + i] & E1000_VMOLR_ROMPE) {
 queues |= BIT(i);
 }
@@ -995,7 +1009,7 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 }
 }
 } else {
-for (i = 0; i < 8; i++) {
+for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
 if (core->mac[VMOLR0 + i] & E1000_VMOLR_AUPE) {
 mask |= BIT(i);
 }
@@ -1011,6 +1025,7 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 queues = BIT(def_pl >> E1000_VT_CTL_DEFAULT_POOL_SHIFT);
 }
 
+queues &= core->mac[VFRE];
 igb_rss_parse_packet(core, core->rx_pkt, external_tx != NULL, 
rss_info);
 if (rss_info->queue & 1) {
 queues <<= 8;
@@ -1571,7 +1586,8 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
 e1000x_fcs_len(core->mac);
 
 for (i = 0; i < IGB_NUM_QUEUES; i++) {
-if (!(queues & BIT(i))) {
+if (!(queues & BIT(i)) ||
+!(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
 continue;
 }
 
@@ -1977,9 +1993,16 @@ static void igb_set_vfmailbox(IGBCore *core, int index, 
uint32_t val)
 
 static void igb_vf_reset(IGBCore *core, uint16_t vfn)
 {
+uint16_t qn0 = vfn;
+uint16_t qn1 = vfn + IGB_NUM_VM_POOLS;
+
 /* disable Rx and Tx for the VF*/
-core->mac[VFTE] &= ~BIT(vfn);
+core->mac[RXDCTL0 + (qn0 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE;
+core->mac[RXDCTL0 + (qn1 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE;
+core->mac[TXDCTL0 + (qn0 * 16)] &= ~E1000_TXDCTL_QUEUE_ENABLE;
+core->mac[TXDCTL0 + (qn1 * 16)] &= ~E1000_TXDCTL_QUEUE_ENABLE;
 core->mac[VFRE] &= ~BIT(vfn);
+core->mac[VFTE] &= ~BIT(vfn);
 /* indicate VF reset to PF */
 core->mac[VFLRE] |= BIT(vfn);
 /* VFLRE and mailbox use the same interrupt cause */
@@ -3914,6 +3937,7 @@ igb_phy_reg_init[] = {
 static const uint32_t igb_mac_reg_init[] = {
 [LEDCTL]= 2 | (3 <<

[PATCH v10 0/8] igb: merge changes from <20221229190817.25500-1-sriram.yagnara...@est.tech>

2023-03-24 Thread Sriram Yagnaraman
Based-on: <20230324095434.44973-1-akihiko.od...@daynix.com>
([PATCH for 8.0 0/4] igb fixes for 8.0)

Now that Akihiko's patchset introducing the igb device is merged, I have
rebased my changes on master. The changes proposed here add support
for:
 - Correct PF/VF reset handling
 - Introduce ICR_RXDW register definition
 - Implement support for VFTE/VFRE/VMOLR_RSSE/VMVIR/VMOLR registers
 - Check oversized packet for VMDq
 - VF statistics

Changes since v9:
- Yet another rebase

Changes since v8:
- Set RSTI bit on PF reset

Changes since v7:
- Adapt to "igb: Save more Tx states" patch from Akihiko
- Fix bug in VMVIR patch for inserting vlan, do not overwrite context

Changes since v6:
- Rebased on latest, which includes igb device introduction.

Changes since v5:
- Added back an unnecessarily removed empty line

Changes since v4:
- Removed the change implementing VTCTL.IGMAC, it needs more thought
  and implementation of DTXSWC.LLE and VLVF.LVLAN first

Changes since v3:
- Fix comments
- Rebased on latest patchset from Akihiko
- Remove Rx loop improvements that Akihiko has pulled into his patchset

Changes since v2:
- Fixed more comments from Akihiko
- Reordered the patches to make changes easier to understand

Changes since v1:
- Fix review comments from Akihiko


Sriram Yagnaraman (8):
  MAINTAINERS: Add Sriram Yagnaraman as a igb reviewer
  igb: handle PF/VF reset properly
  igb: add ICR_RXDW
  igb: implement VFRE and VFTE registers
  igb: check oversized packets for VMDq
  igb: respect E1000_VMOLR_RSSE
  igb: implement VF Tx and Rx stats
  igb: respect VMVIR and VMOLR for VLAN

 MAINTAINERS  |   1 +
 hw/net/e1000x_regs.h |   4 +
 hw/net/igb_core.c| 212 ++-
 hw/net/igb_core.h|   1 +
 hw/net/igb_regs.h|   6 ++
 hw/net/trace-events  |   2 +
 6 files changed, 185 insertions(+), 41 deletions(-)

-- 
2.34.1




[PATCH v9 2/8] igb: handle PF/VF reset properly

2023-03-22 Thread Sriram Yagnaraman
Use PFRSTD to reset RSTI bit for VFs, and raise VFLRE interrupt when VF
is reset.

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c   | 38 ++
 hw/net/igb_regs.h   |  3 +++
 hw/net/trace-events |  2 ++
 3 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 596039aab8..0fde8ef854 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -1895,14 +1895,6 @@ static void igb_set_eims(IGBCore *core, int index, 
uint32_t val)
 igb_update_interrupt_state(core);
 }
 
-static void igb_vf_reset(IGBCore *core, uint16_t vfn)
-{
-/* TODO: Reset of the queue enable and the interrupt registers of the VF. 
*/
-
-core->mac[V2PMAILBOX0 + vfn] &= ~E1000_V2PMAILBOX_RSTI;
-core->mac[V2PMAILBOX0 + vfn] = E1000_V2PMAILBOX_RSTD;
-}
-
 static void mailbox_interrupt_to_vf(IGBCore *core, uint16_t vfn)
 {
 uint32_t ent = core->mac[VTIVAR_MISC + vfn];
@@ -1980,6 +1972,17 @@ static void igb_set_vfmailbox(IGBCore *core, int index, 
uint32_t val)
 }
 }
 
+static void igb_vf_reset(IGBCore *core, uint16_t vfn)
+{
+/* disable Rx and Tx for the VF*/
+core->mac[VFTE] &= ~BIT(vfn);
+core->mac[VFRE] &= ~BIT(vfn);
+/* indicate VF reset to PF */
+core->mac[VFLRE] |= BIT(vfn);
+/* VFLRE and mailbox use the same interrupt cause */
+mailbox_interrupt_to_pf(core);
+}
+
 static void igb_w1c(IGBCore *core, int index, uint32_t val)
 {
 core->mac[index] &= ~val;
@@ -2234,14 +2237,20 @@ igb_set_status(IGBCore *core, int index, uint32_t val)
 static void
 igb_set_ctrlext(IGBCore *core, int index, uint32_t val)
 {
-trace_e1000e_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK),
- !!(val & E1000_CTRL_EXT_SPD_BYPS));
-
-/* TODO: PFRSTD */
+trace_igb_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK),
+  !!(val & E1000_CTRL_EXT_SPD_BYPS),
+  !!(val & E1000_CTRL_EXT_PFRSTD));
 
 /* Zero self-clearing bits */
 val &= ~(E1000_CTRL_EXT_ASDCHK | E1000_CTRL_EXT_EE_RST);
 core->mac[CTRL_EXT] = val;
+
+if (core->mac[CTRL_EXT] & E1000_CTRL_EXT_PFRSTD) {
+for (int vfn = 0; vfn < IGB_MAX_VF_FUNCTIONS; vfn++) {
+core->mac[V2PMAILBOX0 + vfn] &= ~E1000_V2PMAILBOX_RSTI;
+core->mac[V2PMAILBOX0 + vfn] |= E1000_V2PMAILBOX_RSTD;
+}
+}
 }
 
 static void
@@ -4024,6 +4033,11 @@ static void igb_reset(IGBCore *core, bool sw)
 
 e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac);
 
+for (int vfn = 0; vfn < IGB_MAX_VF_FUNCTIONS; vfn++) {
+/* Set RSTI, so VF can identify a PF reset is in progress */
+core->mac[V2PMAILBOX0 + vfn] |= E1000_V2PMAILBOX_RSTI;
+}
+
 for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
 tx = >tx[i];
 net_tx_pkt_reset(tx->tx_pkt);
diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h
index 00934d4f20..a658f9b53f 100644
--- a/hw/net/igb_regs.h
+++ b/hw/net/igb_regs.h
@@ -240,6 +240,9 @@ union e1000_adv_rx_desc {
 
 /* from igb/e1000_defines.h */
 
+/* Physical Func Reset Done Indication */
+#define E1000_CTRL_EXT_PFRSTD   0x4000
+
 #define E1000_IVAR_VALID 0x80
 #define E1000_GPIE_NSICR 0x0001
 #define E1000_GPIE_MSIX_MODE 0x0010
diff --git a/hw/net/trace-events b/hw/net/trace-events
index 65753411fc..d35554fce8 100644
--- a/hw/net/trace-events
+++ b/hw/net/trace-events
@@ -280,6 +280,8 @@ igb_core_mdic_read_unhandled(uint32_t addr) "MDIC READ: 
PHY[%u] UNHANDLED"
 igb_core_mdic_write(uint32_t addr, uint32_t data) "MDIC WRITE: PHY[%u] = 0x%x"
 igb_core_mdic_write_unhandled(uint32_t addr) "MDIC WRITE: PHY[%u] UNHANDLED"
 
+igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) 
"Set extended link params: ASD check: %d, Speed select bypass: %d, PF reset 
done: %d"
+
 igb_rx_desc_buff_size(uint32_t b) "buffer size: %u"
 igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, 
uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u"
 
-- 
2.34.1




[PATCH v9 4/8] igb: implement VFRE and VFTE registers

2023-03-22 Thread Sriram Yagnaraman
Also introduce:
- Checks for RXDCTL/TXDCTL queue enable bits
- IGB_NUM_VM_POOLS enum (Sec 1.5: Table 1-7)

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c | 38 +++---
 hw/net/igb_core.h |  1 +
 hw/net/igb_regs.h |  3 +++
 3 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index f799f7f84a..af979b4739 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -783,6 +783,18 @@ igb_txdesc_writeback(IGBCore *core, dma_addr_t base,
 return igb_tx_wb_eic(core, txi->idx);
 }
 
+static inline bool
+igb_tx_enabled(IGBCore *core, const E1000E_RingInfo *txi)
+{
+bool vmdq = core->mac[MRQC] & 1;
+uint16_t qn = txi->idx;
+uint16_t pool = qn % IGB_NUM_VM_POOLS;
+
+return (core->mac[TCTL] & E1000_TCTL_EN) &&
+(!vmdq || core->mac[VFTE] & BIT(pool)) &&
+(core->mac[TXDCTL0 + (qn * 16)] & E1000_TXDCTL_QUEUE_ENABLE);
+}
+
 static void
 igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
 {
@@ -792,8 +804,7 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
 const E1000E_RingInfo *txi = txr->i;
 uint32_t eic = 0;
 
-/* TODO: check if the queue itself is enabled too. */
-if (!(core->mac[TCTL] & E1000_TCTL_EN)) {
+if (!igb_tx_enabled(core, txi)) {
 trace_e1000e_tx_disabled();
 return;
 }
@@ -869,6 +880,9 @@ igb_can_receive(IGBCore *core)
 
 for (i = 0; i < IGB_NUM_QUEUES; i++) {
 E1000E_RxRing rxr;
+if (!(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
+continue;
+}
 
 igb_rx_ring_init(core, , i);
 if (igb_ring_enabled(core, rxr.i) && igb_has_rxbufs(core, rxr.i, 1)) {
@@ -935,7 +949,7 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 
 if (core->mac[MRQC] & 1) {
 if (is_broadcast_ether_addr(ehdr->h_dest)) {
-for (i = 0; i < 8; i++) {
+for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
 if (core->mac[VMOLR0 + i] & E1000_VMOLR_BAM) {
 queues |= BIT(i);
 }
@@ -969,7 +983,7 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 f = ta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
 f = (((ehdr->h_dest[5] << 8) | ehdr->h_dest[4]) >> f) & 0xfff;
 if (macp[f >> 5] & (1 << (f & 0x1f))) {
-for (i = 0; i < 8; i++) {
+for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
 if (core->mac[VMOLR0 + i] & E1000_VMOLR_ROMPE) {
 queues |= BIT(i);
 }
@@ -992,7 +1006,7 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 }
 }
 } else {
-for (i = 0; i < 8; i++) {
+for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
 if (core->mac[VMOLR0 + i] & E1000_VMOLR_AUPE) {
 mask |= BIT(i);
 }
@@ -1008,6 +1022,7 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 queues = BIT(def_pl >> E1000_VT_CTL_DEFAULT_POOL_SHIFT);
 }
 
+queues &= core->mac[VFRE];
 igb_rss_parse_packet(core, core->rx_pkt, external_tx != NULL, 
rss_info);
 if (rss_info->queue & 1) {
 queues <<= 8;
@@ -1568,7 +1583,8 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
 e1000x_fcs_len(core->mac);
 
 for (i = 0; i < IGB_NUM_QUEUES; i++) {
-if (!(queues & BIT(i))) {
+if (!(queues & BIT(i)) ||
+!(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
 continue;
 }
 
@@ -1974,9 +1990,16 @@ static void igb_set_vfmailbox(IGBCore *core, int index, 
uint32_t val)
 
 static void igb_vf_reset(IGBCore *core, uint16_t vfn)
 {
+uint16_t qn0 = vfn;
+uint16_t qn1 = vfn + IGB_NUM_VM_POOLS;
+
 /* disable Rx and Tx for the VF*/
-core->mac[VFTE] &= ~BIT(vfn);
+core->mac[RXDCTL0 + (qn0 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE;
+core->mac[RXDCTL0 + (qn1 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE;
+core->mac[TXDCTL0 + (qn0 * 16)] &= ~E1000_TXDCTL_QUEUE_ENABLE;
+core->mac[TXDCTL0 + (qn1 * 16)] &= ~E1000_TXDCTL_QUEUE_ENABLE;
 core->mac[VFRE] &= ~BIT(vfn);
+core->mac[VFTE] &= ~BIT(vfn);
 /* indicate VF reset to PF */
 core->mac[VFLRE] |= BIT(vfn);
 /* VFLRE and mailbox use the same interrupt cause */
@@ -3911,6 +3934,7 @@ igb_phy_reg_init[] = {
 static const uint32_t igb_mac_reg_init[] = {
 [LEDCTL]= 2 | (3 <<

[PATCH v9 5/8] igb: check oversized packets for VMDq

2023-03-22 Thread Sriram Yagnaraman
Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c | 41 -
 1 file changed, 36 insertions(+), 5 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index af979b4739..0c5019fd6c 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -918,12 +918,26 @@ igb_rx_l4_cso_enabled(IGBCore *core)
 return !!(core->mac[RXCSUM] & E1000_RXCSUM_TUOFLD);
 }
 
+static bool
+igb_rx_is_oversized(IGBCore *core, uint16_t qn, size_t size)
+{
+uint16_t pool = qn % IGB_NUM_VM_POOLS;
+bool lpe = !!(core->mac[VMOLR0 + pool] & E1000_VMOLR_LPE);
+int max_ethernet_lpe_size =
+core->mac[VMOLR0 + pool] & E1000_VMOLR_RLPML_MASK;
+int max_ethernet_vlan_size = 1522;
+
+return size > (lpe ? max_ethernet_lpe_size : max_ethernet_vlan_size);
+}
+
 static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header 
*ehdr,
-   E1000E_RSSInfo *rss_info, bool *external_tx)
+   size_t size, E1000E_RSSInfo *rss_info,
+   bool *external_tx)
 {
 static const int ta_shift[] = { 4, 3, 2, 0 };
 uint32_t f, ra[2], *macp, rctl = core->mac[RCTL];
 uint16_t queues = 0;
+uint16_t oversized = 0;
 uint16_t vid = lduw_be_p(_GET_VLAN_HDR(ehdr)->h_tci) & VLAN_VID_MASK;
 bool accepted = false;
 int i;
@@ -1023,9 +1037,26 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 }
 
 queues &= core->mac[VFRE];
-igb_rss_parse_packet(core, core->rx_pkt, external_tx != NULL, 
rss_info);
-if (rss_info->queue & 1) {
-queues <<= 8;
+if (queues) {
+for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
+if ((queues & BIT(i)) && igb_rx_is_oversized(core, i, size)) {
+oversized |= BIT(i);
+}
+}
+/* 8.19.37 increment ROC if packet is oversized for all queues */
+if (oversized == queues) {
+trace_e1000x_rx_oversized(size);
+e1000x_inc_reg_if_not_full(core->mac, ROC);
+}
+queues &= ~oversized;
+}
+
+if (queues) {
+igb_rss_parse_packet(core, core->rx_pkt,
+ external_tx != NULL, rss_info);
+if (rss_info->queue & 1) {
+queues <<= 8;
+}
 }
 } else {
 switch (net_rx_pkt_get_packet_type(core->rx_pkt)) {
@@ -1573,7 +1604,7 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
e1000x_vlan_enabled(core->mac),
core->mac[VET] & 0x);
 
-queues = igb_receive_assign(core, ehdr, _info, external_tx);
+queues = igb_receive_assign(core, ehdr, size, _info, external_tx);
 if (!queues) {
 trace_e1000e_rx_flt_dropped();
 return orig_size;
-- 
2.34.1




[PATCH v9 3/8] igb: add ICR_RXDW

2023-03-22 Thread Sriram Yagnaraman
IGB uses RXDW ICR bit to indicate that rx descriptor has been written
back. This is the same as RXT0 bit in older HW.

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/e1000x_regs.h | 4 
 hw/net/igb_core.c| 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h
index c0832fa23d..6d3c4c6d3a 100644
--- a/hw/net/e1000x_regs.h
+++ b/hw/net/e1000x_regs.h
@@ -335,6 +335,7 @@
 #define E1000_ICR_RXDMT00x0010 /* rx desc min. threshold (0) */
 #define E1000_ICR_RXO   0x0040 /* rx overrun */
 #define E1000_ICR_RXT0  0x0080 /* rx timer intr (ring 0) */
+#define E1000_ICR_RXDW  0x0080 /* rx desc written back */
 #define E1000_ICR_MDAC  0x0200 /* MDIO access complete */
 #define E1000_ICR_RXCFG 0x0400 /* RX /c/ ordered set */
 #define E1000_ICR_GPI_EN0   0x0800 /* GP Int 0 */
@@ -378,6 +379,7 @@
 #define E1000_ICS_RXDMT0E1000_ICR_RXDMT0/* rx desc min. threshold */
 #define E1000_ICS_RXO   E1000_ICR_RXO   /* rx overrun */
 #define E1000_ICS_RXT0  E1000_ICR_RXT0  /* rx timer intr */
+#define E1000_ICS_RXDW  E1000_ICR_RXDW  /* rx desc written back */
 #define E1000_ICS_MDAC  E1000_ICR_MDAC  /* MDIO access complete */
 #define E1000_ICS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
 #define E1000_ICS_GPI_EN0   E1000_ICR_GPI_EN0   /* GP Int 0 */
@@ -407,6 +409,7 @@
 #define E1000_IMS_RXDMT0E1000_ICR_RXDMT0/* rx desc min. threshold */
 #define E1000_IMS_RXO   E1000_ICR_RXO   /* rx overrun */
 #define E1000_IMS_RXT0  E1000_ICR_RXT0  /* rx timer intr */
+#define E1000_IMS_RXDW  E1000_ICR_RXDW  /* rx desc written back */
 #define E1000_IMS_MDAC  E1000_ICR_MDAC  /* MDIO access complete */
 #define E1000_IMS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
 #define E1000_IMS_GPI_EN0   E1000_ICR_GPI_EN0   /* GP Int 0 */
@@ -441,6 +444,7 @@
 #define E1000_IMC_RXDMT0E1000_ICR_RXDMT0/* rx desc min. threshold */
 #define E1000_IMC_RXO   E1000_ICR_RXO   /* rx overrun */
 #define E1000_IMC_RXT0  E1000_ICR_RXT0  /* rx timer intr */
+#define E1000_IMC_RXDW  E1000_ICR_RXDW  /* rx desc written back */
 #define E1000_IMC_MDAC  E1000_ICR_MDAC  /* MDIO access complete */
 #define E1000_IMC_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
 #define E1000_IMC_GPI_EN0   E1000_ICR_GPI_EN0   /* GP Int 0 */
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 0fde8ef854..f799f7f84a 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -1580,7 +1580,7 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
 continue;
 }
 
-n |= E1000_ICR_RXT0;
+n |= E1000_ICR_RXDW;
 
 igb_rx_fix_l4_csum(core, core->rx_pkt);
 igb_write_packet_to_guest(core, core->rx_pkt, , _info);
-- 
2.34.1




[PATCH v9 1/8] MAINTAINERS: Add Sriram Yagnaraman as a igb reviewer

2023-03-22 Thread Sriram Yagnaraman
I would like to review and be informed on changes to igb device

Signed-off-by: Sriram Yagnaraman 
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 9b56ccdd92..a9ed6143f5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2252,6 +2252,7 @@ F: tests/qtest/libqos/e1000e.*
 
 igb
 M: Akihiko Odaki 
+R: Sriram Yagnaraman 
 S: Maintained
 F: docs/system/devices/igb.rst
 F: hw/net/igb*
-- 
2.34.1




[PATCH v9 0/8] igb: merge changes from <20221229190817.25500-1-sriram.yagnara...@est.tech>

2023-03-22 Thread Sriram Yagnaraman
Based-on: <20230322042044.25136-1-akihiko.od...@daynix.com>
([PATCH for 8.0 v2] igb: Save more Tx states)

Now that Akihiko's patchset for introducing the igb device is merged, I have
rebased my changes on master. The changes proposed here add support
for:
 - Correct PF/VF reset handling
 - Introduce ICR_RXDW register definition
 - Implement support for VFTE/VFRE/VMOLR_RSSE/VMVIR/VMOLR registers
 - Check oversized packet for VMDq
 - VF statistics

Changes since v8:
- Set RSTI bit on PF reset

Changes since v7:
- Adapt to "igb: Save more Tx states" patch from Akihiko
- Fix bug in VMVIR patch for inserting vlan, do not overwrite context

Changes since v6:
- Rebased on latest, which includes igb device introduction.

Changes since v5:
- Added back an unnecessarily removed empty line

Changes since v4:
- Removed the change implementing VTCTL.IGMAC, it needs more thought
  and implementation of DTXSWC.LLE and VLVF.LVLAN first

Changes since v3:
- Fix comments
- Rebased on latest patchset from Akihiko
- Remove Rx loop improvements that Akihiko has pulled into his patchset

Changes since v2:
- Fixed more comments from Akihiko
- Reordered the patches to make changes easier to understand

Changes since v1:
- Fix review comments from Akihiko


Sriram Yagnaraman (8):
  MAINTAINERS: Add Sriram Yagnaraman as a igb reviewer
  igb: handle PF/VF reset properly
  igb: add ICR_RXDW
  igb: implement VFRE and VFTE registers
  igb: check oversized packets for VMDq
  igb: respect E1000_VMOLR_RSSE
  igb: implement VF Tx and Rx stats
  igb: respect VMVIR and VMOLR for VLAN

 MAINTAINERS  |   1 +
 hw/net/e1000x_regs.h |   4 +
 hw/net/igb_core.c| 212 ++-
 hw/net/igb_core.h|   1 +
 hw/net/igb_regs.h|   6 ++
 hw/net/trace-events  |   2 +
 6 files changed, 185 insertions(+), 41 deletions(-)

-- 
2.34.1




[PATCH v9 8/8] igb: respect VMVIR and VMOLR for VLAN

2023-03-22 Thread Sriram Yagnaraman
Add support for stripping/inserting VLAN for VFs.

Had to move CSUM calculation back into the for loop, since packet data
is pulled inside the loop based on strip VLAN decision for every VF.

net_rx_pkt_fix_l4_csum should be extended to accept a buffer instead for
igb. Work for a future patch.

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c | 62 +--
 1 file changed, 49 insertions(+), 13 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 27bb4a55e6..52ea2b16c8 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -386,6 +386,28 @@ igb_rss_parse_packet(IGBCore *core, struct NetRxPkt *pkt, 
bool tx,
 info->queue = E1000_RSS_QUEUE(>mac[RETA], info->hash);
 }
 
+static void
+igb_tx_insert_vlan(IGBCore *core, uint16_t qn, struct igb_tx *tx,
+uint16_t vlan, bool insert_vlan)
+{
+if (core->mac[MRQC] & 1) {
+uint16_t pool = qn % IGB_NUM_VM_POOLS;
+
+if (core->mac[VMVIR0 + pool] & E1000_VMVIR_VLANA_DEFAULT) {
+/* always insert default VLAN */
+insert_vlan = true;
+vlan = core->mac[VMVIR0 + pool] & 0x;
+} else if (core->mac[VMVIR0 + pool] & E1000_VMVIR_VLANA_NEVER) {
+insert_vlan = false;
+}
+}
+
+if (insert_vlan && e1000x_vlan_enabled(core->mac)) {
+net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan,
+core->mac[VET] & 0x);
+}
+}
+
 static bool
 igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx)
 {
@@ -582,12 +604,11 @@ igb_process_tx_desc(IGBCore *core,
 
 if (cmd_type_len & E1000_TXD_CMD_EOP) {
 if (!tx->skip_cp && net_tx_pkt_parse(tx->tx_pkt)) {
-if (cmd_type_len & E1000_TXD_CMD_VLE) {
-idx = (tx->first_olinfo_status >> 4) & 1;
-uint16_t vlan = tx->ctx[idx].vlan_macip_lens >> 16;
-uint16_t vet = core->mac[VET] & 0x;
-net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan, vet);
-}
+idx = (tx->first_olinfo_status >> 4) & 1;
+igb_tx_insert_vlan(core, queue_index, tx,
+tx->ctx[idx].vlan_macip_lens >> 16,
+!!(cmd_type_len & E1000_TXD_CMD_VLE));
+
 if (igb_tx_pkt_send(core, tx, queue_index)) {
 igb_on_tx_done_update_stats(core, tx->tx_pkt, queue_index);
 }
@@ -1544,6 +1565,20 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt 
*pkt,
 igb_update_rx_stats(core, rxi, size, total_size);
 }
 
+static bool
+igb_rx_strip_vlan(IGBCore *core, const E1000E_RingInfo *rxi)
+{
+if (core->mac[MRQC] & 1) {
+uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
+/* Sec 7.10.3.8: CTRL.VME is ignored, only VMOLR/RPLOLR is used */
+return (net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) ?
+core->mac[RPLOLR] & E1000_RPLOLR_STRVLAN :
+core->mac[VMOLR0 + pool] & E1000_VMOLR_STRVLAN;
+}
+
+return e1000x_vlan_enabled(core->mac);
+}
+
 static inline void
 igb_rx_fix_l4_csum(IGBCore *core, struct NetRxPkt *pkt)
 {
@@ -1624,10 +1659,7 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
 
 ehdr = PKT_GET_ETH_HDR(filter_buf);
 net_rx_pkt_set_packet_type(core->rx_pkt, get_eth_packet_type(ehdr));
-
-net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs,
-   e1000x_vlan_enabled(core->mac),
-   core->mac[VET] & 0x);
+net_rx_pkt_set_protocols(core->rx_pkt, filter_buf, size);
 
 queues = igb_receive_assign(core, ehdr, size, _info, external_tx);
 if (!queues) {
@@ -1635,9 +1667,6 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
 return orig_size;
 }
 
-total_size = net_rx_pkt_get_total_len(core->rx_pkt) +
-e1000x_fcs_len(core->mac);
-
 for (i = 0; i < IGB_NUM_QUEUES; i++) {
 if (!(queues & BIT(i)) ||
 !(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
@@ -1646,6 +1675,13 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
 
 igb_rx_ring_init(core, , i);
 
+net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs,
+   igb_rx_strip_vlan(core, rxr.i),
+   core->mac[VET] & 0x);
+
+total_size = net_rx_pkt_get_total_len(core->rx_pkt) +
+e1000x_fcs_len(core->mac);
+
 if (!igb_has_rxbufs(core, rxr.i, total_size)) {
 n |= E1000_ICS_RXO;
 trace_e1000e_rx_not_written_to_guest(rxr.i->idx);
-- 
2.34.1




[PATCH v9 7/8] igb: implement VF Tx and Rx stats

2023-03-22 Thread Sriram Yagnaraman
Please note that loopback counters for VM to VM traffic are not
implemented yet: VFGOTLBC, VFGPTLBC, VFGORLBC and VFGPRLBC.

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c | 26 ++
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index ce81e60558..27bb4a55e6 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -492,7 +492,7 @@ igb_tx_pkt_send(IGBCore *core, struct igb_tx *tx, int 
queue_index)
 }
 
 static void
-igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt)
+igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt, int qn)
 {
 static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
 PTC1023, PTC1522 };
@@ -519,6 +519,13 @@ igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt 
*tx_pkt)
 core->mac[GPTC] = core->mac[TPT];
 core->mac[GOTCL] = core->mac[TOTL];
 core->mac[GOTCH] = core->mac[TOTH];
+
+if (core->mac[MRQC] & 1) {
+uint16_t pool = qn % IGB_NUM_VM_POOLS;
+
+core->mac[PVFGOTC0 + (pool * 64)] += tot_len;
+core->mac[PVFGPTC0 + (pool * 64)]++;
+}
 }
 
 static void
@@ -582,7 +589,7 @@ igb_process_tx_desc(IGBCore *core,
 net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan, vet);
 }
 if (igb_tx_pkt_send(core, tx, queue_index)) {
-igb_on_tx_done_update_stats(core, tx->tx_pkt);
+igb_on_tx_done_update_stats(core, tx->tx_pkt, queue_index);
 }
 }
 
@@ -1406,7 +1413,8 @@ igb_write_to_rx_buffers(IGBCore *core,
 }
 
 static void
-igb_update_rx_stats(IGBCore *core, size_t data_size, size_t data_fcs_size)
+igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi,
+size_t data_size, size_t data_fcs_size)
 {
 e1000x_update_rx_total_stats(core->mac, data_size, data_fcs_size);
 
@@ -1422,6 +1430,16 @@ igb_update_rx_stats(IGBCore *core, size_t data_size, 
size_t data_fcs_size)
 default:
 break;
 }
+
+if (core->mac[MRQC] & 1) {
+uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
+
+core->mac[PVFGORC0 + (pool * 64)] += data_size + 4;
+core->mac[PVFGPRC0 + (pool * 64)]++;
+if (net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) {
+core->mac[PVFMPRC0 + (pool * 64)]++;
+}
+}
 }
 
 static inline bool
@@ -1523,7 +1541,7 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt 
*pkt,
 
 } while (desc_offset < total_size);
 
-igb_update_rx_stats(core, size, total_size);
+igb_update_rx_stats(core, rxi, size, total_size);
 }
 
 static inline void
-- 
2.34.1




[PATCH v9 6/8] igb: respect E1000_VMOLR_RSSE

2023-03-22 Thread Sriram Yagnaraman
RSS for VFs is only enabled if VMOLR[n].RSSE is set.

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 0c5019fd6c..ce81e60558 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -1054,8 +1054,15 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 if (queues) {
 igb_rss_parse_packet(core, core->rx_pkt,
  external_tx != NULL, rss_info);
+/* Sec 8.26.1: PQn = VFn + VQn*8 */
 if (rss_info->queue & 1) {
-queues <<= 8;
+for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
+if ((queues & BIT(i)) &&
+(core->mac[VMOLR0 + i] & E1000_VMOLR_RSSE)) {
+queues |= BIT(i + IGB_NUM_VM_POOLS);
+queues &= ~BIT(i);
+}
+}
 }
 }
 } else {
-- 
2.34.1




RE: [PATCH v8 2/8] igb: handle PF/VF reset properly

2023-03-22 Thread Sriram Yagnaraman
> -Original Message-
> From: Philippe Mathieu-Daudé 
> Sent: Wednesday, 22 March 2023 11:29
> To: Sriram Yagnaraman 
> Cc: qemu-devel@nongnu.org; Akihiko Odaki ;
> Jason Wang ; Dmitry Fleytman
> ; Michael S . Tsirkin ; Marcel
> Apfelbaum 
> Subject: Re: [PATCH v8 2/8] igb: handle PF/VF reset properly
> 
> On 22/3/23 10:26, Sriram Yagnaraman wrote:
> > Use PFRSTD to reset RSTI bit for VFs, and raise VFLRE interrupt when
> > VF is reset.
> >
> > Signed-off-by: Sriram Yagnaraman 
> > ---
> >   hw/net/igb_core.c   | 33 +
> >   hw/net/igb_regs.h   |  3 +++
> >   hw/net/trace-events |  2 ++
> >   3 files changed, 26 insertions(+), 12 deletions(-)
> >
> > diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index
> > 596039aab8..fe6c7518e9 100644
> > --- a/hw/net/igb_core.c
> > +++ b/hw/net/igb_core.c
> > @@ -1895,14 +1895,6 @@ static void igb_set_eims(IGBCore *core, int
> index, uint32_t val)
> >   igb_update_interrupt_state(core);
> >   }
> >
> > -static void igb_vf_reset(IGBCore *core, uint16_t vfn) -{
> > -/* TODO: Reset of the queue enable and the interrupt registers of the 
> > VF.
> */
> > -
> > -core->mac[V2PMAILBOX0 + vfn] &= ~E1000_V2PMAILBOX_RSTI;
> > -core->mac[V2PMAILBOX0 + vfn] = E1000_V2PMAILBOX_RSTD;
> > -}
> > -
> >   static void mailbox_interrupt_to_vf(IGBCore *core, uint16_t vfn)
> >   {
> >   uint32_t ent = core->mac[VTIVAR_MISC + vfn]; @@ -1980,6 +1972,17
> > @@ static void igb_set_vfmailbox(IGBCore *core, int index, uint32_t val)
> >   }
> >   }
> >
> > +static void igb_vf_reset(IGBCore *core, uint16_t vfn) {
> > +/* disable Rx and Tx for the VF*/
> > +core->mac[VFTE] &= ~BIT(vfn);
> > +core->mac[VFRE] &= ~BIT(vfn);
> > +/* indicate VF reset to PF */
> > +core->mac[VFLRE] |= BIT(vfn);
> > +/* VFLRE and mailbox use the same interrupt cause */
> > +mailbox_interrupt_to_pf(core);
> > +}
> 
> Orthogonal to this patch, I'm surprised to see a function named
> igb_vf_reset() which is not called by igb_reset().

Thanks for the pertinent comment, will fix it. On PF reset, the hardware will 
assert RSTI bit on all VF mailboxes, which should in turn trigger a VF reset 
after the PF reset is complete.


[PATCH v8 8/8] igb: respect VMVIR and VMOLR for VLAN

2023-03-22 Thread Sriram Yagnaraman
Add support for stripping/inserting VLAN for VFs.

Had to move CSUM calculation back into the for loop, since packet data
is pulled inside the loop based on strip VLAN decision for every VF.

net_rx_pkt_fix_l4_csum should be extended to accept a buffer instead for
igb. Work for a future patch.

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c | 62 +--
 1 file changed, 49 insertions(+), 13 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 7e97a01d66..572cae10bd 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -386,6 +386,28 @@ igb_rss_parse_packet(IGBCore *core, struct NetRxPkt *pkt, 
bool tx,
 info->queue = E1000_RSS_QUEUE(>mac[RETA], info->hash);
 }
 
+static void
+igb_tx_insert_vlan(IGBCore *core, uint16_t qn, struct igb_tx *tx,
+uint16_t vlan, bool insert_vlan)
+{
+if (core->mac[MRQC] & 1) {
+uint16_t pool = qn % IGB_NUM_VM_POOLS;
+
+if (core->mac[VMVIR0 + pool] & E1000_VMVIR_VLANA_DEFAULT) {
+/* always insert default VLAN */
+insert_vlan = true;
+vlan = core->mac[VMVIR0 + pool] & 0x;
+} else if (core->mac[VMVIR0 + pool] & E1000_VMVIR_VLANA_NEVER) {
+insert_vlan = false;
+}
+}
+
+if (insert_vlan && e1000x_vlan_enabled(core->mac)) {
+net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan,
+core->mac[VET] & 0x);
+}
+}
+
 static bool
 igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx)
 {
@@ -582,12 +604,11 @@ igb_process_tx_desc(IGBCore *core,
 
 if (cmd_type_len & E1000_TXD_CMD_EOP) {
 if (!tx->skip_cp && net_tx_pkt_parse(tx->tx_pkt)) {
-if (cmd_type_len & E1000_TXD_CMD_VLE) {
-idx = (tx->first_olinfo_status >> 4) & 1;
-uint16_t vlan = tx->ctx[idx].vlan_macip_lens >> 16;
-uint16_t vet = core->mac[VET] & 0x;
-net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan, vet);
-}
+idx = (tx->first_olinfo_status >> 4) & 1;
+igb_tx_insert_vlan(core, queue_index, tx,
+tx->ctx[idx].vlan_macip_lens >> 16,
+!!(cmd_type_len & E1000_TXD_CMD_VLE));
+
 if (igb_tx_pkt_send(core, tx, queue_index)) {
 igb_on_tx_done_update_stats(core, tx->tx_pkt, queue_index);
 }
@@ -1544,6 +1565,20 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt 
*pkt,
 igb_update_rx_stats(core, rxi, size, total_size);
 }
 
+static bool
+igb_rx_strip_vlan(IGBCore *core, const E1000E_RingInfo *rxi)
+{
+if (core->mac[MRQC] & 1) {
+uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
+/* Sec 7.10.3.8: CTRL.VME is ignored, only VMOLR/RPLOLR is used */
+return (net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) ?
+core->mac[RPLOLR] & E1000_RPLOLR_STRVLAN :
+core->mac[VMOLR0 + pool] & E1000_VMOLR_STRVLAN;
+}
+
+return e1000x_vlan_enabled(core->mac);
+}
+
 static inline void
 igb_rx_fix_l4_csum(IGBCore *core, struct NetRxPkt *pkt)
 {
@@ -1624,10 +1659,7 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
 
 ehdr = PKT_GET_ETH_HDR(filter_buf);
 net_rx_pkt_set_packet_type(core->rx_pkt, get_eth_packet_type(ehdr));
-
-net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs,
-   e1000x_vlan_enabled(core->mac),
-   core->mac[VET] & 0x);
+net_rx_pkt_set_protocols(core->rx_pkt, filter_buf, size);
 
 queues = igb_receive_assign(core, ehdr, size, _info, external_tx);
 if (!queues) {
@@ -1635,9 +1667,6 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
 return orig_size;
 }
 
-total_size = net_rx_pkt_get_total_len(core->rx_pkt) +
-e1000x_fcs_len(core->mac);
-
 for (i = 0; i < IGB_NUM_QUEUES; i++) {
 if (!(queues & BIT(i)) ||
 !(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
@@ -1646,6 +1675,13 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
 
 igb_rx_ring_init(core, , i);
 
+net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs,
+   igb_rx_strip_vlan(core, rxr.i),
+   core->mac[VET] & 0x);
+
+total_size = net_rx_pkt_get_total_len(core->rx_pkt) +
+e1000x_fcs_len(core->mac);
+
 if (!igb_has_rxbufs(core, rxr.i, total_size)) {
 n |= E1000_ICS_RXO;
 trace_e1000e_rx_not_written_to_guest(rxr.i->idx);
-- 
2.34.1




[PATCH v8 4/8] igb: implement VFRE and VFTE registers

2023-03-22 Thread Sriram Yagnaraman
Also introduce:
- Checks for RXDCTL/TXDCTL queue enable bits
- IGB_NUM_VM_POOLS enum (Sec 1.5: Table 1-7)

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c | 38 +++---
 hw/net/igb_core.h |  1 +
 hw/net/igb_regs.h |  3 +++
 3 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index c575d4a615..7c8f665f07 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -783,6 +783,18 @@ igb_txdesc_writeback(IGBCore *core, dma_addr_t base,
 return igb_tx_wb_eic(core, txi->idx);
 }
 
+static inline bool
+igb_tx_enabled(IGBCore *core, const E1000E_RingInfo *txi)
+{
+bool vmdq = core->mac[MRQC] & 1;
+uint16_t qn = txi->idx;
+uint16_t pool = qn % IGB_NUM_VM_POOLS;
+
+return (core->mac[TCTL] & E1000_TCTL_EN) &&
+(!vmdq || core->mac[VFTE] & BIT(pool)) &&
+(core->mac[TXDCTL0 + (qn * 16)] & E1000_TXDCTL_QUEUE_ENABLE);
+}
+
 static void
 igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
 {
@@ -792,8 +804,7 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
 const E1000E_RingInfo *txi = txr->i;
 uint32_t eic = 0;
 
-/* TODO: check if the queue itself is enabled too. */
-if (!(core->mac[TCTL] & E1000_TCTL_EN)) {
+if (!igb_tx_enabled(core, txi)) {
 trace_e1000e_tx_disabled();
 return;
 }
@@ -869,6 +880,9 @@ igb_can_receive(IGBCore *core)
 
 for (i = 0; i < IGB_NUM_QUEUES; i++) {
 E1000E_RxRing rxr;
+if (!(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
+continue;
+}
 
 igb_rx_ring_init(core, , i);
 if (igb_ring_enabled(core, rxr.i) && igb_has_rxbufs(core, rxr.i, 1)) {
@@ -935,7 +949,7 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 
 if (core->mac[MRQC] & 1) {
 if (is_broadcast_ether_addr(ehdr->h_dest)) {
-for (i = 0; i < 8; i++) {
+for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
 if (core->mac[VMOLR0 + i] & E1000_VMOLR_BAM) {
 queues |= BIT(i);
 }
@@ -969,7 +983,7 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 f = ta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
 f = (((ehdr->h_dest[5] << 8) | ehdr->h_dest[4]) >> f) & 0xfff;
 if (macp[f >> 5] & (1 << (f & 0x1f))) {
-for (i = 0; i < 8; i++) {
+for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
 if (core->mac[VMOLR0 + i] & E1000_VMOLR_ROMPE) {
 queues |= BIT(i);
 }
@@ -992,7 +1006,7 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 }
 }
 } else {
-for (i = 0; i < 8; i++) {
+for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
 if (core->mac[VMOLR0 + i] & E1000_VMOLR_AUPE) {
 mask |= BIT(i);
 }
@@ -1008,6 +1022,7 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 queues = BIT(def_pl >> E1000_VT_CTL_DEFAULT_POOL_SHIFT);
 }
 
+queues &= core->mac[VFRE];
 igb_rss_parse_packet(core, core->rx_pkt, external_tx != NULL, 
rss_info);
 if (rss_info->queue & 1) {
 queues <<= 8;
@@ -1568,7 +1583,8 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
 e1000x_fcs_len(core->mac);
 
 for (i = 0; i < IGB_NUM_QUEUES; i++) {
-if (!(queues & BIT(i))) {
+if (!(queues & BIT(i)) ||
+!(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
 continue;
 }
 
@@ -1974,9 +1990,16 @@ static void igb_set_vfmailbox(IGBCore *core, int index, 
uint32_t val)
 
 static void igb_vf_reset(IGBCore *core, uint16_t vfn)
 {
+uint16_t qn0 = vfn;
+uint16_t qn1 = vfn + IGB_NUM_VM_POOLS;
+
 /* disable Rx and Tx for the VF*/
-core->mac[VFTE] &= ~BIT(vfn);
+core->mac[RXDCTL0 + (qn0 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE;
+core->mac[RXDCTL0 + (qn1 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE;
+core->mac[TXDCTL0 + (qn0 * 16)] &= ~E1000_TXDCTL_QUEUE_ENABLE;
+core->mac[TXDCTL0 + (qn1 * 16)] &= ~E1000_TXDCTL_QUEUE_ENABLE;
 core->mac[VFRE] &= ~BIT(vfn);
+core->mac[VFTE] &= ~BIT(vfn);
 /* indicate VF reset to PF */
 core->mac[VFLRE] |= BIT(vfn);
 /* VFLRE and mailbox use the same interrupt cause */
@@ -3911,6 +3934,7 @@ igb_phy_reg_init[] = {
 static const uint32_t igb_mac_reg_init[] = {
 [LEDCTL]= 2 | (3 <<

[PATCH v8 1/8] MAINTAINERS: Add Sriram Yagnaraman as a igb reviewer

2023-03-22 Thread Sriram Yagnaraman
I would like to review and be informed of changes to the igb device.

Signed-off-by: Sriram Yagnaraman 
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 9b56ccdd92..a9ed6143f5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2252,6 +2252,7 @@ F: tests/qtest/libqos/e1000e.*
 
 igb
 M: Akihiko Odaki 
+R: Sriram Yagnaraman 
 S: Maintained
 F: docs/system/devices/igb.rst
 F: hw/net/igb*
-- 
2.34.1




[PATCH v8 5/8] igb: check oversized packets for VMDq

2023-03-22 Thread Sriram Yagnaraman
Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c | 41 -
 1 file changed, 36 insertions(+), 5 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 7c8f665f07..1de24ffde2 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -918,12 +918,26 @@ igb_rx_l4_cso_enabled(IGBCore *core)
 return !!(core->mac[RXCSUM] & E1000_RXCSUM_TUOFLD);
 }
 
+static bool
+igb_rx_is_oversized(IGBCore *core, uint16_t qn, size_t size)
+{
+uint16_t pool = qn % IGB_NUM_VM_POOLS;
+bool lpe = !!(core->mac[VMOLR0 + pool] & E1000_VMOLR_LPE);
+int max_ethernet_lpe_size =
+core->mac[VMOLR0 + pool] & E1000_VMOLR_RLPML_MASK;
+int max_ethernet_vlan_size = 1522;
+
+return size > (lpe ? max_ethernet_lpe_size : max_ethernet_vlan_size);
+}
+
 static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header 
*ehdr,
-   E1000E_RSSInfo *rss_info, bool *external_tx)
+   size_t size, E1000E_RSSInfo *rss_info,
+   bool *external_tx)
 {
 static const int ta_shift[] = { 4, 3, 2, 0 };
 uint32_t f, ra[2], *macp, rctl = core->mac[RCTL];
 uint16_t queues = 0;
+uint16_t oversized = 0;
 uint16_t vid = lduw_be_p(_GET_VLAN_HDR(ehdr)->h_tci) & VLAN_VID_MASK;
 bool accepted = false;
 int i;
@@ -1023,9 +1037,26 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 }
 
 queues &= core->mac[VFRE];
-igb_rss_parse_packet(core, core->rx_pkt, external_tx != NULL, 
rss_info);
-if (rss_info->queue & 1) {
-queues <<= 8;
+if (queues) {
+for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
+if ((queues & BIT(i)) && igb_rx_is_oversized(core, i, size)) {
+oversized |= BIT(i);
+}
+}
+/* 8.19.37 increment ROC if packet is oversized for all queues */
+if (oversized == queues) {
+trace_e1000x_rx_oversized(size);
+e1000x_inc_reg_if_not_full(core->mac, ROC);
+}
+queues &= ~oversized;
+}
+
+if (queues) {
+igb_rss_parse_packet(core, core->rx_pkt,
+ external_tx != NULL, rss_info);
+if (rss_info->queue & 1) {
+queues <<= 8;
+}
 }
 } else {
 switch (net_rx_pkt_get_packet_type(core->rx_pkt)) {
@@ -1573,7 +1604,7 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
e1000x_vlan_enabled(core->mac),
core->mac[VET] & 0x);
 
-queues = igb_receive_assign(core, ehdr, _info, external_tx);
+queues = igb_receive_assign(core, ehdr, size, _info, external_tx);
 if (!queues) {
 trace_e1000e_rx_flt_dropped();
 return orig_size;
-- 
2.34.1




[PATCH v8 2/8] igb: handle PF/VF reset properly

2023-03-22 Thread Sriram Yagnaraman
Use PFRSTD to reset RSTI bit for VFs, and raise VFLRE interrupt when VF
is reset.

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c   | 33 +
 hw/net/igb_regs.h   |  3 +++
 hw/net/trace-events |  2 ++
 3 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 596039aab8..fe6c7518e9 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -1895,14 +1895,6 @@ static void igb_set_eims(IGBCore *core, int index, 
uint32_t val)
 igb_update_interrupt_state(core);
 }
 
-static void igb_vf_reset(IGBCore *core, uint16_t vfn)
-{
-/* TODO: Reset of the queue enable and the interrupt registers of the VF. 
*/
-
-core->mac[V2PMAILBOX0 + vfn] &= ~E1000_V2PMAILBOX_RSTI;
-core->mac[V2PMAILBOX0 + vfn] = E1000_V2PMAILBOX_RSTD;
-}
-
 static void mailbox_interrupt_to_vf(IGBCore *core, uint16_t vfn)
 {
 uint32_t ent = core->mac[VTIVAR_MISC + vfn];
@@ -1980,6 +1972,17 @@ static void igb_set_vfmailbox(IGBCore *core, int index, 
uint32_t val)
 }
 }
 
+static void igb_vf_reset(IGBCore *core, uint16_t vfn)
+{
+/* disable Rx and Tx for the VF*/
+core->mac[VFTE] &= ~BIT(vfn);
+core->mac[VFRE] &= ~BIT(vfn);
+/* indicate VF reset to PF */
+core->mac[VFLRE] |= BIT(vfn);
+/* VFLRE and mailbox use the same interrupt cause */
+mailbox_interrupt_to_pf(core);
+}
+
 static void igb_w1c(IGBCore *core, int index, uint32_t val)
 {
 core->mac[index] &= ~val;
@@ -2234,14 +2237,20 @@ igb_set_status(IGBCore *core, int index, uint32_t val)
 static void
 igb_set_ctrlext(IGBCore *core, int index, uint32_t val)
 {
-trace_e1000e_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK),
- !!(val & E1000_CTRL_EXT_SPD_BYPS));
-
-/* TODO: PFRSTD */
+trace_igb_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK),
+  !!(val & E1000_CTRL_EXT_SPD_BYPS),
+  !!(val & E1000_CTRL_EXT_PFRSTD));
 
 /* Zero self-clearing bits */
 val &= ~(E1000_CTRL_EXT_ASDCHK | E1000_CTRL_EXT_EE_RST);
 core->mac[CTRL_EXT] = val;
+
+if (core->mac[CTRL_EXT] & E1000_CTRL_EXT_PFRSTD) {
+for (int vfn = 0; vfn < IGB_MAX_VF_FUNCTIONS; vfn++) {
+core->mac[V2PMAILBOX0 + vfn] &= ~E1000_V2PMAILBOX_RSTI;
+core->mac[V2PMAILBOX0 + vfn] |= E1000_V2PMAILBOX_RSTD;
+}
+}
 }
 
 static void
diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h
index 00934d4f20..a658f9b53f 100644
--- a/hw/net/igb_regs.h
+++ b/hw/net/igb_regs.h
@@ -240,6 +240,9 @@ union e1000_adv_rx_desc {
 
 /* from igb/e1000_defines.h */
 
+/* Physical Func Reset Done Indication */
+#define E1000_CTRL_EXT_PFRSTD   0x4000
+
 #define E1000_IVAR_VALID 0x80
 #define E1000_GPIE_NSICR 0x0001
 #define E1000_GPIE_MSIX_MODE 0x0010
diff --git a/hw/net/trace-events b/hw/net/trace-events
index 65753411fc..d35554fce8 100644
--- a/hw/net/trace-events
+++ b/hw/net/trace-events
@@ -280,6 +280,8 @@ igb_core_mdic_read_unhandled(uint32_t addr) "MDIC READ: 
PHY[%u] UNHANDLED"
 igb_core_mdic_write(uint32_t addr, uint32_t data) "MDIC WRITE: PHY[%u] = 0x%x"
 igb_core_mdic_write_unhandled(uint32_t addr) "MDIC WRITE: PHY[%u] UNHANDLED"
 
+igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) 
"Set extended link params: ASD check: %d, Speed select bypass: %d, PF reset 
done: %d"
+
 igb_rx_desc_buff_size(uint32_t b) "buffer size: %u"
 igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, 
uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u"
 
-- 
2.34.1




[PATCH v8 0/8] igb: merge changes from <20221229190817.25500-1-sriram.yagnara...@est.tech>

2023-03-22 Thread Sriram Yagnaraman
Based-on: <20230322042044.25136-1-akihiko.od...@daynix.com>
([PATCH for 8.0 v2] igb: Save more Tx states)

Now that Akihiko's patchset introducing the igb device is merged, I have
rebased my changes on master. The changes proposed here add support
for:
 - Correct PF/VF reset handling
 - Introduce ICR_RXDW register definition
 - Implement support for VFTE/VFRE/VMOLR_RSSE/VMVIR/VMOLR registers
 - Check oversized packet for VMDq
 - VF statistics

Changes since v7:
- Adapt to "igb: Save more Tx states" patch from Akihiko
- Fix bug in VMVIR patch for inserting vlan, do not overwrite context

Changes since v6:
- Rebased on latest, which includes igb device introduction.

Changes since v5:
- Added back an unnecessarily removed empty line

Changes since v4:
- Removed the change implementing VTCTL.IGMAC, it needs more thought
  and implementation of DTXSWC.LLE and VLVF.LVLAN first

Changes since v3:
- Fix comments
- Rebased on latest patchset from Akihiko
- Remove Rx loop improvements that Akihiko has pulled into his patchset

Changes since v2:
- Fixed more comments from Akihiko
- Reordered the patches to make changes easier to understand

Changes since v1:
- Fix review comments from Akihiko


Sriram Yagnaraman (8):
  MAINTAINERS: Add Sriram Yagnaraman as a igb reviewer
  igb: handle PF/VF reset properly
  igb: add ICR_RXDW
  igb: implement VFRE and VFTE registers
  igb: check oversized packets for VMDq
  igb: respect E1000_VMOLR_RSSE
  igb: implement VF Tx and Rx stats
  igb: respect VMVIR and VMOLR for VLAN

 MAINTAINERS  |   1 +
 hw/net/e1000x_regs.h |   4 +
 hw/net/igb_core.c| 207 ++-
 hw/net/igb_core.h|   1 +
 hw/net/igb_regs.h|   6 ++
 hw/net/trace-events  |   2 +
 6 files changed, 180 insertions(+), 41 deletions(-)

-- 
2.34.1




[PATCH v8 3/8] igb: add ICR_RXDW

2023-03-22 Thread Sriram Yagnaraman
IGB uses RXDW ICR bit to indicate that rx descriptor has been written
back. This is the same as RXT0 bit in older HW.

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/e1000x_regs.h | 4 
 hw/net/igb_core.c| 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h
index c0832fa23d..6d3c4c6d3a 100644
--- a/hw/net/e1000x_regs.h
+++ b/hw/net/e1000x_regs.h
@@ -335,6 +335,7 @@
 #define E1000_ICR_RXDMT00x0010 /* rx desc min. threshold (0) */
 #define E1000_ICR_RXO   0x0040 /* rx overrun */
 #define E1000_ICR_RXT0  0x0080 /* rx timer intr (ring 0) */
+#define E1000_ICR_RXDW  0x0080 /* rx desc written back */
 #define E1000_ICR_MDAC  0x0200 /* MDIO access complete */
 #define E1000_ICR_RXCFG 0x0400 /* RX /c/ ordered set */
 #define E1000_ICR_GPI_EN0   0x0800 /* GP Int 0 */
@@ -378,6 +379,7 @@
 #define E1000_ICS_RXDMT0E1000_ICR_RXDMT0/* rx desc min. threshold */
 #define E1000_ICS_RXO   E1000_ICR_RXO   /* rx overrun */
 #define E1000_ICS_RXT0  E1000_ICR_RXT0  /* rx timer intr */
+#define E1000_ICS_RXDW  E1000_ICR_RXDW  /* rx desc written back */
 #define E1000_ICS_MDAC  E1000_ICR_MDAC  /* MDIO access complete */
 #define E1000_ICS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
 #define E1000_ICS_GPI_EN0   E1000_ICR_GPI_EN0   /* GP Int 0 */
@@ -407,6 +409,7 @@
 #define E1000_IMS_RXDMT0E1000_ICR_RXDMT0/* rx desc min. threshold */
 #define E1000_IMS_RXO   E1000_ICR_RXO   /* rx overrun */
 #define E1000_IMS_RXT0  E1000_ICR_RXT0  /* rx timer intr */
+#define E1000_IMS_RXDW  E1000_ICR_RXDW  /* rx desc written back */
 #define E1000_IMS_MDAC  E1000_ICR_MDAC  /* MDIO access complete */
 #define E1000_IMS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
 #define E1000_IMS_GPI_EN0   E1000_ICR_GPI_EN0   /* GP Int 0 */
@@ -441,6 +444,7 @@
 #define E1000_IMC_RXDMT0E1000_ICR_RXDMT0/* rx desc min. threshold */
 #define E1000_IMC_RXO   E1000_ICR_RXO   /* rx overrun */
 #define E1000_IMC_RXT0  E1000_ICR_RXT0  /* rx timer intr */
+#define E1000_IMC_RXDW  E1000_ICR_RXDW  /* rx desc written back */
 #define E1000_IMC_MDAC  E1000_ICR_MDAC  /* MDIO access complete */
 #define E1000_IMC_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
 #define E1000_IMC_GPI_EN0   E1000_ICR_GPI_EN0   /* GP Int 0 */
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index fe6c7518e9..c575d4a615 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -1580,7 +1580,7 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
 continue;
 }
 
-n |= E1000_ICR_RXT0;
+n |= E1000_ICR_RXDW;
 
 igb_rx_fix_l4_csum(core, core->rx_pkt);
 igb_write_packet_to_guest(core, core->rx_pkt, , _info);
-- 
2.34.1




[PATCH v8 7/8] igb: implement VF Tx and Rx stats

2023-03-22 Thread Sriram Yagnaraman
Please note that loopback counters for VM-to-VM traffic are not
implemented yet: VFGOTLBC, VFGPTLBC, VFGORLBC and VFGPRLBC.

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c | 26 ++
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index fd38c7c56c..7e97a01d66 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -492,7 +492,7 @@ igb_tx_pkt_send(IGBCore *core, struct igb_tx *tx, int 
queue_index)
 }
 
 static void
-igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt)
+igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt, int qn)
 {
 static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
 PTC1023, PTC1522 };
@@ -519,6 +519,13 @@ igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt 
*tx_pkt)
 core->mac[GPTC] = core->mac[TPT];
 core->mac[GOTCL] = core->mac[TOTL];
 core->mac[GOTCH] = core->mac[TOTH];
+
+if (core->mac[MRQC] & 1) {
+uint16_t pool = qn % IGB_NUM_VM_POOLS;
+
+core->mac[PVFGOTC0 + (pool * 64)] += tot_len;
+core->mac[PVFGPTC0 + (pool * 64)]++;
+}
 }
 
 static void
@@ -582,7 +589,7 @@ igb_process_tx_desc(IGBCore *core,
 net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan, vet);
 }
 if (igb_tx_pkt_send(core, tx, queue_index)) {
-igb_on_tx_done_update_stats(core, tx->tx_pkt);
+igb_on_tx_done_update_stats(core, tx->tx_pkt, queue_index);
 }
 }
 
@@ -1406,7 +1413,8 @@ igb_write_to_rx_buffers(IGBCore *core,
 }
 
 static void
-igb_update_rx_stats(IGBCore *core, size_t data_size, size_t data_fcs_size)
+igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi,
+size_t data_size, size_t data_fcs_size)
 {
 e1000x_update_rx_total_stats(core->mac, data_size, data_fcs_size);
 
@@ -1422,6 +1430,16 @@ igb_update_rx_stats(IGBCore *core, size_t data_size, 
size_t data_fcs_size)
 default:
 break;
 }
+
+if (core->mac[MRQC] & 1) {
+uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
+
+core->mac[PVFGORC0 + (pool * 64)] += data_size + 4;
+core->mac[PVFGPRC0 + (pool * 64)]++;
+if (net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) {
+core->mac[PVFMPRC0 + (pool * 64)]++;
+}
+}
 }
 
 static inline bool
@@ -1523,7 +1541,7 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt 
*pkt,
 
 } while (desc_offset < total_size);
 
-igb_update_rx_stats(core, size, total_size);
+igb_update_rx_stats(core, rxi, size, total_size);
 }
 
 static inline void
-- 
2.34.1




[PATCH v8 6/8] igb: respect E1000_VMOLR_RSSE

2023-03-22 Thread Sriram Yagnaraman
RSS for VFs is only enabled if VMOLR[n].RSSE is set.

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 1de24ffde2..fd38c7c56c 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -1054,8 +1054,15 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 if (queues) {
 igb_rss_parse_packet(core, core->rx_pkt,
  external_tx != NULL, rss_info);
+/* Sec 8.26.1: PQn = VFn + VQn*8 */
 if (rss_info->queue & 1) {
-queues <<= 8;
+for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
+if ((queues & BIT(i)) &&
+(core->mac[VMOLR0 + i] & E1000_VMOLR_RSSE)) {
+queues |= BIT(i + IGB_NUM_VM_POOLS);
+queues &= ~BIT(i);
+}
+}
 }
 }
 } else {
-- 
2.34.1




RE: [PATCH for 8.0] igb: Save more Tx states

2023-03-21 Thread Sriram Yagnaraman

> -Original Message-
> From: qemu-devel-bounces+sriram.yagnaraman=est.t...@nongnu.org
>  On Behalf
> Of Sriram Yagnaraman
> Sent: Friday, 17 March 2023 16:26
> To: Akihiko Odaki 
> Cc: qemu-devel@nongnu.org; Jason Wang ; Dmitry
> Fleytman ; quint...@redhat.com; Philippe
> Mathieu-Daudé 
> Subject: RE: [PATCH for 8.0] igb: Save more Tx states
> 
> 
> > -Original Message-
> > From: Akihiko Odaki 
> > Sent: Friday, 17 March 2023 15:21
> > To: Sriram Yagnaraman 
> > Cc: qemu-devel@nongnu.org; Jason Wang ; Dmitry
> > Fleytman ; quint...@redhat.com; Philippe
> > Mathieu-Daudé 
> > Subject: Re: [PATCH for 8.0] igb: Save more Tx states
> >
> > On 2023/03/17 22:08, Sriram Yagnaraman wrote:
> > >
> > >
> > >> -Original Message-
> > >> From: Akihiko Odaki 
> > >> Sent: Friday, 17 March 2023 13:25
> > >> Cc: qemu-devel@nongnu.org; Jason Wang ;
> > Dmitry
> > >> Fleytman ; quint...@redhat.com; Philippe
> > >> Mathieu-Daudé ; Sriram Yagnaraman
> > >> ; Akihiko Odaki
> > >> 
> > >> Subject: [PATCH for 8.0] igb: Save more Tx states
> > >>
> > >> The current implementation of igb uses only part of a advanced Tx
> > >> context descriptor and first data descriptor because it misses some
> > >> features and sniffs the trait of the packet instead of respecting
> > >> the packet type specified in the descriptor. However, we will
> > >> certainly need the entire Tx context descriptor when we update igb
> > >> to respect these ignored fields. Save the entire context descriptor
> > >> and first data descriptor except the buffer address to prepare for such a
> change.
> > >>
> > >> This also introduces the distinction of contexts with different
> > >> indexes, which was not present in e1000e but in igb.
> > >>
> > >> Signed-off-by: Akihiko Odaki 
> > >> ---
> > >> Supersedes: <20230316155707.27007-1-akihiko.od...@daynix.com>
> > >>
> > >>   hw/net/igb.c  | 25 ++---
> > >>   hw/net/igb_core.c | 36 +++-
> > >>   hw/net/igb_core.h |  8 +++-
> > >>   3 files changed, 40 insertions(+), 29 deletions(-)
> > >>
> > >> diff --git a/hw/net/igb.c b/hw/net/igb.c index
> > >> c6d753df87..7c05896325
> > >> 100644
> > >> --- a/hw/net/igb.c
> > >> +++ b/hw/net/igb.c
> > >> @@ -502,16 +502,27 @@ static int igb_post_load(void *opaque, int
> > >> version_id)
> > >>   return igb_core_post_load(>core);  }
> > >>
> > >> -static const VMStateDescription igb_vmstate_tx = {
> > >> -.name = "igb-tx",
> > >> +static const VMStateDescription igb_vmstate_tx_ctx = {
> > >> +.name = "igb-tx-ctx",
> > >>   .version_id = 1,
> > >>   .minimum_version_id = 1,
> > >>   .fields = (VMStateField[]) {
> > >> -VMSTATE_UINT16(vlan, struct igb_tx),
> > >> -VMSTATE_UINT16(mss, struct igb_tx),
> > >> -VMSTATE_BOOL(tse, struct igb_tx),
> > >> -VMSTATE_BOOL(ixsm, struct igb_tx),
> > >> -VMSTATE_BOOL(txsm, struct igb_tx),
> > >> +VMSTATE_UINT32(vlan_macip_lens, struct
> > e1000_adv_tx_context_desc),
> > >> +VMSTATE_UINT32(seqnum_seed, struct
> e1000_adv_tx_context_desc),
> > >> +VMSTATE_UINT32(type_tucmd_mlhl, struct
> > >> e1000_adv_tx_context_desc),
> > >> +VMSTATE_UINT32(mss_l4len_idx, struct
> e1000_adv_tx_context_desc),
> > >> +}
> > >> +};
> > >> +
> > >> +static const VMStateDescription igb_vmstate_tx = {
> > >> +.name = "igb-tx",
> > >> +.version_id = 2,
> > >> +.minimum_version_id = 2,
> > >> +.fields = (VMStateField[]) {
> > >> +VMSTATE_STRUCT_ARRAY(ctx, struct igb_tx, 2, 0,
> igb_vmstate_tx_ctx,
> > >> + struct e1000_adv_tx_context_desc),
> > >> +VMSTATE_UINT32(first_cmd_type_len, struct igb_tx),
> > >> +VMSTATE_UINT32(first_olinfo_status, struct igb_tx),
> > >>   VMSTATE_BOOL(first, struct igb_tx),
> > >>   VMSTATE_BOOL(skip_cp, struct igb_tx),
> > >>   VMSTATE_END_OF_LIS

RE: [PATCH for 8.0] igb: Save more Tx states

2023-03-17 Thread Sriram Yagnaraman

> -Original Message-
> From: Akihiko Odaki 
> Sent: Friday, 17 March 2023 15:21
> To: Sriram Yagnaraman 
> Cc: qemu-devel@nongnu.org; Jason Wang ; Dmitry
> Fleytman ; quint...@redhat.com; Philippe
> Mathieu-Daudé 
> Subject: Re: [PATCH for 8.0] igb: Save more Tx states
> 
> On 2023/03/17 22:08, Sriram Yagnaraman wrote:
> >
> >
> >> -Original Message-
> >> From: Akihiko Odaki 
> >> Sent: Friday, 17 March 2023 13:25
> >> Cc: qemu-devel@nongnu.org; Jason Wang ;
> Dmitry
> >> Fleytman ; quint...@redhat.com; Philippe
> >> Mathieu-Daudé ; Sriram Yagnaraman
> >> ; Akihiko Odaki
> >> 
> >> Subject: [PATCH for 8.0] igb: Save more Tx states
> >>
> >> The current implementation of igb uses only part of a advanced Tx
> >> context descriptor and first data descriptor because it misses some
> >> features and sniffs the trait of the packet instead of respecting the
> >> packet type specified in the descriptor. However, we will certainly
> >> need the entire Tx context descriptor when we update igb to respect
> >> these ignored fields. Save the entire context descriptor and first
> >> data descriptor except the buffer address to prepare for such a change.
> >>
> >> This also introduces the distinction of contexts with different
> >> indexes, which was not present in e1000e but in igb.
> >>
> >> Signed-off-by: Akihiko Odaki 
> >> ---
> >> Supersedes: <20230316155707.27007-1-akihiko.od...@daynix.com>
> >>
> >>   hw/net/igb.c  | 25 ++---
> >>   hw/net/igb_core.c | 36 +++-
> >>   hw/net/igb_core.h |  8 +++-
> >>   3 files changed, 40 insertions(+), 29 deletions(-)
> >>
> >> diff --git a/hw/net/igb.c b/hw/net/igb.c index c6d753df87..7c05896325
> >> 100644
> >> --- a/hw/net/igb.c
> >> +++ b/hw/net/igb.c
> >> @@ -502,16 +502,27 @@ static int igb_post_load(void *opaque, int
> >> version_id)
> >>   return igb_core_post_load(>core);  }
> >>
> >> -static const VMStateDescription igb_vmstate_tx = {
> >> -.name = "igb-tx",
> >> +static const VMStateDescription igb_vmstate_tx_ctx = {
> >> +.name = "igb-tx-ctx",
> >>   .version_id = 1,
> >>   .minimum_version_id = 1,
> >>   .fields = (VMStateField[]) {
> >> -VMSTATE_UINT16(vlan, struct igb_tx),
> >> -VMSTATE_UINT16(mss, struct igb_tx),
> >> -VMSTATE_BOOL(tse, struct igb_tx),
> >> -VMSTATE_BOOL(ixsm, struct igb_tx),
> >> -VMSTATE_BOOL(txsm, struct igb_tx),
> >> +VMSTATE_UINT32(vlan_macip_lens, struct
> e1000_adv_tx_context_desc),
> >> +VMSTATE_UINT32(seqnum_seed, struct e1000_adv_tx_context_desc),
> >> +VMSTATE_UINT32(type_tucmd_mlhl, struct
> >> e1000_adv_tx_context_desc),
> >> +VMSTATE_UINT32(mss_l4len_idx, struct e1000_adv_tx_context_desc),
> >> +}
> >> +};
> >> +
> >> +static const VMStateDescription igb_vmstate_tx = {
> >> +.name = "igb-tx",
> >> +.version_id = 2,
> >> +.minimum_version_id = 2,
> >> +.fields = (VMStateField[]) {
> >> +VMSTATE_STRUCT_ARRAY(ctx, struct igb_tx, 2, 0, igb_vmstate_tx_ctx,
> >> + struct e1000_adv_tx_context_desc),
> >> +VMSTATE_UINT32(first_cmd_type_len, struct igb_tx),
> >> +VMSTATE_UINT32(first_olinfo_status, struct igb_tx),
> >>   VMSTATE_BOOL(first, struct igb_tx),
> >>   VMSTATE_BOOL(skip_cp, struct igb_tx),
> >>   VMSTATE_END_OF_LIST()
> >> diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index
> >> a7c7bfdc75..36027c2b54 100644
> >> --- a/hw/net/igb_core.c
> >> +++ b/hw/net/igb_core.c
> >> @@ -389,8 +389,10 @@ igb_rss_parse_packet(IGBCore *core, struct
> >> NetRxPkt *pkt, bool tx,  static bool  igb_setup_tx_offloads(IGBCore
> >> *core, struct igb_tx
> >> *tx)  {
> >> -if (tx->tse) {
> >> -if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true, tx->mss)) {
> >> +if (tx->first_cmd_type_len & E1000_ADVTXD_DCMD_TSE) {
> >> +uint32_t idx = (tx->first_olinfo_status >> 4) & 1;
> >
> > [...] More below
> >
> >> +uint32_t mss = tx->ctx[idx].mss_l4len_idx >> 16;
> >> +if (

RE: [PATCH for 8.0] igb: Save more Tx states

2023-03-17 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Friday, 17 March 2023 13:25
> Cc: qemu-devel@nongnu.org; Jason Wang ; Dmitry
> Fleytman ; quint...@redhat.com; Philippe
> Mathieu-Daudé ; Sriram Yagnaraman
> ; Akihiko Odaki 
> Subject: [PATCH for 8.0] igb: Save more Tx states
> 
> The current implementation of igb uses only part of a advanced Tx context
> descriptor and first data descriptor because it misses some features and 
> sniffs
> the trait of the packet instead of respecting the packet type specified in the
> descriptor. However, we will certainly need the entire Tx context descriptor
> when we update igb to respect these ignored fields. Save the entire context
> descriptor and first data descriptor except the buffer address to prepare for
> such a change.
> 
> This also introduces the distinction of contexts with different indexes, which
> was not present in e1000e but in igb.
> 
> Signed-off-by: Akihiko Odaki 
> ---
> Supersedes: <20230316155707.27007-1-akihiko.od...@daynix.com>
> 
>  hw/net/igb.c  | 25 ++---
>  hw/net/igb_core.c | 36 +++-
>  hw/net/igb_core.h |  8 +++-
>  3 files changed, 40 insertions(+), 29 deletions(-)
> 
> diff --git a/hw/net/igb.c b/hw/net/igb.c index c6d753df87..7c05896325
> 100644
> --- a/hw/net/igb.c
> +++ b/hw/net/igb.c
> @@ -502,16 +502,27 @@ static int igb_post_load(void *opaque, int
> version_id)
>  return igb_core_post_load(>core);  }
> 
> -static const VMStateDescription igb_vmstate_tx = {
> -.name = "igb-tx",
> +static const VMStateDescription igb_vmstate_tx_ctx = {
> +.name = "igb-tx-ctx",
>  .version_id = 1,
>  .minimum_version_id = 1,
>  .fields = (VMStateField[]) {
> -VMSTATE_UINT16(vlan, struct igb_tx),
> -VMSTATE_UINT16(mss, struct igb_tx),
> -VMSTATE_BOOL(tse, struct igb_tx),
> -VMSTATE_BOOL(ixsm, struct igb_tx),
> -VMSTATE_BOOL(txsm, struct igb_tx),
> +VMSTATE_UINT32(vlan_macip_lens, struct e1000_adv_tx_context_desc),
> +VMSTATE_UINT32(seqnum_seed, struct e1000_adv_tx_context_desc),
> +VMSTATE_UINT32(type_tucmd_mlhl, struct
> e1000_adv_tx_context_desc),
> +VMSTATE_UINT32(mss_l4len_idx, struct e1000_adv_tx_context_desc),
> +}
> +};
> +
> +static const VMStateDescription igb_vmstate_tx = {
> +.name = "igb-tx",
> +.version_id = 2,
> +.minimum_version_id = 2,
> +.fields = (VMStateField[]) {
> +VMSTATE_STRUCT_ARRAY(ctx, struct igb_tx, 2, 0, igb_vmstate_tx_ctx,
> + struct e1000_adv_tx_context_desc),
> +VMSTATE_UINT32(first_cmd_type_len, struct igb_tx),
> +VMSTATE_UINT32(first_olinfo_status, struct igb_tx),
>  VMSTATE_BOOL(first, struct igb_tx),
>  VMSTATE_BOOL(skip_cp, struct igb_tx),
>  VMSTATE_END_OF_LIST()
> diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index
> a7c7bfdc75..36027c2b54 100644
> --- a/hw/net/igb_core.c
> +++ b/hw/net/igb_core.c
> @@ -389,8 +389,10 @@ igb_rss_parse_packet(IGBCore *core, struct NetRxPkt
> *pkt, bool tx,  static bool  igb_setup_tx_offloads(IGBCore *core, struct 
> igb_tx
> *tx)  {
> -if (tx->tse) {
> -if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true, tx->mss)) {
> +if (tx->first_cmd_type_len & E1000_ADVTXD_DCMD_TSE) {
> +uint32_t idx = (tx->first_olinfo_status >> 4) & 1;

[...] More below

> +uint32_t mss = tx->ctx[idx].mss_l4len_idx >> 16;
> +if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true, mss)) {
>  return false;
>  }
> 
> @@ -399,13 +401,13 @@ igb_setup_tx_offloads(IGBCore *core, struct igb_tx
> *tx)
>  return true;
>  }
> 
> -if (tx->txsm) {
> +if (tx->first_olinfo_status & E1000_ADVTXD_POTS_TXSM) {
>  if (!net_tx_pkt_build_vheader(tx->tx_pkt, false, true, 0)) {
>  return false;
>  }
>  }
> 
> -if (tx->ixsm) {
> +if (tx->first_olinfo_status & E1000_ADVTXD_POTS_IXSM) {
>  net_tx_pkt_update_ip_hdr_checksum(tx->tx_pkt);
>  }
> 
> @@ -527,7 +529,7 @@ igb_process_tx_desc(IGBCore *core,  {
>  struct e1000_adv_tx_context_desc *tx_ctx_desc;
>  uint32_t cmd_type_len;
> -uint32_t olinfo_status;
> +uint32_t idx;
>  uint64_t buffer_addr;
>  uint16_t length;
> 
> @@ -538,20 +540,19 @@ igb_process_tx_desc(IGBCore *core,
>  E1000_ADVTXD_DTYP_DATA) {
>  /* advanced transmit data descriptor */
>  if (tx->first) {
> -   

RE: [PATCH for 8.0 v2] igb: Save the entire Tx context descriptor

2023-03-17 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Friday, 17 March 2023 12:13
> To: Sriram Yagnaraman 
> Cc: qemu-devel@nongnu.org; Jason Wang ; Dmitry
> Fleytman ; quint...@redhat.com; Philippe
> Mathieu-Daudé 
> Subject: Re: [PATCH for 8.0 v2] igb: Save the entire Tx context descriptor
> 
> On 2023/03/17 18:19, Sriram Yagnaraman wrote:
> >
> >
> >> -Original Message-
> >> From: Akihiko Odaki 
> >> Sent: Friday, 17 March 2023 06:46
> >> To: Sriram Yagnaraman 
> >> Cc: qemu-devel@nongnu.org; Jason Wang ;
> Dmitry
> >> Fleytman ; quint...@redhat.com; Philippe
> >> Mathieu-Daudé 
> >> Subject: Re: [PATCH for 8.0 v2] igb: Save the entire Tx context
> >> descriptor
> >>
> >> On 2023/03/17 5:27, Sriram Yagnaraman wrote:
> >>>
> >>>> -Original Message-
> >>>> From: qemu-devel-bounces+sriram.yagnaraman=est.t...@nongnu.org
> >>>>  On
> >> Behalf
> >>>> Of Akihiko Odaki
> >>>> Sent: Thursday, 16 March 2023 16:57
> >>>> Cc: qemu-devel@nongnu.org; Jason Wang ;
> >> Dmitry
> >>>> Fleytman ; quint...@redhat.com; Philippe
> >>>> Mathieu-Daudé ; Akihiko Odaki
> >>>> 
> >>>> Subject: [PATCH for 8.0 v2] igb: Save the entire Tx context
> >>>> descriptor
> >>>>
> >>>> The current implementation of igb uses only part of a advanced Tx
> >>>> context descriptor because it misses some features and sniffs the
> >>>> trait of the packet instead of respecting the packet type specified
> >>>> in the descriptor. However, we will certainly need the entire Tx
> >>>> context descriptor when we update igb to respect these ignored
> >>>> fields. Save the entire Tx context descriptor to prepare for such a 
> >>>> change.
> >>>>
> >>>> Signed-off-by: Akihiko Odaki 
> >>>> ---
> >>>> V1 -> V2: Bump igb-tx version
> >>>>
> >>>>hw/net/igb.c  | 10 ++
> >>>>hw/net/igb_core.c | 17 ++---  hw/net/igb_core.h |  3 +--
> >>>>3 files changed, 17 insertions(+), 13 deletions(-)
> >>>>
> >>>> diff --git a/hw/net/igb.c b/hw/net/igb.c index
> >>>> c6d753df87..f9ec82fc28
> >>>> 100644
> >>>> --- a/hw/net/igb.c
> >>>> +++ b/hw/net/igb.c
> >>>> @@ -504,11 +504,13 @@ static int igb_post_load(void *opaque, int
> >>>> version_id)
> >>>>
> >>>>static const VMStateDescription igb_vmstate_tx = {
> >>>>.name = "igb-tx",
> >>>> -.version_id = 1,
> >>>> -.minimum_version_id = 1,
> >>>> +.version_id = 2,
> >>>> +.minimum_version_id = 2,
> >>>>.fields = (VMStateField[]) {
> >>>> -VMSTATE_UINT16(vlan, struct igb_tx),
> >>>> -VMSTATE_UINT16(mss, struct igb_tx),
> >>>> +VMSTATE_UINT32(ctx.vlan_macip_lens, struct igb_tx),
> >>>> +VMSTATE_UINT32(ctx.seqnum_seed, struct igb_tx),
> >>>> +VMSTATE_UINT32(ctx.type_tucmd_mlhl, struct igb_tx),
> >>>> +VMSTATE_UINT32(ctx.mss_l4len_idx, struct igb_tx),
> >>>>VMSTATE_BOOL(tse, struct igb_tx),
> >>>>VMSTATE_BOOL(ixsm, struct igb_tx),
> >>>>VMSTATE_BOOL(txsm, struct igb_tx), diff --git
> >>>> a/hw/net/igb_core.c b/hw/net/igb_core.c index
> >>>> a7c7bfdc75..304f5d849f
> >>>> 100644
> >>>> --- a/hw/net/igb_core.c
> >>>> +++ b/hw/net/igb_core.c
> >>>> @@ -390,7 +390,8 @@ static bool
> >>>>igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx)  {
> >>>>if (tx->tse) {
> >>>> -if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true, tx->mss)) 
> >>>> {
> >>>> +uint32_t mss = tx->ctx.mss_l4len_idx >> 16;
> >>>> +if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true,
> >>>> + mss)) {
> >>>>return false;
> >>>>}
> >>>>
> >>>> @@ -550,8 +551,10 @@ igb_process_tx_desc(IGBCore *core,
> >>>>   E1000_ADVTXD_DTYP_CTXT) {
> >>>>   

RE: [PATCH for 8.0 v2] igb: Save the entire Tx context descriptor

2023-03-17 Thread Sriram Yagnaraman


> -Original Message-
> From: Akihiko Odaki 
> Sent: Friday, 17 March 2023 06:46
> To: Sriram Yagnaraman 
> Cc: qemu-devel@nongnu.org; Jason Wang ; Dmitry
> Fleytman ; quint...@redhat.com; Philippe
> Mathieu-Daudé 
> Subject: Re: [PATCH for 8.0 v2] igb: Save the entire Tx context descriptor
> 
> On 2023/03/17 5:27, Sriram Yagnaraman wrote:
> >
> >> -Original Message-
> >> From: qemu-devel-bounces+sriram.yagnaraman=est.t...@nongnu.org
> >>  On
> Behalf
> >> Of Akihiko Odaki
> >> Sent: Thursday, 16 March 2023 16:57
> >> Cc: qemu-devel@nongnu.org; Jason Wang ;
> Dmitry
> >> Fleytman ; quint...@redhat.com; Philippe
> >> Mathieu-Daudé ; Akihiko Odaki
> >> 
> >> Subject: [PATCH for 8.0 v2] igb: Save the entire Tx context
> >> descriptor
> >>
> >> The current implementation of igb uses only part of a advanced Tx
> >> context descriptor because it misses some features and sniffs the
> >> trait of the packet instead of respecting the packet type specified
> >> in the descriptor. However, we will certainly need the entire Tx
> >> context descriptor when we update igb to respect these ignored
> >> fields. Save the entire Tx context descriptor to prepare for such a change.
> >>
> >> Signed-off-by: Akihiko Odaki 
> >> ---
> >> V1 -> V2: Bump igb-tx version
> >>
> >>   hw/net/igb.c  | 10 ++
> >>   hw/net/igb_core.c | 17 ++---  hw/net/igb_core.h |  3 +--
> >>   3 files changed, 17 insertions(+), 13 deletions(-)
> >>
> >> diff --git a/hw/net/igb.c b/hw/net/igb.c index c6d753df87..f9ec82fc28
> >> 100644
> >> --- a/hw/net/igb.c
> >> +++ b/hw/net/igb.c
> >> @@ -504,11 +504,13 @@ static int igb_post_load(void *opaque, int
> >> version_id)
> >>
> >>   static const VMStateDescription igb_vmstate_tx = {
> >>   .name = "igb-tx",
> >> -.version_id = 1,
> >> -.minimum_version_id = 1,
> >> +.version_id = 2,
> >> +.minimum_version_id = 2,
> >>   .fields = (VMStateField[]) {
> >> -VMSTATE_UINT16(vlan, struct igb_tx),
> >> -VMSTATE_UINT16(mss, struct igb_tx),
> >> +VMSTATE_UINT32(ctx.vlan_macip_lens, struct igb_tx),
> >> +VMSTATE_UINT32(ctx.seqnum_seed, struct igb_tx),
> >> +VMSTATE_UINT32(ctx.type_tucmd_mlhl, struct igb_tx),
> >> +VMSTATE_UINT32(ctx.mss_l4len_idx, struct igb_tx),
> >>   VMSTATE_BOOL(tse, struct igb_tx),
> >>   VMSTATE_BOOL(ixsm, struct igb_tx),
> >>   VMSTATE_BOOL(txsm, struct igb_tx), diff --git
> >> a/hw/net/igb_core.c b/hw/net/igb_core.c index a7c7bfdc75..304f5d849f
> >> 100644
> >> --- a/hw/net/igb_core.c
> >> +++ b/hw/net/igb_core.c
> >> @@ -390,7 +390,8 @@ static bool
> >>   igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx)  {
> >>   if (tx->tse) {
> >> -if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true, tx->mss)) {
> >> +uint32_t mss = tx->ctx.mss_l4len_idx >> 16;
> >> +if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true, mss))
> >> + {
> >>   return false;
> >>   }
> >>
> >> @@ -550,8 +551,10 @@ igb_process_tx_desc(IGBCore *core,
> >>  E1000_ADVTXD_DTYP_CTXT) {
> >>   /* advanced transmit context descriptor */
> >>   tx_ctx_desc = (struct e1000_adv_tx_context_desc *)tx_desc;
> >> -tx->vlan = le32_to_cpu(tx_ctx_desc->vlan_macip_lens) >> 16;
> >> -tx->mss = le32_to_cpu(tx_ctx_desc->mss_l4len_idx) >> 16;
> >> +tx->ctx.vlan_macip_lens = le32_to_cpu(tx_ctx_desc-
> >vlan_macip_lens);
> >> +tx->ctx.seqnum_seed = le32_to_cpu(tx_ctx_desc->seqnum_seed);
> >> +tx->ctx.type_tucmd_mlhl = le32_to_cpu(tx_ctx_desc-
> >>> type_tucmd_mlhl);
> >> +tx->ctx.mss_l4len_idx =
> >> + le32_to_cpu(tx_ctx_desc->mss_l4len_idx);
> >
> > Wouldn't it be better to parse the context into all the required fields 
> > like vlan,
> mss, etc., already when handling the context descriptor, instead of parsing 
> it for
> every data descriptor later?
> > Also, in my yet to be merged patch [1] which handles VLAN insertion for
> VMDq I use the vlan field in multiple places, so it would be better to have 
> the

RE: [PATCH for 8.0 v2] igb: Save the entire Tx context descriptor

2023-03-16 Thread Sriram Yagnaraman

> -Original Message-
> From: qemu-devel-bounces+sriram.yagnaraman=est.t...@nongnu.org
>  On Behalf
> Of Akihiko Odaki
> Sent: Thursday, 16 March 2023 16:57
> Cc: qemu-devel@nongnu.org; Jason Wang ; Dmitry
> Fleytman ; quint...@redhat.com; Philippe
> Mathieu-Daudé ; Akihiko Odaki
> 
> Subject: [PATCH for 8.0 v2] igb: Save the entire Tx context descriptor
> 
> The current implementation of igb uses only part of a advanced Tx context
> descriptor because it misses some features and sniffs the trait of the packet
> instead of respecting the packet type specified in the descriptor. However, we
> will certainly need the entire Tx context descriptor when we update igb to
> respect these ignored fields. Save the entire Tx context descriptor to prepare
> for such a change.
> 
> Signed-off-by: Akihiko Odaki 
> ---
> V1 -> V2: Bump igb-tx version
> 
>  hw/net/igb.c  | 10 ++
>  hw/net/igb_core.c | 17 ++---  hw/net/igb_core.h |  3 +--
>  3 files changed, 17 insertions(+), 13 deletions(-)
> 
> diff --git a/hw/net/igb.c b/hw/net/igb.c index c6d753df87..f9ec82fc28 100644
> --- a/hw/net/igb.c
> +++ b/hw/net/igb.c
> @@ -504,11 +504,13 @@ static int igb_post_load(void *opaque, int
> version_id)
> 
>  static const VMStateDescription igb_vmstate_tx = {
>  .name = "igb-tx",
> -.version_id = 1,
> -.minimum_version_id = 1,
> +.version_id = 2,
> +.minimum_version_id = 2,
>  .fields = (VMStateField[]) {
> -VMSTATE_UINT16(vlan, struct igb_tx),
> -VMSTATE_UINT16(mss, struct igb_tx),
> +VMSTATE_UINT32(ctx.vlan_macip_lens, struct igb_tx),
> +VMSTATE_UINT32(ctx.seqnum_seed, struct igb_tx),
> +VMSTATE_UINT32(ctx.type_tucmd_mlhl, struct igb_tx),
> +VMSTATE_UINT32(ctx.mss_l4len_idx, struct igb_tx),
>  VMSTATE_BOOL(tse, struct igb_tx),
>  VMSTATE_BOOL(ixsm, struct igb_tx),
>  VMSTATE_BOOL(txsm, struct igb_tx), diff --git a/hw/net/igb_core.c
> b/hw/net/igb_core.c index a7c7bfdc75..304f5d849f 100644
> --- a/hw/net/igb_core.c
> +++ b/hw/net/igb_core.c
> @@ -390,7 +390,8 @@ static bool
>  igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx)  {
>  if (tx->tse) {
> -if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true, tx->mss)) {
> +uint32_t mss = tx->ctx.mss_l4len_idx >> 16;
> +if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true, mss)) {
>  return false;
>  }
> 
> @@ -550,8 +551,10 @@ igb_process_tx_desc(IGBCore *core,
> E1000_ADVTXD_DTYP_CTXT) {
>  /* advanced transmit context descriptor */
>  tx_ctx_desc = (struct e1000_adv_tx_context_desc *)tx_desc;
> -tx->vlan = le32_to_cpu(tx_ctx_desc->vlan_macip_lens) >> 16;
> -tx->mss = le32_to_cpu(tx_ctx_desc->mss_l4len_idx) >> 16;
> +tx->ctx.vlan_macip_lens = 
> le32_to_cpu(tx_ctx_desc->vlan_macip_lens);
> +tx->ctx.seqnum_seed = le32_to_cpu(tx_ctx_desc->seqnum_seed);
> +tx->ctx.type_tucmd_mlhl = le32_to_cpu(tx_ctx_desc-
> >type_tucmd_mlhl);
> +tx->ctx.mss_l4len_idx =
> + le32_to_cpu(tx_ctx_desc->mss_l4len_idx);

Wouldn't it be better to parse the context into all the required fields like 
vlan, mss, etc., already when handling the context descriptor, instead of 
parsing it for every data descriptor later?
Also, in my yet to be merged patch [1] which handles VLAN insertion for VMDq I 
use the vlan field in multiple places, so it would be better to have the vlan 
value readily available. 
[1]: https://lists.gnu.org/archive/html/qemu-devel/2023-02/msg00393.html

>  return;
>  } else {
>  /* unknown descriptor type */ @@ -575,8 +578,9 @@
> igb_process_tx_desc(IGBCore *core,
>  if (cmd_type_len & E1000_TXD_CMD_EOP) {
>  if (!tx->skip_cp && net_tx_pkt_parse(tx->tx_pkt)) {
>  if (cmd_type_len & E1000_TXD_CMD_VLE) {
> -net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, tx->vlan,
> -core->mac[VET] & 0x);
> +uint16_t vlan = tx->ctx.vlan_macip_lens >> 16;
> +uint16_t vet = core->mac[VET] & 0x;
> +net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan, vet);
>  }
>  if (igb_tx_pkt_send(core, tx, queue_index)) {
>  igb_on_tx_done_update_stats(core, tx->tx_pkt); @@ -4024,8
> +4028,7 @@ static void igb_reset(IGBCore *core, bool sw)
>  for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
>  tx = >tx[i];
>  net_tx_pkt_reset(tx->tx_pkt);
> -tx->vlan = 0;
> -tx->mss = 0;
> +memset(>ctx, 0, sizeof(tx->ctx));
>  tx->tse = false;
>  tx->ixsm = false;
>  tx->txsm = false;
> diff --git a/hw/net/igb_core.h b/hw/net/igb_core.h index
> 814c1e264b..3483edc655 100644
> --- a/hw/net/igb_core.h
> +++ b/hw/net/igb_core.h
> @@ -72,8 +72,7 @@ struct IGBCore {
>  QEMUTimer *autoneg_timer;
> 
>  struct 

[PATCH v7 6/8] igb: respect E1000_VMOLR_RSSE

2023-03-13 Thread Sriram Yagnaraman
RSS for VFs is only enabled if VMOLR[n].RSSE is set.

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index cb6993dd3b..f91cea20e5 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -1051,8 +1051,15 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 if (queues) {
 igb_rss_parse_packet(core, core->rx_pkt,
  external_tx != NULL, rss_info);
+/* Sec 8.26.1: PQn = VFn + VQn*8 */
 if (rss_info->queue & 1) {
-queues <<= 8;
+for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
+if ((queues & BIT(i)) &&
+(core->mac[VMOLR0 + i] & E1000_VMOLR_RSSE)) {
+queues |= BIT(i + IGB_NUM_VM_POOLS);
+queues &= ~BIT(i);
+}
+}
 }
 }
 } else {
-- 
2.34.1




[PATCH v7 0/8] igb: merge changes from <20221229190817.25500-1-sriram.yagnara...@est.tech>

2023-03-12 Thread Sriram Yagnaraman
Now that Akihiko's patchset for introducing igb device is merged, I have
rebased my changes on master. The changes proposed here add support
for 
 - Correct PF/VF reset handling
 - Introduce ICR_RXDW register definition
 - Implement support for VFTE/VFRE/VMOLR_RSSE/VMVIR/VMOLR registers
 - Check oversized packet for VMDq
 - VF statistics

Changes since v6:
- Rebased on latest, which includes igb device introduction.

Changes since v5:
- Added back an unnecessarily removed empty line

Changes since v4:
- Removed the change implementing VTCTL.IGMAC, it needs more thought
  and implementation of DTXSWC.LLE and VLVF.LVLAN first

Changes since v3:
- Fix comments
- Rebased on latest patchset from Akihiko
- Remove Rx loop improvements that Akihiko has pulled into his patchset

Changes since v2:
- Fixed more comments from Akihiko
- Reordered the patches to make changes easier to understand

Changes since v1:
- Fix review comments from Akihiko

Sriram Yagnaraman (8):
  MAINTAINERS: Add Sriram Yagnaraman as a igb reviewer
  igb: handle PF/VF reset properly
  igb: add ICR_RXDW
  igb: implement VFRE and VFTE registers
  igb: check oversized packets for VMDq
  igb: respect E1000_VMOLR_RSSE
  igb: implement VF Tx and Rx stats
  igb: respect VMVIR and VMOLR for VLAN

 MAINTAINERS  |   1 +
 hw/net/e1000x_regs.h |   4 +
 hw/net/igb_core.c| 196 +++
 hw/net/igb_core.h|   1 +
 hw/net/igb_regs.h|   6 ++
 hw/net/trace-events  |   2 +
 6 files changed, 174 insertions(+), 36 deletions(-)

-- 
2.34.1




[PATCH v7 4/8] igb: implement VFRE and VFTE registers

2023-03-12 Thread Sriram Yagnaraman
Also introduce:
- Checks for RXDCTL/TXDCTL queue enable bits
- IGB_NUM_VM_POOLS enum (Sec 1.5: Table 1-7)

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c | 38 +++---
 hw/net/igb_core.h |  1 +
 hw/net/igb_regs.h |  3 +++
 3 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 2a16692448..b2ea3a18ed 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -780,6 +780,18 @@ igb_txdesc_writeback(IGBCore *core, dma_addr_t base,
 return igb_tx_wb_eic(core, txi->idx);
 }
 
+static inline bool
+igb_tx_enabled(IGBCore *core, const E1000E_RingInfo *txi)
+{
+bool vmdq = core->mac[MRQC] & 1;
+uint16_t qn = txi->idx;
+uint16_t pool = qn % IGB_NUM_VM_POOLS;
+
+return (core->mac[TCTL] & E1000_TCTL_EN) &&
+(!vmdq || core->mac[VFTE] & BIT(pool)) &&
+(core->mac[TXDCTL0 + (qn * 16)] & E1000_TXDCTL_QUEUE_ENABLE);
+}
+
 static void
 igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
 {
@@ -789,8 +801,7 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
 const E1000E_RingInfo *txi = txr->i;
 uint32_t eic = 0;
 
-/* TODO: check if the queue itself is enabled too. */
-if (!(core->mac[TCTL] & E1000_TCTL_EN)) {
+if (!igb_tx_enabled(core, txi)) {
 trace_e1000e_tx_disabled();
 return;
 }
@@ -866,6 +877,9 @@ igb_can_receive(IGBCore *core)
 
 for (i = 0; i < IGB_NUM_QUEUES; i++) {
 E1000E_RxRing rxr;
+if (!(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
+continue;
+}
 
 igb_rx_ring_init(core, , i);
 if (igb_ring_enabled(core, rxr.i) && igb_has_rxbufs(core, rxr.i, 1)) {
@@ -932,7 +946,7 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 
 if (core->mac[MRQC] & 1) {
 if (is_broadcast_ether_addr(ehdr->h_dest)) {
-for (i = 0; i < 8; i++) {
+for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
 if (core->mac[VMOLR0 + i] & E1000_VMOLR_BAM) {
 queues |= BIT(i);
 }
@@ -966,7 +980,7 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 f = ta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
 f = (((ehdr->h_dest[5] << 8) | ehdr->h_dest[4]) >> f) & 0xfff;
 if (macp[f >> 5] & (1 << (f & 0x1f))) {
-for (i = 0; i < 8; i++) {
+for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
 if (core->mac[VMOLR0 + i] & E1000_VMOLR_ROMPE) {
 queues |= BIT(i);
 }
@@ -989,7 +1003,7 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 }
 }
 } else {
-for (i = 0; i < 8; i++) {
+for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
 if (core->mac[VMOLR0 + i] & E1000_VMOLR_AUPE) {
 mask |= BIT(i);
 }
@@ -1005,6 +1019,7 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 queues = BIT(def_pl >> E1000_VT_CTL_DEFAULT_POOL_SHIFT);
 }
 
+queues &= core->mac[VFRE];
 igb_rss_parse_packet(core, core->rx_pkt, external_tx != NULL, 
rss_info);
 if (rss_info->queue & 1) {
 queues <<= 8;
@@ -1565,7 +1580,8 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
 e1000x_fcs_len(core->mac);
 
 for (i = 0; i < IGB_NUM_QUEUES; i++) {
-if (!(queues & BIT(i))) {
+if (!(queues & BIT(i)) ||
+!(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
 continue;
 }
 
@@ -1971,9 +1987,16 @@ static void igb_set_vfmailbox(IGBCore *core, int index, 
uint32_t val)
 
 static void igb_vf_reset(IGBCore *core, uint16_t vfn)
 {
+uint16_t qn0 = vfn;
+uint16_t qn1 = vfn + IGB_NUM_VM_POOLS;
+
 /* disable Rx and Tx for the VF*/
-core->mac[VFTE] &= ~BIT(vfn);
+core->mac[RXDCTL0 + (qn0 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE;
+core->mac[RXDCTL0 + (qn1 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE;
+core->mac[TXDCTL0 + (qn0 * 16)] &= ~E1000_TXDCTL_QUEUE_ENABLE;
+core->mac[TXDCTL0 + (qn1 * 16)] &= ~E1000_TXDCTL_QUEUE_ENABLE;
 core->mac[VFRE] &= ~BIT(vfn);
+core->mac[VFTE] &= ~BIT(vfn);
 /* indicate VF reset to PF */
 core->mac[VFLRE] |= BIT(vfn);
 /* VFLRE and mailbox use the same interrupt cause */
@@ -3908,6 +3931,7 @@ igb_phy_reg_init[] = {
 static const uint32_t igb_mac_reg_init[] = {
 [LEDCTL]= 2 | (3 <<

[PATCH v7 7/8] igb: implement VF Tx and Rx stats

2023-03-12 Thread Sriram Yagnaraman
Please note that loopback counters for VM to VM traffic are not
implemented yet: VFGOTLBC, VFGPTLBC, VFGORLBC and VFGPRLBC.

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c | 26 ++
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index f91cea20e5..86de73afc8 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -490,7 +490,7 @@ igb_tx_pkt_send(IGBCore *core, struct igb_tx *tx, int 
queue_index)
 }
 
 static void
-igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt)
+igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt, int qn)
 {
 static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
 PTC1023, PTC1522 };
@@ -517,6 +517,13 @@ igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt 
*tx_pkt)
 core->mac[GPTC] = core->mac[TPT];
 core->mac[GOTCL] = core->mac[TOTL];
 core->mac[GOTCH] = core->mac[TOTH];
+
+if (core->mac[MRQC] & 1) {
+uint16_t pool = qn % IGB_NUM_VM_POOLS;
+
+core->mac[PVFGOTC0 + (pool * 64)] += tot_len;
+core->mac[PVFGPTC0 + (pool * 64)]++;
+}
 }
 
 static void
@@ -579,7 +586,7 @@ igb_process_tx_desc(IGBCore *core,
 core->mac[VET] & 0x);
 }
 if (igb_tx_pkt_send(core, tx, queue_index)) {
-igb_on_tx_done_update_stats(core, tx->tx_pkt);
+igb_on_tx_done_update_stats(core, tx->tx_pkt, queue_index);
 }
 }
 
@@ -1403,7 +1410,8 @@ igb_write_to_rx_buffers(IGBCore *core,
 }
 
 static void
-igb_update_rx_stats(IGBCore *core, size_t data_size, size_t data_fcs_size)
+igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi,
+size_t data_size, size_t data_fcs_size)
 {
 e1000x_update_rx_total_stats(core->mac, data_size, data_fcs_size);
 
@@ -1419,6 +1427,16 @@ igb_update_rx_stats(IGBCore *core, size_t data_size, 
size_t data_fcs_size)
 default:
 break;
 }
+
+if (core->mac[MRQC] & 1) {
+uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
+
+core->mac[PVFGORC0 + (pool * 64)] += data_size + 4;
+core->mac[PVFGPRC0 + (pool * 64)]++;
+if (net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) {
+core->mac[PVFMPRC0 + (pool * 64)]++;
+}
+}
 }
 
 static inline bool
@@ -1520,7 +1538,7 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt 
*pkt,
 
 } while (desc_offset < total_size);
 
-igb_update_rx_stats(core, size, total_size);
+igb_update_rx_stats(core, rxi, size, total_size);
 }
 
 static inline void
-- 
2.34.1




[PATCH v7 5/8] igb: check oversized packets for VMDq

2023-03-12 Thread Sriram Yagnaraman
Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c | 41 -
 1 file changed, 36 insertions(+), 5 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index b2ea3a18ed..cb6993dd3b 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -915,12 +915,26 @@ igb_rx_l4_cso_enabled(IGBCore *core)
 return !!(core->mac[RXCSUM] & E1000_RXCSUM_TUOFLD);
 }
 
+static bool
+igb_rx_is_oversized(IGBCore *core, uint16_t qn, size_t size)
+{
+uint16_t pool = qn % IGB_NUM_VM_POOLS;
+bool lpe = !!(core->mac[VMOLR0 + pool] & E1000_VMOLR_LPE);
+int max_ethernet_lpe_size =
+core->mac[VMOLR0 + pool] & E1000_VMOLR_RLPML_MASK;
+int max_ethernet_vlan_size = 1522;
+
+return size > (lpe ? max_ethernet_lpe_size : max_ethernet_vlan_size);
+}
+
 static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header 
*ehdr,
-   E1000E_RSSInfo *rss_info, bool *external_tx)
+   size_t size, E1000E_RSSInfo *rss_info,
+   bool *external_tx)
 {
 static const int ta_shift[] = { 4, 3, 2, 0 };
 uint32_t f, ra[2], *macp, rctl = core->mac[RCTL];
 uint16_t queues = 0;
+uint16_t oversized = 0;
 uint16_t vid = lduw_be_p(_GET_VLAN_HDR(ehdr)->h_tci) & VLAN_VID_MASK;
 bool accepted = false;
 int i;
@@ -1020,9 +1034,26 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 }
 
 queues &= core->mac[VFRE];
-igb_rss_parse_packet(core, core->rx_pkt, external_tx != NULL, 
rss_info);
-if (rss_info->queue & 1) {
-queues <<= 8;
+if (queues) {
+for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
+if ((queues & BIT(i)) && igb_rx_is_oversized(core, i, size)) {
+oversized |= BIT(i);
+}
+}
+/* 8.19.37 increment ROC if packet is oversized for all queues */
+if (oversized == queues) {
+trace_e1000x_rx_oversized(size);
+e1000x_inc_reg_if_not_full(core->mac, ROC);
+}
+queues &= ~oversized;
+}
+
+if (queues) {
+igb_rss_parse_packet(core, core->rx_pkt,
+ external_tx != NULL, rss_info);
+if (rss_info->queue & 1) {
+queues <<= 8;
+}
 }
 } else {
 switch (net_rx_pkt_get_packet_type(core->rx_pkt)) {
@@ -1570,7 +1601,7 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
e1000x_vlan_enabled(core->mac),
core->mac[VET] & 0x);
 
-queues = igb_receive_assign(core, ehdr, _info, external_tx);
+queues = igb_receive_assign(core, ehdr, size, _info, external_tx);
 if (!queues) {
 trace_e1000e_rx_flt_dropped();
 return orig_size;
-- 
2.34.1




[PATCH v7 2/8] igb: handle PF/VF reset properly

2023-03-12 Thread Sriram Yagnaraman
Use PFRSTD to reset RSTI bit for VFs, and raise VFLRE interrupt when VF
is reset.

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c   | 33 +
 hw/net/igb_regs.h   |  3 +++
 hw/net/trace-events |  2 ++
 3 files changed, 26 insertions(+), 12 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index a7c7bfdc75..5eb83a56f9 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -1892,14 +1892,6 @@ static void igb_set_eims(IGBCore *core, int index, 
uint32_t val)
 igb_update_interrupt_state(core);
 }
 
-static void igb_vf_reset(IGBCore *core, uint16_t vfn)
-{
-/* TODO: Reset of the queue enable and the interrupt registers of the VF. 
*/
-
-core->mac[V2PMAILBOX0 + vfn] &= ~E1000_V2PMAILBOX_RSTI;
-core->mac[V2PMAILBOX0 + vfn] = E1000_V2PMAILBOX_RSTD;
-}
-
 static void mailbox_interrupt_to_vf(IGBCore *core, uint16_t vfn)
 {
 uint32_t ent = core->mac[VTIVAR_MISC + vfn];
@@ -1977,6 +1969,17 @@ static void igb_set_vfmailbox(IGBCore *core, int index, 
uint32_t val)
 }
 }
 
+static void igb_vf_reset(IGBCore *core, uint16_t vfn)
+{
+/* disable Rx and Tx for the VF*/
+core->mac[VFTE] &= ~BIT(vfn);
+core->mac[VFRE] &= ~BIT(vfn);
+/* indicate VF reset to PF */
+core->mac[VFLRE] |= BIT(vfn);
+/* VFLRE and mailbox use the same interrupt cause */
+mailbox_interrupt_to_pf(core);
+}
+
 static void igb_w1c(IGBCore *core, int index, uint32_t val)
 {
 core->mac[index] &= ~val;
@@ -2231,14 +2234,20 @@ igb_set_status(IGBCore *core, int index, uint32_t val)
 static void
 igb_set_ctrlext(IGBCore *core, int index, uint32_t val)
 {
-trace_e1000e_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK),
- !!(val & E1000_CTRL_EXT_SPD_BYPS));
-
-/* TODO: PFRSTD */
+trace_igb_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK),
+  !!(val & E1000_CTRL_EXT_SPD_BYPS),
+  !!(val & E1000_CTRL_EXT_PFRSTD));
 
 /* Zero self-clearing bits */
 val &= ~(E1000_CTRL_EXT_ASDCHK | E1000_CTRL_EXT_EE_RST);
 core->mac[CTRL_EXT] = val;
+
+if (core->mac[CTRL_EXT] & E1000_CTRL_EXT_PFRSTD) {
+for (int vfn = 0; vfn < IGB_MAX_VF_FUNCTIONS; vfn++) {
+core->mac[V2PMAILBOX0 + vfn] &= ~E1000_V2PMAILBOX_RSTI;
+core->mac[V2PMAILBOX0 + vfn] |= E1000_V2PMAILBOX_RSTD;
+}
+}
 }
 
 static void
diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h
index 00934d4f20..a658f9b53f 100644
--- a/hw/net/igb_regs.h
+++ b/hw/net/igb_regs.h
@@ -240,6 +240,9 @@ union e1000_adv_rx_desc {
 
 /* from igb/e1000_defines.h */
 
+/* Physical Func Reset Done Indication */
+#define E1000_CTRL_EXT_PFRSTD   0x4000
+
 #define E1000_IVAR_VALID 0x80
 #define E1000_GPIE_NSICR 0x0001
 #define E1000_GPIE_MSIX_MODE 0x0010
diff --git a/hw/net/trace-events b/hw/net/trace-events
index 65753411fc..d35554fce8 100644
--- a/hw/net/trace-events
+++ b/hw/net/trace-events
@@ -280,6 +280,8 @@ igb_core_mdic_read_unhandled(uint32_t addr) "MDIC READ: 
PHY[%u] UNHANDLED"
 igb_core_mdic_write(uint32_t addr, uint32_t data) "MDIC WRITE: PHY[%u] = 0x%x"
 igb_core_mdic_write_unhandled(uint32_t addr) "MDIC WRITE: PHY[%u] UNHANDLED"
 
+igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) 
"Set extended link params: ASD check: %d, Speed select bypass: %d, PF reset 
done: %d"
+
 igb_rx_desc_buff_size(uint32_t b) "buffer size: %u"
 igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, 
uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u"
 
-- 
2.34.1




[PATCH v7 3/8] igb: add ICR_RXDW

2023-03-12 Thread Sriram Yagnaraman
IGB uses RXDW ICR bit to indicate that rx descriptor has been written
back. This is the same as RXT0 bit in older HW.

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/e1000x_regs.h | 4 
 hw/net/igb_core.c| 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h
index c0832fa23d..6d3c4c6d3a 100644
--- a/hw/net/e1000x_regs.h
+++ b/hw/net/e1000x_regs.h
@@ -335,6 +335,7 @@
 #define E1000_ICR_RXDMT00x0010 /* rx desc min. threshold (0) */
 #define E1000_ICR_RXO   0x0040 /* rx overrun */
 #define E1000_ICR_RXT0  0x0080 /* rx timer intr (ring 0) */
+#define E1000_ICR_RXDW  0x0080 /* rx desc written back */
 #define E1000_ICR_MDAC  0x0200 /* MDIO access complete */
 #define E1000_ICR_RXCFG 0x0400 /* RX /c/ ordered set */
 #define E1000_ICR_GPI_EN0   0x0800 /* GP Int 0 */
@@ -378,6 +379,7 @@
 #define E1000_ICS_RXDMT0E1000_ICR_RXDMT0/* rx desc min. threshold */
 #define E1000_ICS_RXO   E1000_ICR_RXO   /* rx overrun */
 #define E1000_ICS_RXT0  E1000_ICR_RXT0  /* rx timer intr */
+#define E1000_ICS_RXDW  E1000_ICR_RXDW  /* rx desc written back */
 #define E1000_ICS_MDAC  E1000_ICR_MDAC  /* MDIO access complete */
 #define E1000_ICS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
 #define E1000_ICS_GPI_EN0   E1000_ICR_GPI_EN0   /* GP Int 0 */
@@ -407,6 +409,7 @@
 #define E1000_IMS_RXDMT0E1000_ICR_RXDMT0/* rx desc min. threshold */
 #define E1000_IMS_RXO   E1000_ICR_RXO   /* rx overrun */
 #define E1000_IMS_RXT0  E1000_ICR_RXT0  /* rx timer intr */
+#define E1000_IMS_RXDW  E1000_ICR_RXDW  /* rx desc written back */
 #define E1000_IMS_MDAC  E1000_ICR_MDAC  /* MDIO access complete */
 #define E1000_IMS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
 #define E1000_IMS_GPI_EN0   E1000_ICR_GPI_EN0   /* GP Int 0 */
@@ -441,6 +444,7 @@
 #define E1000_IMC_RXDMT0E1000_ICR_RXDMT0/* rx desc min. threshold */
 #define E1000_IMC_RXO   E1000_ICR_RXO   /* rx overrun */
 #define E1000_IMC_RXT0  E1000_ICR_RXT0  /* rx timer intr */
+#define E1000_IMC_RXDW  E1000_ICR_RXDW  /* rx desc written back */
 #define E1000_IMC_MDAC  E1000_ICR_MDAC  /* MDIO access complete */
 #define E1000_IMC_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
 #define E1000_IMC_GPI_EN0   E1000_ICR_GPI_EN0   /* GP Int 0 */
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 5eb83a56f9..2a16692448 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -1577,7 +1577,7 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
 continue;
 }
 
-n |= E1000_ICR_RXT0;
+n |= E1000_ICR_RXDW;
 
 igb_rx_fix_l4_csum(core, core->rx_pkt);
 igb_write_packet_to_guest(core, core->rx_pkt, , _info);
-- 
2.34.1




[PATCH v7 1/8] MAINTAINERS: Add Sriram Yagnaraman as a igb reviewer

2023-03-12 Thread Sriram Yagnaraman
I would like to review and be informed on changes to igb device

Signed-off-by: Sriram Yagnaraman 
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 95c957d587..70685c9dab 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2251,6 +2251,7 @@ F: tests/qtest/libqos/e1000e.*
 
 igb
 M: Akihiko Odaki 
+R: Sriram Yagnaraman 
 S: Maintained
 F: docs/system/devices/igb.rst
 F: hw/net/igb*
-- 
2.34.1




[PATCH v7 8/8] igb: respect VMVIR and VMOLR for VLAN

2023-03-12 Thread Sriram Yagnaraman
Add support for stripping/inserting VLAN for VFs.

Had to move CSUM calculation back into the for loop, since packet data
is pulled inside the loop based on strip VLAN decision for every VF.

net_rx_pkt_fix_l4_csum should be extended to accept a buffer instead for
igb. Work for a future patch.

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c | 51 +++
 1 file changed, 43 insertions(+), 8 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 86de73afc8..3c0fb19b1e 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -386,6 +386,25 @@ igb_rss_parse_packet(IGBCore *core, struct NetRxPkt *pkt, 
bool tx,
 info->queue = E1000_RSS_QUEUE(>mac[RETA], info->hash);
 }
 
+static inline bool
+igb_tx_insert_vlan(IGBCore *core, uint16_t qn,
+   struct igb_tx *tx, bool desc_vle)
+{
+if (core->mac[MRQC] & 1) {
+uint16_t pool = qn % IGB_NUM_VM_POOLS;
+
+if (core->mac[VMVIR0 + pool] & E1000_VMVIR_VLANA_DEFAULT) {
+/* always insert default VLAN */
+desc_vle = true;
+tx->vlan = core->mac[VMVIR0 + pool] & 0x;
+} else if (core->mac[VMVIR0 + pool] & E1000_VMVIR_VLANA_NEVER) {
+return false;
+}
+}
+
+return desc_vle && e1000x_vlan_enabled(core->mac);
+}
+
 static bool
 igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx)
 {
@@ -581,7 +600,8 @@ igb_process_tx_desc(IGBCore *core,
 
 if (cmd_type_len & E1000_TXD_CMD_EOP) {
 if (!tx->skip_cp && net_tx_pkt_parse(tx->tx_pkt)) {
-if (cmd_type_len & E1000_TXD_CMD_VLE) {
+if (igb_tx_insert_vlan(core, queue_index, tx,
+!!(cmd_type_len & E1000_TXD_CMD_VLE))) {
 net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, tx->vlan,
 core->mac[VET] & 0x);
 }
@@ -1541,6 +1561,20 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt 
*pkt,
 igb_update_rx_stats(core, rxi, size, total_size);
 }
 
+static bool
+igb_rx_strip_vlan(IGBCore *core, const E1000E_RingInfo *rxi)
+{
+if (core->mac[MRQC] & 1) {
+uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
+/* Sec 7.10.3.8: CTRL.VME is ignored, only VMOLR/RPLOLR is used */
+return (net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) ?
+core->mac[RPLOLR] & E1000_RPLOLR_STRVLAN :
+core->mac[VMOLR0 + pool] & E1000_VMOLR_STRVLAN;
+}
+
+return e1000x_vlan_enabled(core->mac);
+}
+
 static inline void
 igb_rx_fix_l4_csum(IGBCore *core, struct NetRxPkt *pkt)
 {
@@ -1621,10 +1655,7 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
 
 ehdr = PKT_GET_ETH_HDR(filter_buf);
 net_rx_pkt_set_packet_type(core->rx_pkt, get_eth_packet_type(ehdr));
-
-net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs,
-   e1000x_vlan_enabled(core->mac),
-   core->mac[VET] & 0x);
+net_rx_pkt_set_protocols(core->rx_pkt, filter_buf, size);
 
 queues = igb_receive_assign(core, ehdr, size, _info, external_tx);
 if (!queues) {
@@ -1632,9 +1663,6 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
 return orig_size;
 }
 
-total_size = net_rx_pkt_get_total_len(core->rx_pkt) +
-e1000x_fcs_len(core->mac);
-
 for (i = 0; i < IGB_NUM_QUEUES; i++) {
 if (!(queues & BIT(i)) ||
 !(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
@@ -1643,6 +1671,13 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
 
 igb_rx_ring_init(core, , i);
 
+net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs,
+   igb_rx_strip_vlan(core, rxr.i),
+   core->mac[VET] & 0x);
+
+total_size = net_rx_pkt_get_total_len(core->rx_pkt) +
+e1000x_fcs_len(core->mac);
+
 if (!igb_has_rxbufs(core, rxr.i, total_size)) {
 n |= E1000_ICS_RXO;
 trace_e1000e_rx_not_written_to_guest(rxr.i->idx);
-- 
2.34.1




RE: [PATCH v8 0/8] Introduce igb

2023-02-06 Thread Sriram Yagnaraman
> -Original Message-
> From: Akihiko Odaki 
> Sent: Saturday, 4 February 2023 05:36
> Cc: Jason Wang ; Dmitry Fleytman
> ; Michael S . Tsirkin ;
> Marcel Apfelbaum ; Alex Bennée
> ; Philippe Mathieu-Daudé ;
> Thomas Huth ; Wainer dos Santos Moschetta
> ; Beraldo Leal ; Cleber Rosa
> ; Laurent Vivier ; Paolo Bonzini
> ; Alexander Bulekov ; Bandan Das
> ; Stefan Hajnoczi ; Darren Kenny
> ; Qiuhao Li ; qemu-
> de...@nongnu.org; qemu-...@nongnu.org; de...@daynix.com; Yan
> Vugenfirer ; Yuri Benditovich
> ; Sriram Yagnaraman
> ; Gal Hammer ;
> Akihiko Odaki 
> Subject: [PATCH v8 0/8] Introduce igb
> 
> Based-on: <20230201033539.30049-1-akihiko.od...@daynix.com>
> ([PATCH v5 00/29] e1000x cleanups (preliminary for IGB))
> 
> igb is a family of Intel's gigabit ethernet controllers. This series 
> implements
> 82576 emulation in particular. You can see the last patch for the
> documentation.
> 
> Note that there is another effort to bring 82576 emulation. This series was
> developed independently by Sriram Yagnaraman.
> https://lists.gnu.org/archive/html/qemu-devel/2022-12/msg04670.html
> 
> V7 -> V8:
> - Removed obsolete patch
>   "hw/net/net_tx_pkt: Introduce net_tx_pkt_get_eth_hdr" (Cédric Le Goater)
> 
> V6 -> V7:
> - Reordered statements in igb_receive_internal() so that checksum will be
>   calculated only once and it will be more close to e1000e_receive_internal().
> 
> V5 -> V6:
> - Rebased.
> - Renamed "test" to "packet" in tests/qtest/e1000e-test.c.
> - Fixed Rx logic so that a Rx pool without enough space won't prevent other
>   pools from receiving, based on Sriram Yagnaraman's work.
> 
> V4 -> V5:
> - Rebased.
> - Squashed patches to copy from e1000e code and modify it.
> - Listed the implemented features.
> - Added a check for interrupts availablity on PF.
> - Fixed the declaration of igb_receive_internal(). (Sriram Yagnaraman)
> 
> V3 -> V4:
> - Rebased.
> - Corrected PCIDevice specified for DMA.
> 
> V2 -> V3:
> - Rebased.
> - Fixed PCIDevice reference in hw/net/igbvf.c.
> - Fixed TX packet switching when VM loopback is enabled.
> - Fixed VMDq enablement check.
> - Fixed RX descriptor length parser.
> - Fixed the definitions of RQDPC readers.
> - Implemented VLAN VM filter.
> - Implemented VT_CTL.Def_PL.
> - Implemented the combination of VMDq and RSS.
> - Noted that igb is tested with Windows HLK.
> 
> V1 -> V2:
> - Spun off e1000e general improvements to a distinct series.
> - Restored vnet_hdr offload as there seems nothing preventing from that.
> 
> Akihiko Odaki (8):
>   pcie: Introduce pcie_sriov_num_vfs
>   e1000: Split header files
>   Intrdocue igb device emulation
>   tests/qtest/e1000e-test: Fabricate ethernet header
>   tests/qtest/libqos/e1000e: Export macreg functions
>   igb: Introduce qtest for igb device
>   tests/avocado: Add igb test
>   docs/system/devices/igb: Add igb documentation
> 
>  MAINTAINERS   |9 +
>  docs/system/device-emulation.rst  |1 +
>  docs/system/devices/igb.rst   |   71 +
>  hw/net/Kconfig|5 +
>  hw/net/e1000.c|1 +
>  hw/net/e1000_common.h |  102 +
>  hw/net/e1000_regs.h   |  927 +---
>  hw/net/e1000e.c   |3 +-
>  hw/net/e1000e_core.c  |1 +
>  hw/net/e1000x_common.c|1 +
>  hw/net/e1000x_common.h|   74 -
>  hw/net/e1000x_regs.h  |  940 
>  hw/net/igb.c  |  612 +++
>  hw/net/igb_common.h   |  146 +
>  hw/net/igb_core.c | 4043 +
>  hw/net/igb_core.h |  144 +
>  hw/net/igb_regs.h |  648 +++
>  hw/net/igbvf.c|  327 ++
>  hw/net/meson.build|2 +
>  hw/net/trace-events   |   32 +
>  hw/pci/pcie_sriov.c   |5 +
>  include/hw/pci/pcie_sriov.h   |3 +
>  .../org.centos/stream/8/x86_64/test-avocado   |1 +
>  tests/avocado/igb.py  |   38 +
>  tests/qtest/e1000e-test.c |   25 +-
>  tests/qtest/fuzz/generic_fuzz_configs.h   |5 +
>  tests/qtest/igb-test.c|  243 +
>  tests/qtest/libqos/e1000e.c   |   12 -
>  tests/qtest/libqos/e1000e.h   |   14 +
>  tests/qtest/lib

[PATCH v6 3/8] igb: add ICR_RXDW

2023-02-02 Thread Sriram Yagnaraman
IGB uses RXDW ICR bit to indicate that rx descriptor has been written
back. This is the same as RXT0 bit in older HW.

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/e1000x_regs.h | 4 
 hw/net/igb_core.c| 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h
index fb5b861135..f509db73a7 100644
--- a/hw/net/e1000x_regs.h
+++ b/hw/net/e1000x_regs.h
@@ -335,6 +335,7 @@
 #define E1000_ICR_RXDMT00x0010 /* rx desc min. threshold (0) */
 #define E1000_ICR_RXO   0x0040 /* rx overrun */
 #define E1000_ICR_RXT0  0x0080 /* rx timer intr (ring 0) */
+#define E1000_ICR_RXDW  0x0080 /* rx desc written back */
 #define E1000_ICR_MDAC  0x0200 /* MDIO access complete */
 #define E1000_ICR_RXCFG 0x0400 /* RX /c/ ordered set */
 #define E1000_ICR_GPI_EN0   0x0800 /* GP Int 0 */
@@ -378,6 +379,7 @@
 #define E1000_ICS_RXDMT0E1000_ICR_RXDMT0/* rx desc min. threshold */
 #define E1000_ICS_RXO   E1000_ICR_RXO   /* rx overrun */
 #define E1000_ICS_RXT0  E1000_ICR_RXT0  /* rx timer intr */
+#define E1000_ICS_RXDW  E1000_ICR_RXDW  /* rx desc written back */
 #define E1000_ICS_MDAC  E1000_ICR_MDAC  /* MDIO access complete */
 #define E1000_ICS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
 #define E1000_ICS_GPI_EN0   E1000_ICR_GPI_EN0   /* GP Int 0 */
@@ -407,6 +409,7 @@
 #define E1000_IMS_RXDMT0E1000_ICR_RXDMT0/* rx desc min. threshold */
 #define E1000_IMS_RXO   E1000_ICR_RXO   /* rx overrun */
 #define E1000_IMS_RXT0  E1000_ICR_RXT0  /* rx timer intr */
+#define E1000_IMS_RXDW  E1000_ICR_RXDW  /* rx desc written back */
 #define E1000_IMS_MDAC  E1000_ICR_MDAC  /* MDIO access complete */
 #define E1000_IMS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
 #define E1000_IMS_GPI_EN0   E1000_ICR_GPI_EN0   /* GP Int 0 */
@@ -441,6 +444,7 @@
 #define E1000_IMC_RXDMT0E1000_ICR_RXDMT0/* rx desc min. threshold */
 #define E1000_IMC_RXO   E1000_ICR_RXO   /* rx overrun */
 #define E1000_IMC_RXT0  E1000_ICR_RXT0  /* rx timer intr */
+#define E1000_IMC_RXDW  E1000_ICR_RXDW  /* rx desc written back */
 #define E1000_IMC_MDAC  E1000_ICR_MDAC  /* MDIO access complete */
 #define E1000_IMC_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
 #define E1000_IMC_GPI_EN0   E1000_ICR_GPI_EN0   /* GP Int 0 */
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index b484e6ac30..1ddf54f630 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -1582,7 +1582,7 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
 n |= E1000_ICS_RXDMT0;
 }
 
-n |= E1000_ICR_RXT0;
+n |= E1000_ICR_RXDW;
 
 trace_e1000e_rx_written_to_guest(rxr.i->idx);
 }
-- 
2.34.1




[PATCH v6 4/8] igb: implement VFRE and VFTE registers

2023-02-02 Thread Sriram Yagnaraman
Also introduce:
- Checks for RXDCTL/TXDCTL queue enable bits
- IGB_NUM_VM_POOLS enum (Sec 1.5: Table 1-7)

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c | 38 +++---
 hw/net/igb_core.h |  1 +
 hw/net/igb_regs.h |  3 +++
 3 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 1ddf54f630..c4a2bff4c1 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -780,6 +780,18 @@ igb_txdesc_writeback(IGBCore *core, dma_addr_t base,
 return igb_tx_wb_eic(core, txi->idx);
 }
 
+static inline bool
+igb_tx_enabled(IGBCore *core, const E1000E_RingInfo *txi)
+{
+bool vmdq = core->mac[MRQC] & 1;
+uint16_t qn = txi->idx;
+uint16_t pool = qn % IGB_NUM_VM_POOLS;
+
+return (core->mac[TCTL] & E1000_TCTL_EN) &&
+(!vmdq || core->mac[VFTE] & BIT(pool)) &&
+(core->mac[TXDCTL0 + (qn * 16)] & E1000_TXDCTL_QUEUE_ENABLE);
+}
+
 static void
 igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
 {
@@ -789,8 +801,7 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
 const E1000E_RingInfo *txi = txr->i;
 uint32_t eic = 0;
 
-/* TODO: check if the queue itself is enabled too. */
-if (!(core->mac[TCTL] & E1000_TCTL_EN)) {
+if (!igb_tx_enabled(core, txi)) {
 trace_e1000e_tx_disabled();
 return;
 }
@@ -866,6 +877,9 @@ igb_can_receive(IGBCore *core)
 
 for (i = 0; i < IGB_NUM_QUEUES; i++) {
 E1000E_RxRing rxr;
+if (!(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
+continue;
+}
 
 igb_rx_ring_init(core, &rxr, i);
 if (igb_ring_enabled(core, rxr.i) && igb_has_rxbufs(core, rxr.i, 1)) {
@@ -932,7 +946,7 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 
 if (core->mac[MRQC] & 1) {
 if (is_broadcast_ether_addr(ehdr->h_dest)) {
-for (i = 0; i < 8; i++) {
+for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
 if (core->mac[VMOLR0 + i] & E1000_VMOLR_BAM) {
 queues |= BIT(i);
 }
@@ -966,7 +980,7 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 f = ta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
 f = (((ehdr->h_dest[5] << 8) | ehdr->h_dest[4]) >> f) & 0xfff;
 if (macp[f >> 5] & (1 << (f & 0x1f))) {
-for (i = 0; i < 8; i++) {
+for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
 if (core->mac[VMOLR0 + i] & E1000_VMOLR_ROMPE) {
 queues |= BIT(i);
 }
@@ -989,7 +1003,7 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 }
 }
 } else {
-for (i = 0; i < 8; i++) {
+for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
 if (core->mac[VMOLR0 + i] & E1000_VMOLR_AUPE) {
 mask |= BIT(i);
 }
@@ -1005,6 +1019,7 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 queues = BIT(def_pl >> E1000_VT_CTL_DEFAULT_POOL_SHIFT);
 }
 
+queues &= core->mac[VFRE];
 igb_rss_parse_packet(core, core->rx_pkt, external_tx != NULL, 
rss_info);
 if (rss_info->queue & 1) {
 queues <<= 8;
@@ -1562,7 +1577,8 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
 igb_rx_fix_l4_csum(core, core->rx_pkt);
 
 for (i = 0; i < IGB_NUM_QUEUES; i++) {
-if (!(queues & BIT(i))) {
+if (!(queues & BIT(i)) ||
+!(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
 continue;
 }
 
@@ -1966,9 +1982,16 @@ static void igb_set_vfmailbox(IGBCore *core, int index, 
uint32_t val)
 
 static void igb_vf_reset(IGBCore *core, uint16_t vfn)
 {
+uint16_t qn0 = vfn;
+uint16_t qn1 = vfn + IGB_NUM_VM_POOLS;
+
 /* disable Rx and Tx for the VF*/
-core->mac[VFTE] &= ~BIT(vfn);
+core->mac[RXDCTL0 + (qn0 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE;
+core->mac[RXDCTL0 + (qn1 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE;
+core->mac[TXDCTL0 + (qn0 * 16)] &= ~E1000_TXDCTL_QUEUE_ENABLE;
+core->mac[TXDCTL0 + (qn1 * 16)] &= ~E1000_TXDCTL_QUEUE_ENABLE;
 core->mac[VFRE] &= ~BIT(vfn);
+core->mac[VFTE] &= ~BIT(vfn);
 /* indicate VF reset to PF */
 core->mac[VFLRE] |= BIT(vfn);
 /* VFLRE and mailbox use the same interrupt cause */
@@ -3874,6 +3897,7 @@ igb_phy_reg_init[] = {
 static const uint32_t igb_mac_reg_init[] = {
 [LEDCTL]= 2 | (3 <

[PATCH v6 6/8] igb: respect E1000_VMOLR_RSSE

2023-02-02 Thread Sriram Yagnaraman
RSS for VFs is only enabled if VMOLR[n].RSSE is set.

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 03139c76f7..9994724a39 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -1051,8 +1051,15 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 if (queues) {
 igb_rss_parse_packet(core, core->rx_pkt,
  external_tx != NULL, rss_info);
+/* Sec 8.26.1: PQn = VFn + VQn*8 */
 if (rss_info->queue & 1) {
-queues <<= 8;
+for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
+if ((queues & BIT(i)) &&
+(core->mac[VMOLR0 + i] & E1000_VMOLR_RSSE)) {
+queues |= BIT(i + IGB_NUM_VM_POOLS);
+queues &= ~BIT(i);
+}
+}
 }
 }
 } else {
-- 
2.34.1




[PATCH v6 7/8] igb: implement VF Tx and Rx stats

2023-02-02 Thread Sriram Yagnaraman
Please note that loopback counters for VM to VM traffic are not
implemented yet: VFGOTLBC, VFGPTLBC, VFGORLBC and VFGPRLBC.

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c | 26 ++
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 9994724a39..25a5e0ec87 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -490,7 +490,7 @@ igb_tx_pkt_send(IGBCore *core, struct igb_tx *tx, int 
queue_index)
 }
 
 static void
-igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt)
+igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt, int qn)
 {
 static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
 PTC1023, PTC1522 };
@@ -517,6 +517,13 @@ igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt 
*tx_pkt)
 core->mac[GPTC] = core->mac[TPT];
 core->mac[GOTCL] = core->mac[TOTL];
 core->mac[GOTCH] = core->mac[TOTH];
+
+if (core->mac[MRQC] & 1) {
+uint16_t pool = qn % IGB_NUM_VM_POOLS;
+
+core->mac[PVFGOTC0 + (pool * 64)] += tot_len;
+core->mac[PVFGPTC0 + (pool * 64)]++;
+}
 }
 
 static void
@@ -579,7 +586,7 @@ igb_process_tx_desc(IGBCore *core,
 core->mac[VET] & 0xffff);
 }
 if (igb_tx_pkt_send(core, tx, queue_index)) {
-igb_on_tx_done_update_stats(core, tx->tx_pkt);
+igb_on_tx_done_update_stats(core, tx->tx_pkt, queue_index);
 }
 }
 
@@ -1398,7 +1405,8 @@ igb_write_to_rx_buffers(IGBCore *core,
 }
 
 static void
-igb_update_rx_stats(IGBCore *core, size_t data_size, size_t data_fcs_size)
+igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi,
+size_t data_size, size_t data_fcs_size)
 {
 e1000x_update_rx_total_stats(core->mac, data_size, data_fcs_size);
 
@@ -1414,6 +1422,16 @@ igb_update_rx_stats(IGBCore *core, size_t data_size, 
size_t data_fcs_size)
 default:
 break;
 }
+
+if (core->mac[MRQC] & 1) {
+uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
+
+core->mac[PVFGORC0 + (pool * 64)] += data_size + 4;
+core->mac[PVFGPRC0 + (pool * 64)]++;
+if (net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) {
+core->mac[PVFMPRC0 + (pool * 64)]++;
+}
+}
 }
 
 static inline bool
@@ -1515,7 +1533,7 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt 
*pkt,
 
 } while (desc_offset < total_size);
 
-igb_update_rx_stats(core, size, total_size);
+igb_update_rx_stats(core, rxi, size, total_size);
 }
 
 static inline void
-- 
2.34.1




[PATCH v6 5/8] igb: check oversized packets for VMDq

2023-02-02 Thread Sriram Yagnaraman
Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c | 41 -
 1 file changed, 36 insertions(+), 5 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index c4a2bff4c1..03139c76f7 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -915,12 +915,26 @@ igb_rx_l4_cso_enabled(IGBCore *core)
 return !!(core->mac[RXCSUM] & E1000_RXCSUM_TUOFLD);
 }
 
+static bool
+igb_rx_is_oversized(IGBCore *core, uint16_t qn, size_t size)
+{
+uint16_t pool = qn % IGB_NUM_VM_POOLS;
+bool lpe = !!(core->mac[VMOLR0 + pool] & E1000_VMOLR_LPE);
+int max_ethernet_lpe_size =
+core->mac[VMOLR0 + pool] & E1000_VMOLR_RLPML_MASK;
+int max_ethernet_vlan_size = 1522;
+
+return size > (lpe ? max_ethernet_lpe_size : max_ethernet_vlan_size);
+}
+
 static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header 
*ehdr,
-   E1000E_RSSInfo *rss_info, bool *external_tx)
+   size_t size, E1000E_RSSInfo *rss_info,
+   bool *external_tx)
 {
 static const int ta_shift[] = { 4, 3, 2, 0 };
 uint32_t f, ra[2], *macp, rctl = core->mac[RCTL];
 uint16_t queues = 0;
+uint16_t oversized = 0;
 uint16_t vid = lduw_be_p(_GET_VLAN_HDR(ehdr)->h_tci) & VLAN_VID_MASK;
 bool accepted = false;
 int i;
@@ -1020,9 +1034,26 @@ static uint16_t igb_receive_assign(IGBCore *core, const 
struct eth_header *ehdr,
 }
 
 queues &= core->mac[VFRE];
-igb_rss_parse_packet(core, core->rx_pkt, external_tx != NULL, 
rss_info);
-if (rss_info->queue & 1) {
-queues <<= 8;
+if (queues) {
+for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
+if ((queues & BIT(i)) && igb_rx_is_oversized(core, i, size)) {
+oversized |= BIT(i);
+}
+}
+/* 8.19.37 increment ROC if packet is oversized for all queues */
+if (oversized == queues) {
+trace_e1000x_rx_oversized(size);
+e1000x_inc_reg_if_not_full(core->mac, ROC);
+}
+queues &= ~oversized;
+}
+
+if (queues) {
+igb_rss_parse_packet(core, core->rx_pkt,
+ external_tx != NULL, rss_info);
+if (rss_info->queue & 1) {
+queues <<= 8;
+}
 }
 } else {
 switch (net_rx_pkt_get_packet_type(core->rx_pkt)) {
@@ -1565,7 +1596,7 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
e1000x_vlan_enabled(core->mac),
core->mac[VET] & 0x);
 
-queues = igb_receive_assign(core, ehdr, &rss_info, external_tx);
+queues = igb_receive_assign(core, ehdr, size, &rss_info, external_tx);
 if (!queues) {
 trace_e1000e_rx_flt_dropped();
 return orig_size;
-- 
2.34.1




[PATCH v6 8/8] igb: respect VMVIR and VMOLR for VLAN

2023-02-02 Thread Sriram Yagnaraman
Add support for stripping/inserting VLAN for VFs.

Had to move CSUM calculation back into the for loop, since packet data
is pulled inside the loop based on strip VLAN decision for every VF.

net_rx_pkt_fix_l4_csum should be extended to accept a buffer instead for
igb. Work for a future patch.

Signed-off-by: Sriram Yagnaraman 
---
 hw/net/igb_core.c | 54 ++-
 1 file changed, 44 insertions(+), 10 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 25a5e0ec87..cd4fba383c 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -386,6 +386,25 @@ igb_rss_parse_packet(IGBCore *core, struct NetRxPkt *pkt, 
bool tx,
 info->queue = E1000_RSS_QUEUE(&core->mac[RETA], info->hash);
 }
 
+static inline bool
+igb_tx_insert_vlan(IGBCore *core, uint16_t qn,
+   struct igb_tx *tx, bool desc_vle)
+{
+if (core->mac[MRQC] & 1) {
+uint16_t pool = qn % IGB_NUM_VM_POOLS;
+
+if (core->mac[VMVIR0 + pool] & E1000_VMVIR_VLANA_DEFAULT) {
+/* always insert default VLAN */
+desc_vle = true;
+tx->vlan = core->mac[VMVIR0 + pool] & 0xffff;
+} else if (core->mac[VMVIR0 + pool] & E1000_VMVIR_VLANA_NEVER) {
+return false;
+}
+}
+
+return desc_vle && e1000x_vlan_enabled(core->mac);
+}
+
 static bool
 igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx)
 {
@@ -581,7 +600,8 @@ igb_process_tx_desc(IGBCore *core,
 
 if (cmd_type_len & E1000_TXD_CMD_EOP) {
 if (!tx->skip_cp && net_tx_pkt_parse(tx->tx_pkt)) {
-if (cmd_type_len & E1000_TXD_CMD_VLE) {
+if (igb_tx_insert_vlan(core, queue_index, tx,
+!!(cmd_type_len & E1000_TXD_CMD_VLE))) {
 net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, tx->vlan,
 core->mac[VET] & 0xffff);
 }
@@ -1536,6 +1556,20 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt 
*pkt,
 igb_update_rx_stats(core, rxi, size, total_size);
 }
 
+static bool
+igb_rx_strip_vlan(IGBCore *core, const E1000E_RingInfo *rxi)
+{
+if (core->mac[MRQC] & 1) {
+uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
+/* Sec 7.10.3.8: CTRL.VME is ignored, only VMOLR/RPLOLR is used */
+return (net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) ?
+core->mac[RPLOLR] & E1000_RPLOLR_STRVLAN :
+core->mac[VMOLR0 + pool] & E1000_VMOLR_STRVLAN;
+}
+
+return e1000x_vlan_enabled(core->mac);
+}
+
 static inline void
 igb_rx_fix_l4_csum(IGBCore *core, struct NetRxPkt *pkt)
 {
@@ -1616,10 +1650,7 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
 
 ehdr = PKT_GET_ETH_HDR(filter_buf);
 net_rx_pkt_set_packet_type(core->rx_pkt, get_eth_packet_type(ehdr));
-
-net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs,
-   e1000x_vlan_enabled(core->mac),
-   core->mac[VET] & 0xffff);
+net_rx_pkt_set_protocols(core->rx_pkt, filter_buf, size);
 
 queues = igb_receive_assign(core, ehdr, size, &rss_info, external_tx);
 if (!queues) {
@@ -1627,11 +1658,6 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
 return orig_size;
 }
 
-total_size = net_rx_pkt_get_total_len(core->rx_pkt) +
-e1000x_fcs_len(core->mac);
-
-igb_rx_fix_l4_csum(core, core->rx_pkt);
-
 for (i = 0; i < IGB_NUM_QUEUES; i++) {
 if (!(queues & BIT(i)) ||
 !(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
@@ -1640,12 +1666,20 @@ igb_receive_internal(IGBCore *core, const struct iovec 
*iov, int iovcnt,
 
 igb_rx_ring_init(core, &rxr, i);
 
+net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs,
+   igb_rx_strip_vlan(core, rxr.i),
+   core->mac[VET] & 0xffff);
+
+total_size = net_rx_pkt_get_total_len(core->rx_pkt) +
+e1000x_fcs_len(core->mac);
+
 if (!igb_has_rxbufs(core, rxr.i, total_size)) {
 n |= E1000_ICS_RXO;
 trace_e1000e_rx_not_written_to_guest(rxr.i->idx);
 continue;
 }
 
+igb_rx_fix_l4_csum(core, core->rx_pkt);
 igb_write_packet_to_guest(core, core->rx_pkt, &rxr, &rss_info);
 core->mac[EICR] |= igb_rx_wb_eic(core, rxr.i->idx);
 
-- 
2.34.1




  1   2   >