Re: [PATCH] vmw_pvrdma: Release netdev when vmxnet3 module is removed

2018-07-01 Thread Neil Horman
On Sat, Jun 30, 2018 at 10:15:07PM +0300, Dan Carpenter wrote:
> Hi Neil,
> 
> I love your patch! Perhaps something to improve:
> 
> url:
> https://github.com/0day-ci/linux/commits/Neil-Horman/vmw_pvrdma-Release-netdev-when-vmxnet3-module-is-removed/20180628-232414
> 
> smatch warnings:
> drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c:987 pvrdma_pci_probe() warn: 
> variable dereferenced before check 'dev->netdev' (see line 985)
> 
Appreciate the smatch check, but this was caught by visual review and fixed in 
V2 already.

Best
Neil



Re: [PATCH] vmw_pvrdma: Release netdev when vmxnet3 module is removed

2018-07-01 Thread Neil Horman
On Sat, Jun 30, 2018 at 10:15:07PM +0300, Dan Carpenter wrote:
> Hi Neil,
> 
> I love your patch! Perhaps something to improve:
> 
> url:
> https://github.com/0day-ci/linux/commits/Neil-Horman/vmw_pvrdma-Release-netdev-when-vmxnet3-module-is-removed/20180628-232414
> 
> smatch warnings:
> drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c:987 pvrdma_pci_probe() warn: 
> variable dereferenced before check 'dev->netdev' (see line 985)
> 
Appreciate the smatch check, but this was caught by visual review and fixed in 
V2 already.

Best
Neil



Re: [PATCH] vmw_pvrdma: Release netdev when vmxnet3 module is removed

2018-06-30 Thread Dan Carpenter
Hi Neil,

I love your patch! Perhaps something to improve:

url:
https://github.com/0day-ci/linux/commits/Neil-Horman/vmw_pvrdma-Release-netdev-when-vmxnet3-module-is-removed/20180628-232414

smatch warnings:
drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c:987 pvrdma_pci_probe() warn: 
variable dereferenced before check 'dev->netdev' (see line 985)

# 
https://github.com/0day-ci/linux/commit/d5bb424e18e2ecab4ac590a66b0cca9dfb96d3da
git remote add linux-review https://github.com/0day-ci/linux
git remote update linux-review
git checkout d5bb424e18e2ecab4ac590a66b0cca9dfb96d3da
vim +987 drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c

29c8d9eb Adit Ranadive 2016-10-02   784  
29c8d9eb Adit Ranadive 2016-10-02   785  static int pvrdma_pci_probe(struct 
pci_dev *pdev,
29c8d9eb Adit Ranadive 2016-10-02   786 const 
struct pci_device_id *id)
29c8d9eb Adit Ranadive 2016-10-02   787  {
29c8d9eb Adit Ranadive 2016-10-02   788 struct pci_dev *pdev_net;
29c8d9eb Adit Ranadive 2016-10-02   789 struct pvrdma_dev *dev;
29c8d9eb Adit Ranadive 2016-10-02   790 int ret;
29c8d9eb Adit Ranadive 2016-10-02   791 unsigned long start;
29c8d9eb Adit Ranadive 2016-10-02   792 unsigned long len;
29c8d9eb Adit Ranadive 2016-10-02   793 dma_addr_t slot_dma = 0;
29c8d9eb Adit Ranadive 2016-10-02   794  
29c8d9eb Adit Ranadive 2016-10-02   795 dev_dbg(>dev, 
"initializing driver %s\n", pci_name(pdev));
29c8d9eb Adit Ranadive 2016-10-02   796  
29c8d9eb Adit Ranadive 2016-10-02   797 /* Allocate zero-out device */
29c8d9eb Adit Ranadive 2016-10-02   798 dev = (struct pvrdma_dev 
*)ib_alloc_device(sizeof(*dev));
29c8d9eb Adit Ranadive 2016-10-02   799 if (!dev) {
29c8d9eb Adit Ranadive 2016-10-02   800 dev_err(>dev, 
"failed to allocate IB device\n");
29c8d9eb Adit Ranadive 2016-10-02   801 return -ENOMEM;
29c8d9eb Adit Ranadive 2016-10-02   802 }
29c8d9eb Adit Ranadive 2016-10-02   803  
29c8d9eb Adit Ranadive 2016-10-02   804 
mutex_lock(_device_list_lock);
29c8d9eb Adit Ranadive 2016-10-02   805 list_add(>device_link, 
_device_list);
29c8d9eb Adit Ranadive 2016-10-02   806 
mutex_unlock(_device_list_lock);
29c8d9eb Adit Ranadive 2016-10-02   807  
29c8d9eb Adit Ranadive 2016-10-02   808 ret = pvrdma_init_device(dev);
29c8d9eb Adit Ranadive 2016-10-02   809 if (ret)
29c8d9eb Adit Ranadive 2016-10-02   810 goto err_free_device;
29c8d9eb Adit Ranadive 2016-10-02   811  
29c8d9eb Adit Ranadive 2016-10-02   812 dev->pdev = pdev;
29c8d9eb Adit Ranadive 2016-10-02   813 pci_set_drvdata(pdev, dev);
29c8d9eb Adit Ranadive 2016-10-02   814  
29c8d9eb Adit Ranadive 2016-10-02   815 ret = pci_enable_device(pdev);
29c8d9eb Adit Ranadive 2016-10-02   816 if (ret) {
29c8d9eb Adit Ranadive 2016-10-02   817 dev_err(>dev, 
"cannot enable PCI device\n");
29c8d9eb Adit Ranadive 2016-10-02   818 goto err_free_device;
29c8d9eb Adit Ranadive 2016-10-02   819 }
29c8d9eb Adit Ranadive 2016-10-02   820  
29c8d9eb Adit Ranadive 2016-10-02   821 dev_dbg(>dev, "PCI 
resource flags BAR0 %#lx\n",
29c8d9eb Adit Ranadive 2016-10-02   822 
pci_resource_flags(pdev, 0));
29c8d9eb Adit Ranadive 2016-10-02   823 dev_dbg(>dev, "PCI 
resource len %#llx\n",
29c8d9eb Adit Ranadive 2016-10-02   824 (unsigned long 
long)pci_resource_len(pdev, 0));
29c8d9eb Adit Ranadive 2016-10-02   825 dev_dbg(>dev, "PCI 
resource start %#llx\n",
29c8d9eb Adit Ranadive 2016-10-02   826 (unsigned long 
long)pci_resource_start(pdev, 0));
29c8d9eb Adit Ranadive 2016-10-02   827 dev_dbg(>dev, "PCI 
resource flags BAR1 %#lx\n",
29c8d9eb Adit Ranadive 2016-10-02   828 
pci_resource_flags(pdev, 1));
29c8d9eb Adit Ranadive 2016-10-02   829 dev_dbg(>dev, "PCI 
resource len %#llx\n",
29c8d9eb Adit Ranadive 2016-10-02   830 (unsigned long 
long)pci_resource_len(pdev, 1));
29c8d9eb Adit Ranadive 2016-10-02   831 dev_dbg(>dev, "PCI 
resource start %#llx\n",
29c8d9eb Adit Ranadive 2016-10-02   832 (unsigned long 
long)pci_resource_start(pdev, 1));
29c8d9eb Adit Ranadive 2016-10-02   833  
29c8d9eb Adit Ranadive 2016-10-02   834 if (!(pci_resource_flags(pdev, 
0) & IORESOURCE_MEM) ||
29c8d9eb Adit Ranadive 2016-10-02   835 !(pci_resource_flags(pdev, 
1) & IORESOURCE_MEM)) {
29c8d9eb Adit Ranadive 2016-10-02   836 dev_err(>dev, 
"PCI BAR region not MMIO\n");
29c8d9eb Adit Ranadive 2016-10-02   837 ret = -ENOMEM;
29c8d9eb Adit Ranadive 2016-10-02   838 goto err_free_device;
29c8d9eb Adit Ranadive 2016-10-02   839 }
29c8d9eb Adit Ranadive 2016-10-02   840  
29c8d9eb Adit 

Re: [PATCH] vmw_pvrdma: Release netdev when vmxnet3 module is removed

2018-06-30 Thread Dan Carpenter
Hi Neil,

I love your patch! Perhaps something to improve:

url:
https://github.com/0day-ci/linux/commits/Neil-Horman/vmw_pvrdma-Release-netdev-when-vmxnet3-module-is-removed/20180628-232414

smatch warnings:
drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c:987 pvrdma_pci_probe() warn: 
variable dereferenced before check 'dev->netdev' (see line 985)

# 
https://github.com/0day-ci/linux/commit/d5bb424e18e2ecab4ac590a66b0cca9dfb96d3da
git remote add linux-review https://github.com/0day-ci/linux
git remote update linux-review
git checkout d5bb424e18e2ecab4ac590a66b0cca9dfb96d3da
vim +987 drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c

29c8d9eb Adit Ranadive 2016-10-02   784  
29c8d9eb Adit Ranadive 2016-10-02   785  static int pvrdma_pci_probe(struct 
pci_dev *pdev,
29c8d9eb Adit Ranadive 2016-10-02   786 const 
struct pci_device_id *id)
29c8d9eb Adit Ranadive 2016-10-02   787  {
29c8d9eb Adit Ranadive 2016-10-02   788 struct pci_dev *pdev_net;
29c8d9eb Adit Ranadive 2016-10-02   789 struct pvrdma_dev *dev;
29c8d9eb Adit Ranadive 2016-10-02   790 int ret;
29c8d9eb Adit Ranadive 2016-10-02   791 unsigned long start;
29c8d9eb Adit Ranadive 2016-10-02   792 unsigned long len;
29c8d9eb Adit Ranadive 2016-10-02   793 dma_addr_t slot_dma = 0;
29c8d9eb Adit Ranadive 2016-10-02   794  
29c8d9eb Adit Ranadive 2016-10-02   795 dev_dbg(>dev, 
"initializing driver %s\n", pci_name(pdev));
29c8d9eb Adit Ranadive 2016-10-02   796  
29c8d9eb Adit Ranadive 2016-10-02   797 /* Allocate zero-out device */
29c8d9eb Adit Ranadive 2016-10-02   798 dev = (struct pvrdma_dev 
*)ib_alloc_device(sizeof(*dev));
29c8d9eb Adit Ranadive 2016-10-02   799 if (!dev) {
29c8d9eb Adit Ranadive 2016-10-02   800 dev_err(>dev, 
"failed to allocate IB device\n");
29c8d9eb Adit Ranadive 2016-10-02   801 return -ENOMEM;
29c8d9eb Adit Ranadive 2016-10-02   802 }
29c8d9eb Adit Ranadive 2016-10-02   803  
29c8d9eb Adit Ranadive 2016-10-02   804 
mutex_lock(_device_list_lock);
29c8d9eb Adit Ranadive 2016-10-02   805 list_add(>device_link, 
_device_list);
29c8d9eb Adit Ranadive 2016-10-02   806 
mutex_unlock(_device_list_lock);
29c8d9eb Adit Ranadive 2016-10-02   807  
29c8d9eb Adit Ranadive 2016-10-02   808 ret = pvrdma_init_device(dev);
29c8d9eb Adit Ranadive 2016-10-02   809 if (ret)
29c8d9eb Adit Ranadive 2016-10-02   810 goto err_free_device;
29c8d9eb Adit Ranadive 2016-10-02   811  
29c8d9eb Adit Ranadive 2016-10-02   812 dev->pdev = pdev;
29c8d9eb Adit Ranadive 2016-10-02   813 pci_set_drvdata(pdev, dev);
29c8d9eb Adit Ranadive 2016-10-02   814  
29c8d9eb Adit Ranadive 2016-10-02   815 ret = pci_enable_device(pdev);
29c8d9eb Adit Ranadive 2016-10-02   816 if (ret) {
29c8d9eb Adit Ranadive 2016-10-02   817 dev_err(>dev, 
"cannot enable PCI device\n");
29c8d9eb Adit Ranadive 2016-10-02   818 goto err_free_device;
29c8d9eb Adit Ranadive 2016-10-02   819 }
29c8d9eb Adit Ranadive 2016-10-02   820  
29c8d9eb Adit Ranadive 2016-10-02   821 dev_dbg(>dev, "PCI 
resource flags BAR0 %#lx\n",
29c8d9eb Adit Ranadive 2016-10-02   822 
pci_resource_flags(pdev, 0));
29c8d9eb Adit Ranadive 2016-10-02   823 dev_dbg(>dev, "PCI 
resource len %#llx\n",
29c8d9eb Adit Ranadive 2016-10-02   824 (unsigned long 
long)pci_resource_len(pdev, 0));
29c8d9eb Adit Ranadive 2016-10-02   825 dev_dbg(>dev, "PCI 
resource start %#llx\n",
29c8d9eb Adit Ranadive 2016-10-02   826 (unsigned long 
long)pci_resource_start(pdev, 0));
29c8d9eb Adit Ranadive 2016-10-02   827 dev_dbg(>dev, "PCI 
resource flags BAR1 %#lx\n",
29c8d9eb Adit Ranadive 2016-10-02   828 
pci_resource_flags(pdev, 1));
29c8d9eb Adit Ranadive 2016-10-02   829 dev_dbg(>dev, "PCI 
resource len %#llx\n",
29c8d9eb Adit Ranadive 2016-10-02   830 (unsigned long 
long)pci_resource_len(pdev, 1));
29c8d9eb Adit Ranadive 2016-10-02   831 dev_dbg(>dev, "PCI 
resource start %#llx\n",
29c8d9eb Adit Ranadive 2016-10-02   832 (unsigned long 
long)pci_resource_start(pdev, 1));
29c8d9eb Adit Ranadive 2016-10-02   833  
29c8d9eb Adit Ranadive 2016-10-02   834 if (!(pci_resource_flags(pdev, 
0) & IORESOURCE_MEM) ||
29c8d9eb Adit Ranadive 2016-10-02   835 !(pci_resource_flags(pdev, 
1) & IORESOURCE_MEM)) {
29c8d9eb Adit Ranadive 2016-10-02   836 dev_err(>dev, 
"PCI BAR region not MMIO\n");
29c8d9eb Adit Ranadive 2016-10-02   837 ret = -ENOMEM;
29c8d9eb Adit Ranadive 2016-10-02   838 goto err_free_device;
29c8d9eb Adit Ranadive 2016-10-02   839 }
29c8d9eb Adit Ranadive 2016-10-02   840  
29c8d9eb Adit 

Re: [PATCH] vmw_pvrdma: Release netdev when vmxnet3 module is removed

2018-06-29 Thread Neil Horman
On Thu, Jun 28, 2018 at 09:15:46PM +, Adit Ranadive wrote:
> On 6/28/18, 1:37 PM, "Jason Gunthorpe"  wrote:
> > On Thu, Jun 28, 2018 at 03:45:26PM -0400, Neil Horman wrote:
> > > On Thu, Jun 28, 2018 at 12:59:46PM -0600, Jason Gunthorpe wrote:
> > > > On Thu, Jun 28, 2018 at 09:59:38AM -0400, Neil Horman wrote:
> > > > > On repeated module load/unload cycles, its possible for the pvrmda
> > > > > driver to encounter this crash:
> 
> > > > > @@ -962,6 +982,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
> > > > >   }
> > > > >  
> > > > >   dev->netdev = pci_get_drvdata(pdev_net);
> > > > > + dev_hold(dev->netdev);
> 
> That doesn't seem right. If the vmxnet3 driver isn't loaded at all or failed
> to create a netdev, you would be requesting a hold on a NULL netdev. What if
> you moved this to after the if(!dev->netdev) check?
> 
You're correct, I was thinking that there was a null check in dev_hold, but
there isn't, it needs to be moved after the the !dev->netdev, and released in
the error path.

> > > > >   pci_dev_put(pdev_net);
> > > > >   if (!dev->netdev) {
> > > > >   dev_err(>dev, "failed to get vmxnet3 device\n");
> > > > 
> > > > I see a lot of new dev_hold's here, where are the matching
> > > > dev_puts()?
> > > > 
> > I'm not sure I'd call 2 alot, but sure, there is a new dev_hold in the
> > pvrdma_pci_probe routine, to hold a reference to the netdev that is looked 
> > up
> > there.  It is balanced by the NETDEV_UNREGISTER case in
> > pvrdma_netdevice_event_handle.  The UNREGISTER clause is also balancing the
> > NETDEV_REGISTER case of the hanlder that looks up the matching netdev 
> > should a
> > new device be registered.  Note that we will only hold a single device at a
> > time, because a given pvrdma device only recongnizes a single vmxnet3 device
> > (the one on function 0 of its own bus/device tuple).
> > 
> > I don't see how the dev_hold in pvrdma_pci_probe is undone during
> > error unwind (eg goto err_free_cq_ring)
> > 
> > And I don't see how it is put when pvrdma_pci_remove() is called.
> 
> That's right. These seem missing as well. 
> 
yup



Re: [PATCH] vmw_pvrdma: Release netdev when vmxnet3 module is removed

2018-06-29 Thread Neil Horman
On Thu, Jun 28, 2018 at 09:15:46PM +, Adit Ranadive wrote:
> On 6/28/18, 1:37 PM, "Jason Gunthorpe"  wrote:
> > On Thu, Jun 28, 2018 at 03:45:26PM -0400, Neil Horman wrote:
> > > On Thu, Jun 28, 2018 at 12:59:46PM -0600, Jason Gunthorpe wrote:
> > > > On Thu, Jun 28, 2018 at 09:59:38AM -0400, Neil Horman wrote:
> > > > > On repeated module load/unload cycles, its possible for the pvrmda
> > > > > driver to encounter this crash:
> 
> > > > > @@ -962,6 +982,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
> > > > >   }
> > > > >  
> > > > >   dev->netdev = pci_get_drvdata(pdev_net);
> > > > > + dev_hold(dev->netdev);
> 
> That doesn't seem right. If the vmxnet3 driver isn't loaded at all or failed
> to create a netdev, you would be requesting a hold on a NULL netdev. What if
> you moved this to after the if(!dev->netdev) check?
> 
You're correct, I was thinking that there was a null check in dev_hold, but
there isn't, it needs to be moved after the the !dev->netdev, and released in
the error path.

> > > > >   pci_dev_put(pdev_net);
> > > > >   if (!dev->netdev) {
> > > > >   dev_err(>dev, "failed to get vmxnet3 device\n");
> > > > 
> > > > I see a lot of new dev_hold's here, where are the matching
> > > > dev_puts()?
> > > > 
> > I'm not sure I'd call 2 alot, but sure, there is a new dev_hold in the
> > pvrdma_pci_probe routine, to hold a reference to the netdev that is looked 
> > up
> > there.  It is balanced by the NETDEV_UNREGISTER case in
> > pvrdma_netdevice_event_handle.  The UNREGISTER clause is also balancing the
> > NETDEV_REGISTER case of the hanlder that looks up the matching netdev 
> > should a
> > new device be registered.  Note that we will only hold a single device at a
> > time, because a given pvrdma device only recongnizes a single vmxnet3 device
> > (the one on function 0 of its own bus/device tuple).
> > 
> > I don't see how the dev_hold in pvrdma_pci_probe is undone during
> > error unwind (eg goto err_free_cq_ring)
> > 
> > And I don't see how it is put when pvrdma_pci_remove() is called.
> 
> That's right. These seem missing as well. 
> 
yup



Re: [PATCH] vmw_pvrdma: Release netdev when vmxnet3 module is removed

2018-06-29 Thread Neil Horman
On Thu, Jun 28, 2018 at 02:37:09PM -0600, Jason Gunthorpe wrote:
> On Thu, Jun 28, 2018 at 03:45:26PM -0400, Neil Horman wrote:
> > On Thu, Jun 28, 2018 at 12:59:46PM -0600, Jason Gunthorpe wrote:
> > > On Thu, Jun 28, 2018 at 09:59:38AM -0400, Neil Horman wrote:
> > > > On repeated module load/unload cycles, its possible for the pvrmda
> > > > driver to encounter this crash:
> > > > 
> > > > ...
> > > > 297.032448] RIP: 0010:[]  [] 
> > > > netdev_walk_all_upper_dev_rcu+0x50/0xb0
> > > > [  297.034078] RSP: 0018:95087780bd08  EFLAGS: 00010286
> > > > [  297.034986] RAX:  RBX:  RCX: 
> > > > 95087a0c
> > > > [  297.036196] RDX: 95087a0c RSI: 839e44e0 RDI: 
> > > > 950835d0c000
> > > > [  297.037421] RBP: 95087780bd40 R08: 95087a0e0ea0 R09: 
> > > > abddacd03f8e0ea0
> > > > [  297.038636] R10: abddacd03f8e0ea0 R11: ef5901e9dbc0 R12: 
> > > > 95087a0c
> > > > [  297.039854] R13: 839e44e0 R14: 95087a0c R15: 
> > > > 950835d0c828
> > > > [  297.041071] FS:  () GS:95087fc0() 
> > > > knlGS:
> > > > [  297.042443] CS:  0010 DS:  ES:  CR0: 80050033
> > > > [  297.043429] CR2: ffe8 CR3: 7a652000 CR4: 
> > > > 003607f0
> > > > [  297.044674] DR0:  DR1:  DR2: 
> > > > 
> > > > [  297.045893] DR3:  DR6: fffe0ff0 DR7: 
> > > > 0400
> > > > [  297.047109] Call Trace:
> > > > [  297.047545]  [] 
> > > > netdev_has_upper_dev_all_rcu+0x18/0x20
> > > > [  297.048691]  [] is_eth_port_of_netdev+0x2f/0xa0 
> > > > [ib_core]
> > > > [  297.049886]  [] ? 
> > > > is_eth_active_slave_of_bonding_rcu+0x70/0x70 [ib_core]
> > > > ...
> > > > 
> > > > This occurs because vmw_pvrdma on probe stores a pointer to the netdev
> > > > that exists on function 0 of the same bus/device/slot (which represents
> > > > the vmxnet3 ethernet driver).  However, it never removes this pointer if
> > > > the vmxnet3 module is removed, leading to crashes resulting from use
> > > > after free dereferencing incidents like the one above.
> > > > 
> > > > The fix is pretty straightforward.  vmw_pvrdma should listen for
> > > > NETDEV_REGISTER and NETDEV_UNREGISTER events in its event listener code
> > > > block, and update the stored netdev pointer accordingly.  This solution
> > > > has been tested by myself and the reporter with successful results.
> > > > This fix also allows the pvrdma driver to find its underlying ethernet
> > > > device in the event that vmxnet3 is loaded after pvrdma, which it was
> > > > not able to do before.
> > > > 
> > > > Signed-off-by: Neil Horman 
> > > > Reported-by: ruq...@redhat.com
> > > > CC: Adit Ranadive 
> > > > CC: VMware PV-Drivers 
> > > > CC: Doug Ledford 
> > > > CC: Jason Gunthorpe 
> > > > CC: linux-kernel@vger.kernel.org
> > > >  .../infiniband/hw/vmw_pvrdma/pvrdma_main.c| 25 +--
> > > >  1 file changed, 23 insertions(+), 2 deletions(-)
> > > > 
> > > > diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c 
> > > > b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
> > > > index 0be33a81bbe6..5b4782078a74 100644
> > > > +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
> > > > @@ -699,8 +699,12 @@ static int pvrdma_del_gid(const struct ib_gid_attr 
> > > > *attr, void **context)
> > > >  }
> > > >  
> > > >  static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
> > > > + struct net_device *ndev,
> > > >   unsigned long event)
> > > >  {
> > > > +   struct pci_dev *pdev_net;
> > > > +
> > > > +
> > > > switch (event) {
> > > > case NETDEV_REBOOT:
> > > > case NETDEV_DOWN:
> > > > @@ -718,6 +722,21 @@ static void pvrdma_netdevice_event_handle(struct 
> > > > pvrdma_dev *dev,
> > > > else
> > > > pvrdma_dispatch_event(dev, 1, 
> > > > IB_EVENT_PORT_ACTIVE);
> > > > break;
> > > > +   case NETDEV_UNREGISTER:
> > > > +   dev_put(dev->netdev);
> > > > +   dev->netdev = NULL;
> > > > +   break;
> > > > +   case NETDEV_REGISTER:
> > > > +   /* Paired vmxnet3 will have same bus, slot. But func 
> > > > will be 0 */
> > > > +   pdev_net = pci_get_slot(dev->pdev->bus, 
> > > > PCI_DEVFN(PCI_SLOT(dev->pdev->devfn), 0));
> > > > +   if ((dev->netdev == NULL) && (pci_get_drvdata(pdev_net) 
> > > > == ndev)) {
> > > > +   /* this is our netdev */
> > > > +   dev->netdev = ndev;
> > > > +   dev_hold(ndev);
> > > > +   }
> > > > +   pci_dev_put(pdev_net);
> > > > +   break;
> > > > +
> > > > default:
> > > > dev_dbg(>pdev->dev, "ignore netdevice event %ld 

Re: [PATCH] vmw_pvrdma: Release netdev when vmxnet3 module is removed

2018-06-29 Thread Neil Horman
On Thu, Jun 28, 2018 at 02:37:09PM -0600, Jason Gunthorpe wrote:
> On Thu, Jun 28, 2018 at 03:45:26PM -0400, Neil Horman wrote:
> > On Thu, Jun 28, 2018 at 12:59:46PM -0600, Jason Gunthorpe wrote:
> > > On Thu, Jun 28, 2018 at 09:59:38AM -0400, Neil Horman wrote:
> > > > On repeated module load/unload cycles, its possible for the pvrmda
> > > > driver to encounter this crash:
> > > > 
> > > > ...
> > > > 297.032448] RIP: 0010:[]  [] 
> > > > netdev_walk_all_upper_dev_rcu+0x50/0xb0
> > > > [  297.034078] RSP: 0018:95087780bd08  EFLAGS: 00010286
> > > > [  297.034986] RAX:  RBX:  RCX: 
> > > > 95087a0c
> > > > [  297.036196] RDX: 95087a0c RSI: 839e44e0 RDI: 
> > > > 950835d0c000
> > > > [  297.037421] RBP: 95087780bd40 R08: 95087a0e0ea0 R09: 
> > > > abddacd03f8e0ea0
> > > > [  297.038636] R10: abddacd03f8e0ea0 R11: ef5901e9dbc0 R12: 
> > > > 95087a0c
> > > > [  297.039854] R13: 839e44e0 R14: 95087a0c R15: 
> > > > 950835d0c828
> > > > [  297.041071] FS:  () GS:95087fc0() 
> > > > knlGS:
> > > > [  297.042443] CS:  0010 DS:  ES:  CR0: 80050033
> > > > [  297.043429] CR2: ffe8 CR3: 7a652000 CR4: 
> > > > 003607f0
> > > > [  297.044674] DR0:  DR1:  DR2: 
> > > > 
> > > > [  297.045893] DR3:  DR6: fffe0ff0 DR7: 
> > > > 0400
> > > > [  297.047109] Call Trace:
> > > > [  297.047545]  [] 
> > > > netdev_has_upper_dev_all_rcu+0x18/0x20
> > > > [  297.048691]  [] is_eth_port_of_netdev+0x2f/0xa0 
> > > > [ib_core]
> > > > [  297.049886]  [] ? 
> > > > is_eth_active_slave_of_bonding_rcu+0x70/0x70 [ib_core]
> > > > ...
> > > > 
> > > > This occurs because vmw_pvrdma on probe stores a pointer to the netdev
> > > > that exists on function 0 of the same bus/device/slot (which represents
> > > > the vmxnet3 ethernet driver).  However, it never removes this pointer if
> > > > the vmxnet3 module is removed, leading to crashes resulting from use
> > > > after free dereferencing incidents like the one above.
> > > > 
> > > > The fix is pretty straightforward.  vmw_pvrdma should listen for
> > > > NETDEV_REGISTER and NETDEV_UNREGISTER events in its event listener code
> > > > block, and update the stored netdev pointer accordingly.  This solution
> > > > has been tested by myself and the reporter with successful results.
> > > > This fix also allows the pvrdma driver to find its underlying ethernet
> > > > device in the event that vmxnet3 is loaded after pvrdma, which it was
> > > > not able to do before.
> > > > 
> > > > Signed-off-by: Neil Horman 
> > > > Reported-by: ruq...@redhat.com
> > > > CC: Adit Ranadive 
> > > > CC: VMware PV-Drivers 
> > > > CC: Doug Ledford 
> > > > CC: Jason Gunthorpe 
> > > > CC: linux-kernel@vger.kernel.org
> > > >  .../infiniband/hw/vmw_pvrdma/pvrdma_main.c| 25 +--
> > > >  1 file changed, 23 insertions(+), 2 deletions(-)
> > > > 
> > > > diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c 
> > > > b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
> > > > index 0be33a81bbe6..5b4782078a74 100644
> > > > +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
> > > > @@ -699,8 +699,12 @@ static int pvrdma_del_gid(const struct ib_gid_attr 
> > > > *attr, void **context)
> > > >  }
> > > >  
> > > >  static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
> > > > + struct net_device *ndev,
> > > >   unsigned long event)
> > > >  {
> > > > +   struct pci_dev *pdev_net;
> > > > +
> > > > +
> > > > switch (event) {
> > > > case NETDEV_REBOOT:
> > > > case NETDEV_DOWN:
> > > > @@ -718,6 +722,21 @@ static void pvrdma_netdevice_event_handle(struct 
> > > > pvrdma_dev *dev,
> > > > else
> > > > pvrdma_dispatch_event(dev, 1, 
> > > > IB_EVENT_PORT_ACTIVE);
> > > > break;
> > > > +   case NETDEV_UNREGISTER:
> > > > +   dev_put(dev->netdev);
> > > > +   dev->netdev = NULL;
> > > > +   break;
> > > > +   case NETDEV_REGISTER:
> > > > +   /* Paired vmxnet3 will have same bus, slot. But func 
> > > > will be 0 */
> > > > +   pdev_net = pci_get_slot(dev->pdev->bus, 
> > > > PCI_DEVFN(PCI_SLOT(dev->pdev->devfn), 0));
> > > > +   if ((dev->netdev == NULL) && (pci_get_drvdata(pdev_net) 
> > > > == ndev)) {
> > > > +   /* this is our netdev */
> > > > +   dev->netdev = ndev;
> > > > +   dev_hold(ndev);
> > > > +   }
> > > > +   pci_dev_put(pdev_net);
> > > > +   break;
> > > > +
> > > > default:
> > > > dev_dbg(>pdev->dev, "ignore netdevice event %ld 

Re: [PATCH] vmw_pvrdma: Release netdev when vmxnet3 module is removed

2018-06-28 Thread Adit Ranadive
On 6/28/18, 1:37 PM, "Jason Gunthorpe"  wrote:
> On Thu, Jun 28, 2018 at 03:45:26PM -0400, Neil Horman wrote:
> > On Thu, Jun 28, 2018 at 12:59:46PM -0600, Jason Gunthorpe wrote:
> > > On Thu, Jun 28, 2018 at 09:59:38AM -0400, Neil Horman wrote:
> > > > On repeated module load/unload cycles, its possible for the pvrmda
> > > > driver to encounter this crash:

> > > > @@ -962,6 +982,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
> > > > }
> > > >  
> > > > dev->netdev = pci_get_drvdata(pdev_net);
> > > > +   dev_hold(dev->netdev);

That doesn't seem right. If the vmxnet3 driver isn't loaded at all or failed
to create a netdev, you would be requesting a hold on a NULL netdev. What if
you moved this to after the if(!dev->netdev) check?

> > > > pci_dev_put(pdev_net);
> > > > if (!dev->netdev) {
> > > > dev_err(>dev, "failed to get vmxnet3 device\n");
> > > 
> > > I see a lot of new dev_hold's here, where are the matching
> > > dev_puts()?
> > > 
> I'm not sure I'd call 2 alot, but sure, there is a new dev_hold in the
> pvrdma_pci_probe routine, to hold a reference to the netdev that is looked up
> there.  It is balanced by the NETDEV_UNREGISTER case in
> pvrdma_netdevice_event_handle.  The UNREGISTER clause is also balancing the
> NETDEV_REGISTER case of the hanlder that looks up the matching netdev should a
> new device be registered.  Note that we will only hold a single device at a
> time, because a given pvrdma device only recongnizes a single vmxnet3 device
> (the one on function 0 of its own bus/device tuple).
> 
> I don't see how the dev_hold in pvrdma_pci_probe is undone during
> error unwind (eg goto err_free_cq_ring)
> 
> And I don't see how it is put when pvrdma_pci_remove() is called.

That's right. These seem missing as well. 



Re: [PATCH] vmw_pvrdma: Release netdev when vmxnet3 module is removed

2018-06-28 Thread Adit Ranadive
On 6/28/18, 1:37 PM, "Jason Gunthorpe"  wrote:
> On Thu, Jun 28, 2018 at 03:45:26PM -0400, Neil Horman wrote:
> > On Thu, Jun 28, 2018 at 12:59:46PM -0600, Jason Gunthorpe wrote:
> > > On Thu, Jun 28, 2018 at 09:59:38AM -0400, Neil Horman wrote:
> > > > On repeated module load/unload cycles, its possible for the pvrmda
> > > > driver to encounter this crash:

> > > > @@ -962,6 +982,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
> > > > }
> > > >  
> > > > dev->netdev = pci_get_drvdata(pdev_net);
> > > > +   dev_hold(dev->netdev);

That doesn't seem right. If the vmxnet3 driver isn't loaded at all or failed
to create a netdev, you would be requesting a hold on a NULL netdev. What if
you moved this to after the if(!dev->netdev) check?

> > > > pci_dev_put(pdev_net);
> > > > if (!dev->netdev) {
> > > > dev_err(>dev, "failed to get vmxnet3 device\n");
> > > 
> > > I see a lot of new dev_hold's here, where are the matching
> > > dev_puts()?
> > > 
> I'm not sure I'd call 2 alot, but sure, there is a new dev_hold in the
> pvrdma_pci_probe routine, to hold a reference to the netdev that is looked up
> there.  It is balanced by the NETDEV_UNREGISTER case in
> pvrdma_netdevice_event_handle.  The UNREGISTER clause is also balancing the
> NETDEV_REGISTER case of the hanlder that looks up the matching netdev should a
> new device be registered.  Note that we will only hold a single device at a
> time, because a given pvrdma device only recongnizes a single vmxnet3 device
> (the one on function 0 of its own bus/device tuple).
> 
> I don't see how the dev_hold in pvrdma_pci_probe is undone during
> error unwind (eg goto err_free_cq_ring)
> 
> And I don't see how it is put when pvrdma_pci_remove() is called.

That's right. These seem missing as well. 



Re: [PATCH] vmw_pvrdma: Release netdev when vmxnet3 module is removed

2018-06-28 Thread Jason Gunthorpe
On Thu, Jun 28, 2018 at 03:45:26PM -0400, Neil Horman wrote:
> On Thu, Jun 28, 2018 at 12:59:46PM -0600, Jason Gunthorpe wrote:
> > On Thu, Jun 28, 2018 at 09:59:38AM -0400, Neil Horman wrote:
> > > On repeated module load/unload cycles, its possible for the pvrmda
> > > driver to encounter this crash:
> > > 
> > > ...
> > > 297.032448] RIP: 0010:[]  [] 
> > > netdev_walk_all_upper_dev_rcu+0x50/0xb0
> > > [  297.034078] RSP: 0018:95087780bd08  EFLAGS: 00010286
> > > [  297.034986] RAX:  RBX:  RCX: 
> > > 95087a0c
> > > [  297.036196] RDX: 95087a0c RSI: 839e44e0 RDI: 
> > > 950835d0c000
> > > [  297.037421] RBP: 95087780bd40 R08: 95087a0e0ea0 R09: 
> > > abddacd03f8e0ea0
> > > [  297.038636] R10: abddacd03f8e0ea0 R11: ef5901e9dbc0 R12: 
> > > 95087a0c
> > > [  297.039854] R13: 839e44e0 R14: 95087a0c R15: 
> > > 950835d0c828
> > > [  297.041071] FS:  () GS:95087fc0() 
> > > knlGS:
> > > [  297.042443] CS:  0010 DS:  ES:  CR0: 80050033
> > > [  297.043429] CR2: ffe8 CR3: 7a652000 CR4: 
> > > 003607f0
> > > [  297.044674] DR0:  DR1:  DR2: 
> > > 
> > > [  297.045893] DR3:  DR6: fffe0ff0 DR7: 
> > > 0400
> > > [  297.047109] Call Trace:
> > > [  297.047545]  [] 
> > > netdev_has_upper_dev_all_rcu+0x18/0x20
> > > [  297.048691]  [] is_eth_port_of_netdev+0x2f/0xa0 
> > > [ib_core]
> > > [  297.049886]  [] ? 
> > > is_eth_active_slave_of_bonding_rcu+0x70/0x70 [ib_core]
> > > ...
> > > 
> > > This occurs because vmw_pvrdma on probe stores a pointer to the netdev
> > > that exists on function 0 of the same bus/device/slot (which represents
> > > the vmxnet3 ethernet driver).  However, it never removes this pointer if
> > > the vmxnet3 module is removed, leading to crashes resulting from use
> > > after free dereferencing incidents like the one above.
> > > 
> > > The fix is pretty straightforward.  vmw_pvrdma should listen for
> > > NETDEV_REGISTER and NETDEV_UNREGISTER events in its event listener code
> > > block, and update the stored netdev pointer accordingly.  This solution
> > > has been tested by myself and the reporter with successful results.
> > > This fix also allows the pvrdma driver to find its underlying ethernet
> > > device in the event that vmxnet3 is loaded after pvrdma, which it was
> > > not able to do before.
> > > 
> > > Signed-off-by: Neil Horman 
> > > Reported-by: ruq...@redhat.com
> > > CC: Adit Ranadive 
> > > CC: VMware PV-Drivers 
> > > CC: Doug Ledford 
> > > CC: Jason Gunthorpe 
> > > CC: linux-kernel@vger.kernel.org
> > >  .../infiniband/hw/vmw_pvrdma/pvrdma_main.c| 25 +--
> > >  1 file changed, 23 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c 
> > > b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
> > > index 0be33a81bbe6..5b4782078a74 100644
> > > +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
> > > @@ -699,8 +699,12 @@ static int pvrdma_del_gid(const struct ib_gid_attr 
> > > *attr, void **context)
> > >  }
> > >  
> > >  static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
> > > +   struct net_device *ndev,
> > > unsigned long event)
> > >  {
> > > + struct pci_dev *pdev_net;
> > > +
> > > +
> > >   switch (event) {
> > >   case NETDEV_REBOOT:
> > >   case NETDEV_DOWN:
> > > @@ -718,6 +722,21 @@ static void pvrdma_netdevice_event_handle(struct 
> > > pvrdma_dev *dev,
> > >   else
> > >   pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
> > >   break;
> > > + case NETDEV_UNREGISTER:
> > > + dev_put(dev->netdev);
> > > + dev->netdev = NULL;
> > > + break;
> > > + case NETDEV_REGISTER:
> > > + /* Paired vmxnet3 will have same bus, slot. But func will be 0 
> > > */
> > > + pdev_net = pci_get_slot(dev->pdev->bus, 
> > > PCI_DEVFN(PCI_SLOT(dev->pdev->devfn), 0));
> > > + if ((dev->netdev == NULL) && (pci_get_drvdata(pdev_net) == 
> > > ndev)) {
> > > + /* this is our netdev */
> > > + dev->netdev = ndev;
> > > + dev_hold(ndev);
> > > + }
> > > + pci_dev_put(pdev_net);
> > > + break;
> > > +
> > >   default:
> > >   dev_dbg(>pdev->dev, "ignore netdevice event %ld on %s\n",
> > >   event, dev->ib_dev.name);
> > > @@ -734,8 +753,9 @@ static void pvrdma_netdevice_event_work(struct 
> > > work_struct *work)
> > >  
> > >   mutex_lock(_device_list_lock);
> > >   list_for_each_entry(dev, _device_list, device_link) {
> > > - if (dev->netdev == netdev_work->event_netdev) {
> > > - pvrdma_netdevice_event_handle(dev, netdev_work->event);
> > > + if 

Re: [PATCH] vmw_pvrdma: Release netdev when vmxnet3 module is removed

2018-06-28 Thread Jason Gunthorpe
On Thu, Jun 28, 2018 at 03:45:26PM -0400, Neil Horman wrote:
> On Thu, Jun 28, 2018 at 12:59:46PM -0600, Jason Gunthorpe wrote:
> > On Thu, Jun 28, 2018 at 09:59:38AM -0400, Neil Horman wrote:
> > > On repeated module load/unload cycles, its possible for the pvrmda
> > > driver to encounter this crash:
> > > 
> > > ...
> > > 297.032448] RIP: 0010:[]  [] 
> > > netdev_walk_all_upper_dev_rcu+0x50/0xb0
> > > [  297.034078] RSP: 0018:95087780bd08  EFLAGS: 00010286
> > > [  297.034986] RAX:  RBX:  RCX: 
> > > 95087a0c
> > > [  297.036196] RDX: 95087a0c RSI: 839e44e0 RDI: 
> > > 950835d0c000
> > > [  297.037421] RBP: 95087780bd40 R08: 95087a0e0ea0 R09: 
> > > abddacd03f8e0ea0
> > > [  297.038636] R10: abddacd03f8e0ea0 R11: ef5901e9dbc0 R12: 
> > > 95087a0c
> > > [  297.039854] R13: 839e44e0 R14: 95087a0c R15: 
> > > 950835d0c828
> > > [  297.041071] FS:  () GS:95087fc0() 
> > > knlGS:
> > > [  297.042443] CS:  0010 DS:  ES:  CR0: 80050033
> > > [  297.043429] CR2: ffe8 CR3: 7a652000 CR4: 
> > > 003607f0
> > > [  297.044674] DR0:  DR1:  DR2: 
> > > 
> > > [  297.045893] DR3:  DR6: fffe0ff0 DR7: 
> > > 0400
> > > [  297.047109] Call Trace:
> > > [  297.047545]  [] 
> > > netdev_has_upper_dev_all_rcu+0x18/0x20
> > > [  297.048691]  [] is_eth_port_of_netdev+0x2f/0xa0 
> > > [ib_core]
> > > [  297.049886]  [] ? 
> > > is_eth_active_slave_of_bonding_rcu+0x70/0x70 [ib_core]
> > > ...
> > > 
> > > This occurs because vmw_pvrdma on probe stores a pointer to the netdev
> > > that exists on function 0 of the same bus/device/slot (which represents
> > > the vmxnet3 ethernet driver).  However, it never removes this pointer if
> > > the vmxnet3 module is removed, leading to crashes resulting from use
> > > after free dereferencing incidents like the one above.
> > > 
> > > The fix is pretty straightforward.  vmw_pvrdma should listen for
> > > NETDEV_REGISTER and NETDEV_UNREGISTER events in its event listener code
> > > block, and update the stored netdev pointer accordingly.  This solution
> > > has been tested by myself and the reporter with successful results.
> > > This fix also allows the pvrdma driver to find its underlying ethernet
> > > device in the event that vmxnet3 is loaded after pvrdma, which it was
> > > not able to do before.
> > > 
> > > Signed-off-by: Neil Horman 
> > > Reported-by: ruq...@redhat.com
> > > CC: Adit Ranadive 
> > > CC: VMware PV-Drivers 
> > > CC: Doug Ledford 
> > > CC: Jason Gunthorpe 
> > > CC: linux-kernel@vger.kernel.org
> > >  .../infiniband/hw/vmw_pvrdma/pvrdma_main.c| 25 +--
> > >  1 file changed, 23 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c 
> > > b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
> > > index 0be33a81bbe6..5b4782078a74 100644
> > > +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
> > > @@ -699,8 +699,12 @@ static int pvrdma_del_gid(const struct ib_gid_attr 
> > > *attr, void **context)
> > >  }
> > >  
> > >  static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
> > > +   struct net_device *ndev,
> > > unsigned long event)
> > >  {
> > > + struct pci_dev *pdev_net;
> > > +
> > > +
> > >   switch (event) {
> > >   case NETDEV_REBOOT:
> > >   case NETDEV_DOWN:
> > > @@ -718,6 +722,21 @@ static void pvrdma_netdevice_event_handle(struct 
> > > pvrdma_dev *dev,
> > >   else
> > >   pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
> > >   break;
> > > + case NETDEV_UNREGISTER:
> > > + dev_put(dev->netdev);
> > > + dev->netdev = NULL;
> > > + break;
> > > + case NETDEV_REGISTER:
> > > + /* Paired vmxnet3 will have same bus, slot. But func will be 0 
> > > */
> > > + pdev_net = pci_get_slot(dev->pdev->bus, 
> > > PCI_DEVFN(PCI_SLOT(dev->pdev->devfn), 0));
> > > + if ((dev->netdev == NULL) && (pci_get_drvdata(pdev_net) == 
> > > ndev)) {
> > > + /* this is our netdev */
> > > + dev->netdev = ndev;
> > > + dev_hold(ndev);
> > > + }
> > > + pci_dev_put(pdev_net);
> > > + break;
> > > +
> > >   default:
> > >   dev_dbg(>pdev->dev, "ignore netdevice event %ld on %s\n",
> > >   event, dev->ib_dev.name);
> > > @@ -734,8 +753,9 @@ static void pvrdma_netdevice_event_work(struct 
> > > work_struct *work)
> > >  
> > >   mutex_lock(_device_list_lock);
> > >   list_for_each_entry(dev, _device_list, device_link) {
> > > - if (dev->netdev == netdev_work->event_netdev) {
> > > - pvrdma_netdevice_event_handle(dev, netdev_work->event);
> > > + if 

Re: [PATCH] vmw_pvrdma: Release netdev when vmxnet3 module is removed

2018-06-28 Thread Neil Horman
On Thu, Jun 28, 2018 at 12:59:46PM -0600, Jason Gunthorpe wrote:
> On Thu, Jun 28, 2018 at 09:59:38AM -0400, Neil Horman wrote:
> > On repeated module load/unload cycles, its possible for the pvrmda
> > driver to encounter this crash:
> > 
> > ...
> > 297.032448] RIP: 0010:[]  [] 
> > netdev_walk_all_upper_dev_rcu+0x50/0xb0
> > [  297.034078] RSP: 0018:95087780bd08  EFLAGS: 00010286
> > [  297.034986] RAX:  RBX:  RCX: 
> > 95087a0c
> > [  297.036196] RDX: 95087a0c RSI: 839e44e0 RDI: 
> > 950835d0c000
> > [  297.037421] RBP: 95087780bd40 R08: 95087a0e0ea0 R09: 
> > abddacd03f8e0ea0
> > [  297.038636] R10: abddacd03f8e0ea0 R11: ef5901e9dbc0 R12: 
> > 95087a0c
> > [  297.039854] R13: 839e44e0 R14: 95087a0c R15: 
> > 950835d0c828
> > [  297.041071] FS:  () GS:95087fc0() 
> > knlGS:
> > [  297.042443] CS:  0010 DS:  ES:  CR0: 80050033
> > [  297.043429] CR2: ffe8 CR3: 7a652000 CR4: 
> > 003607f0
> > [  297.044674] DR0:  DR1:  DR2: 
> > 
> > [  297.045893] DR3:  DR6: fffe0ff0 DR7: 
> > 0400
> > [  297.047109] Call Trace:
> > [  297.047545]  [] netdev_has_upper_dev_all_rcu+0x18/0x20
> > [  297.048691]  [] is_eth_port_of_netdev+0x2f/0xa0 
> > [ib_core]
> > [  297.049886]  [] ? 
> > is_eth_active_slave_of_bonding_rcu+0x70/0x70 [ib_core]
> > ...
> > 
> > This occurs because vmw_pvrdma on probe stores a pointer to the netdev
> > that exists on function 0 of the same bus/device/slot (which represents
> > the vmxnet3 ethernet driver).  However, it never removes this pointer if
> > the vmxnet3 module is removed, leading to crashes resulting from use
> > after free dereferencing incidents like the one above.
> > 
> > The fix is pretty straightforward.  vmw_pvrdma should listen for
> > NETDEV_REGISTER and NETDEV_UNREGISTER events in its event listener code
> > block, and update the stored netdev pointer accordingly.  This solution
> > has been tested by myself and the reporter with successful results.
> > This fix also allows the pvrdma driver to find its underlying ethernet
> > device in the event that vmxnet3 is loaded after pvrdma, which it was
> > not able to do before.
> > 
> > Signed-off-by: Neil Horman 
> > Reported-by: ruq...@redhat.com
> > CC: Adit Ranadive 
> > CC: VMware PV-Drivers 
> > CC: Doug Ledford 
> > CC: Jason Gunthorpe 
> > CC: linux-kernel@vger.kernel.org
> >  .../infiniband/hw/vmw_pvrdma/pvrdma_main.c| 25 +--
> >  1 file changed, 23 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c 
> > b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
> > index 0be33a81bbe6..5b4782078a74 100644
> > +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
> > @@ -699,8 +699,12 @@ static int pvrdma_del_gid(const struct ib_gid_attr 
> > *attr, void **context)
> >  }
> >  
> >  static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
> > + struct net_device *ndev,
> >   unsigned long event)
> >  {
> > +   struct pci_dev *pdev_net;
> > +
> > +
> > switch (event) {
> > case NETDEV_REBOOT:
> > case NETDEV_DOWN:
> > @@ -718,6 +722,21 @@ static void pvrdma_netdevice_event_handle(struct 
> > pvrdma_dev *dev,
> > else
> > pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
> > break;
> > +   case NETDEV_UNREGISTER:
> > +   dev_put(dev->netdev);
> > +   dev->netdev = NULL;
> > +   break;
> > +   case NETDEV_REGISTER:
> > +   /* Paired vmxnet3 will have same bus, slot. But func will be 0 
> > */
> > +   pdev_net = pci_get_slot(dev->pdev->bus, 
> > PCI_DEVFN(PCI_SLOT(dev->pdev->devfn), 0));
> > +   if ((dev->netdev == NULL) && (pci_get_drvdata(pdev_net) == 
> > ndev)) {
> > +   /* this is our netdev */
> > +   dev->netdev = ndev;
> > +   dev_hold(ndev);
> > +   }
> > +   pci_dev_put(pdev_net);
> > +   break;
> > +
> > default:
> > dev_dbg(>pdev->dev, "ignore netdevice event %ld on %s\n",
> > event, dev->ib_dev.name);
> > @@ -734,8 +753,9 @@ static void pvrdma_netdevice_event_work(struct 
> > work_struct *work)
> >  
> > mutex_lock(_device_list_lock);
> > list_for_each_entry(dev, _device_list, device_link) {
> > -   if (dev->netdev == netdev_work->event_netdev) {
> > -   pvrdma_netdevice_event_handle(dev, netdev_work->event);
> > +   if ((netdev_work->event == NETDEV_REGISTER) ||
> > +   (dev->netdev == netdev_work->event_netdev)) {
> > +   pvrdma_netdevice_event_handle(dev, 
> > netdev_work->event_netdev, netdev_work->event);
> >

Re: [PATCH] vmw_pvrdma: Release netdev when vmxnet3 module is removed

2018-06-28 Thread Neil Horman
On Thu, Jun 28, 2018 at 12:59:46PM -0600, Jason Gunthorpe wrote:
> On Thu, Jun 28, 2018 at 09:59:38AM -0400, Neil Horman wrote:
> > On repeated module load/unload cycles, its possible for the pvrmda
> > driver to encounter this crash:
> > 
> > ...
> > 297.032448] RIP: 0010:[]  [] 
> > netdev_walk_all_upper_dev_rcu+0x50/0xb0
> > [  297.034078] RSP: 0018:95087780bd08  EFLAGS: 00010286
> > [  297.034986] RAX:  RBX:  RCX: 
> > 95087a0c
> > [  297.036196] RDX: 95087a0c RSI: 839e44e0 RDI: 
> > 950835d0c000
> > [  297.037421] RBP: 95087780bd40 R08: 95087a0e0ea0 R09: 
> > abddacd03f8e0ea0
> > [  297.038636] R10: abddacd03f8e0ea0 R11: ef5901e9dbc0 R12: 
> > 95087a0c
> > [  297.039854] R13: 839e44e0 R14: 95087a0c R15: 
> > 950835d0c828
> > [  297.041071] FS:  () GS:95087fc0() 
> > knlGS:
> > [  297.042443] CS:  0010 DS:  ES:  CR0: 80050033
> > [  297.043429] CR2: ffe8 CR3: 7a652000 CR4: 
> > 003607f0
> > [  297.044674] DR0:  DR1:  DR2: 
> > 
> > [  297.045893] DR3:  DR6: fffe0ff0 DR7: 
> > 0400
> > [  297.047109] Call Trace:
> > [  297.047545]  [] netdev_has_upper_dev_all_rcu+0x18/0x20
> > [  297.048691]  [] is_eth_port_of_netdev+0x2f/0xa0 
> > [ib_core]
> > [  297.049886]  [] ? 
> > is_eth_active_slave_of_bonding_rcu+0x70/0x70 [ib_core]
> > ...
> > 
> > This occurs because vmw_pvrdma on probe stores a pointer to the netdev
> > that exists on function 0 of the same bus/device/slot (which represents
> > the vmxnet3 ethernet driver).  However, it never removes this pointer if
> > the vmxnet3 module is removed, leading to crashes resulting from use
> > after free dereferencing incidents like the one above.
> > 
> > The fix is pretty straightforward.  vmw_pvrdma should listen for
> > NETDEV_REGISTER and NETDEV_UNREGISTER events in its event listener code
> > block, and update the stored netdev pointer accordingly.  This solution
> > has been tested by myself and the reporter with successful results.
> > This fix also allows the pvrdma driver to find its underlying ethernet
> > device in the event that vmxnet3 is loaded after pvrdma, which it was
> > not able to do before.
> > 
> > Signed-off-by: Neil Horman 
> > Reported-by: ruq...@redhat.com
> > CC: Adit Ranadive 
> > CC: VMware PV-Drivers 
> > CC: Doug Ledford 
> > CC: Jason Gunthorpe 
> > CC: linux-kernel@vger.kernel.org
> >  .../infiniband/hw/vmw_pvrdma/pvrdma_main.c| 25 +--
> >  1 file changed, 23 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c 
> > b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
> > index 0be33a81bbe6..5b4782078a74 100644
> > +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
> > @@ -699,8 +699,12 @@ static int pvrdma_del_gid(const struct ib_gid_attr 
> > *attr, void **context)
> >  }
> >  
> >  static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
> > + struct net_device *ndev,
> >   unsigned long event)
> >  {
> > +   struct pci_dev *pdev_net;
> > +
> > +
> > switch (event) {
> > case NETDEV_REBOOT:
> > case NETDEV_DOWN:
> > @@ -718,6 +722,21 @@ static void pvrdma_netdevice_event_handle(struct 
> > pvrdma_dev *dev,
> > else
> > pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
> > break;
> > +   case NETDEV_UNREGISTER:
> > +   dev_put(dev->netdev);
> > +   dev->netdev = NULL;
> > +   break;
> > +   case NETDEV_REGISTER:
> > +   /* Paired vmxnet3 will have same bus, slot. But func will be 0 
> > */
> > +   pdev_net = pci_get_slot(dev->pdev->bus, 
> > PCI_DEVFN(PCI_SLOT(dev->pdev->devfn), 0));
> > +   if ((dev->netdev == NULL) && (pci_get_drvdata(pdev_net) == 
> > ndev)) {
> > +   /* this is our netdev */
> > +   dev->netdev = ndev;
> > +   dev_hold(ndev);
> > +   }
> > +   pci_dev_put(pdev_net);
> > +   break;
> > +
> > default:
> > dev_dbg(>pdev->dev, "ignore netdevice event %ld on %s\n",
> > event, dev->ib_dev.name);
> > @@ -734,8 +753,9 @@ static void pvrdma_netdevice_event_work(struct 
> > work_struct *work)
> >  
> > mutex_lock(_device_list_lock);
> > list_for_each_entry(dev, _device_list, device_link) {
> > -   if (dev->netdev == netdev_work->event_netdev) {
> > -   pvrdma_netdevice_event_handle(dev, netdev_work->event);
> > +   if ((netdev_work->event == NETDEV_REGISTER) ||
> > +   (dev->netdev == netdev_work->event_netdev)) {
> > +   pvrdma_netdevice_event_handle(dev, 
> > netdev_work->event_netdev, netdev_work->event);
> >

Re: [PATCH] vmw_pvrdma: Release netdev when vmxnet3 module is removed

2018-06-28 Thread Jason Gunthorpe
On Thu, Jun 28, 2018 at 09:59:38AM -0400, Neil Horman wrote:
> On repeated module load/unload cycles, its possible for the pvrmda
> driver to encounter this crash:
> 
> ...
> 297.032448] RIP: 0010:[]  [] 
> netdev_walk_all_upper_dev_rcu+0x50/0xb0
> [  297.034078] RSP: 0018:95087780bd08  EFLAGS: 00010286
> [  297.034986] RAX:  RBX:  RCX: 
> 95087a0c
> [  297.036196] RDX: 95087a0c RSI: 839e44e0 RDI: 
> 950835d0c000
> [  297.037421] RBP: 95087780bd40 R08: 95087a0e0ea0 R09: 
> abddacd03f8e0ea0
> [  297.038636] R10: abddacd03f8e0ea0 R11: ef5901e9dbc0 R12: 
> 95087a0c
> [  297.039854] R13: 839e44e0 R14: 95087a0c R15: 
> 950835d0c828
> [  297.041071] FS:  () GS:95087fc0() 
> knlGS:
> [  297.042443] CS:  0010 DS:  ES:  CR0: 80050033
> [  297.043429] CR2: ffe8 CR3: 7a652000 CR4: 
> 003607f0
> [  297.044674] DR0:  DR1:  DR2: 
> 
> [  297.045893] DR3:  DR6: fffe0ff0 DR7: 
> 0400
> [  297.047109] Call Trace:
> [  297.047545]  [] netdev_has_upper_dev_all_rcu+0x18/0x20
> [  297.048691]  [] is_eth_port_of_netdev+0x2f/0xa0 [ib_core]
> [  297.049886]  [] ? 
> is_eth_active_slave_of_bonding_rcu+0x70/0x70 [ib_core]
> ...
> 
> This occurs because vmw_pvrdma on probe stores a pointer to the netdev
> that exists on function 0 of the same bus/device/slot (which represents
> the vmxnet3 ethernet driver).  However, it never removes this pointer if
> the vmxnet3 module is removed, leading to crashes resulting from use
> after free dereferencing incidents like the one above.
> 
> The fix is pretty straightforward.  vmw_pvrdma should listen for
> NETDEV_REGISTER and NETDEV_UNREGISTER events in its event listener code
> block, and update the stored netdev pointer accordingly.  This solution
> has been tested by myself and the reporter with successful results.
> This fix also allows the pvrdma driver to find its underlying ethernet
> device in the event that vmxnet3 is loaded after pvrdma, which it was
> not able to do before.
> 
> Signed-off-by: Neil Horman 
> Reported-by: ruq...@redhat.com
> CC: Adit Ranadive 
> CC: VMware PV-Drivers 
> CC: Doug Ledford 
> CC: Jason Gunthorpe 
> CC: linux-kernel@vger.kernel.org
>  .../infiniband/hw/vmw_pvrdma/pvrdma_main.c| 25 +--
>  1 file changed, 23 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c 
> b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
> index 0be33a81bbe6..5b4782078a74 100644
> +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
> @@ -699,8 +699,12 @@ static int pvrdma_del_gid(const struct ib_gid_attr 
> *attr, void **context)
>  }
>  
>  static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
> +   struct net_device *ndev,
> unsigned long event)
>  {
> + struct pci_dev *pdev_net;
> +
> +
>   switch (event) {
>   case NETDEV_REBOOT:
>   case NETDEV_DOWN:
> @@ -718,6 +722,21 @@ static void pvrdma_netdevice_event_handle(struct 
> pvrdma_dev *dev,
>   else
>   pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
>   break;
> + case NETDEV_UNREGISTER:
> + dev_put(dev->netdev);
> + dev->netdev = NULL;
> + break;
> + case NETDEV_REGISTER:
> + /* Paired vmxnet3 will have same bus, slot. But func will be 0 
> */
> + pdev_net = pci_get_slot(dev->pdev->bus, 
> PCI_DEVFN(PCI_SLOT(dev->pdev->devfn), 0));
> + if ((dev->netdev == NULL) && (pci_get_drvdata(pdev_net) == 
> ndev)) {
> + /* this is our netdev */
> + dev->netdev = ndev;
> + dev_hold(ndev);
> + }
> + pci_dev_put(pdev_net);
> + break;
> +
>   default:
>   dev_dbg(>pdev->dev, "ignore netdevice event %ld on %s\n",
>   event, dev->ib_dev.name);
> @@ -734,8 +753,9 @@ static void pvrdma_netdevice_event_work(struct 
> work_struct *work)
>  
>   mutex_lock(_device_list_lock);
>   list_for_each_entry(dev, _device_list, device_link) {
> - if (dev->netdev == netdev_work->event_netdev) {
> - pvrdma_netdevice_event_handle(dev, netdev_work->event);
> + if ((netdev_work->event == NETDEV_REGISTER) ||
> + (dev->netdev == netdev_work->event_netdev)) {
> + pvrdma_netdevice_event_handle(dev, 
> netdev_work->event_netdev, netdev_work->event);
>   break;
>   }
>   }
> @@ -962,6 +982,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
>   }
>  
>   dev->netdev = pci_get_drvdata(pdev_net);
> + dev_hold(dev->netdev);
>   

Re: [PATCH] vmw_pvrdma: Release netdev when vmxnet3 module is removed

2018-06-28 Thread Jason Gunthorpe
On Thu, Jun 28, 2018 at 09:59:38AM -0400, Neil Horman wrote:
> On repeated module load/unload cycles, its possible for the pvrmda
> driver to encounter this crash:
> 
> ...
> 297.032448] RIP: 0010:[]  [] 
> netdev_walk_all_upper_dev_rcu+0x50/0xb0
> [  297.034078] RSP: 0018:95087780bd08  EFLAGS: 00010286
> [  297.034986] RAX:  RBX:  RCX: 
> 95087a0c
> [  297.036196] RDX: 95087a0c RSI: 839e44e0 RDI: 
> 950835d0c000
> [  297.037421] RBP: 95087780bd40 R08: 95087a0e0ea0 R09: 
> abddacd03f8e0ea0
> [  297.038636] R10: abddacd03f8e0ea0 R11: ef5901e9dbc0 R12: 
> 95087a0c
> [  297.039854] R13: 839e44e0 R14: 95087a0c R15: 
> 950835d0c828
> [  297.041071] FS:  () GS:95087fc0() 
> knlGS:
> [  297.042443] CS:  0010 DS:  ES:  CR0: 80050033
> [  297.043429] CR2: ffe8 CR3: 7a652000 CR4: 
> 003607f0
> [  297.044674] DR0:  DR1:  DR2: 
> 
> [  297.045893] DR3:  DR6: fffe0ff0 DR7: 
> 0400
> [  297.047109] Call Trace:
> [  297.047545]  [] netdev_has_upper_dev_all_rcu+0x18/0x20
> [  297.048691]  [] is_eth_port_of_netdev+0x2f/0xa0 [ib_core]
> [  297.049886]  [] ? 
> is_eth_active_slave_of_bonding_rcu+0x70/0x70 [ib_core]
> ...
> 
> This occurs because vmw_pvrdma on probe stores a pointer to the netdev
> that exists on function 0 of the same bus/device/slot (which represents
> the vmxnet3 ethernet driver).  However, it never removes this pointer if
> the vmxnet3 module is removed, leading to crashes resulting from use
> after free dereferencing incidents like the one above.
> 
> The fix is pretty straightforward.  vmw_pvrdma should listen for
> NETDEV_REGISTER and NETDEV_UNREGISTER events in its event listener code
> block, and update the stored netdev pointer accordingly.  This solution
> has been tested by myself and the reporter with successful results.
> This fix also allows the pvrdma driver to find its underlying ethernet
> device in the event that vmxnet3 is loaded after pvrdma, which it was
> not able to do before.
> 
> Signed-off-by: Neil Horman 
> Reported-by: ruq...@redhat.com
> CC: Adit Ranadive 
> CC: VMware PV-Drivers 
> CC: Doug Ledford 
> CC: Jason Gunthorpe 
> CC: linux-kernel@vger.kernel.org
>  .../infiniband/hw/vmw_pvrdma/pvrdma_main.c| 25 +--
>  1 file changed, 23 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c 
> b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
> index 0be33a81bbe6..5b4782078a74 100644
> +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
> @@ -699,8 +699,12 @@ static int pvrdma_del_gid(const struct ib_gid_attr 
> *attr, void **context)
>  }
>  
>  static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
> +   struct net_device *ndev,
> unsigned long event)
>  {
> + struct pci_dev *pdev_net;
> +
> +
>   switch (event) {
>   case NETDEV_REBOOT:
>   case NETDEV_DOWN:
> @@ -718,6 +722,21 @@ static void pvrdma_netdevice_event_handle(struct 
> pvrdma_dev *dev,
>   else
>   pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
>   break;
> + case NETDEV_UNREGISTER:
> + dev_put(dev->netdev);
> + dev->netdev = NULL;
> + break;
> + case NETDEV_REGISTER:
> + /* Paired vmxnet3 will have same bus, slot. But func will be 0 
> */
> + pdev_net = pci_get_slot(dev->pdev->bus, 
> PCI_DEVFN(PCI_SLOT(dev->pdev->devfn), 0));
> + if ((dev->netdev == NULL) && (pci_get_drvdata(pdev_net) == 
> ndev)) {
> + /* this is our netdev */
> + dev->netdev = ndev;
> + dev_hold(ndev);
> + }
> + pci_dev_put(pdev_net);
> + break;
> +
>   default:
>   dev_dbg(>pdev->dev, "ignore netdevice event %ld on %s\n",
>   event, dev->ib_dev.name);
> @@ -734,8 +753,9 @@ static void pvrdma_netdevice_event_work(struct 
> work_struct *work)
>  
>   mutex_lock(_device_list_lock);
>   list_for_each_entry(dev, _device_list, device_link) {
> - if (dev->netdev == netdev_work->event_netdev) {
> - pvrdma_netdevice_event_handle(dev, netdev_work->event);
> + if ((netdev_work->event == NETDEV_REGISTER) ||
> + (dev->netdev == netdev_work->event_netdev)) {
> + pvrdma_netdevice_event_handle(dev, 
> netdev_work->event_netdev, netdev_work->event);
>   break;
>   }
>   }
> @@ -962,6 +982,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
>   }
>  
>   dev->netdev = pci_get_drvdata(pdev_net);
> + dev_hold(dev->netdev);
>   

[PATCH] vmw_pvrdma: Release netdev when vmxnet3 module is removed

2018-06-28 Thread Neil Horman
On repeated module load/unload cycles, its possible for the pvrmda
driver to encounter this crash:

...
297.032448] RIP: 0010:[]  [] 
netdev_walk_all_upper_dev_rcu+0x50/0xb0
[  297.034078] RSP: 0018:95087780bd08  EFLAGS: 00010286
[  297.034986] RAX:  RBX:  RCX: 95087a0c
[  297.036196] RDX: 95087a0c RSI: 839e44e0 RDI: 950835d0c000
[  297.037421] RBP: 95087780bd40 R08: 95087a0e0ea0 R09: abddacd03f8e0ea0
[  297.038636] R10: abddacd03f8e0ea0 R11: ef5901e9dbc0 R12: 95087a0c
[  297.039854] R13: 839e44e0 R14: 95087a0c R15: 950835d0c828
[  297.041071] FS:  () GS:95087fc0() 
knlGS:
[  297.042443] CS:  0010 DS:  ES:  CR0: 80050033
[  297.043429] CR2: ffe8 CR3: 7a652000 CR4: 003607f0
[  297.044674] DR0:  DR1:  DR2: 
[  297.045893] DR3:  DR6: fffe0ff0 DR7: 0400
[  297.047109] Call Trace:
[  297.047545]  [] netdev_has_upper_dev_all_rcu+0x18/0x20
[  297.048691]  [] is_eth_port_of_netdev+0x2f/0xa0 [ib_core]
[  297.049886]  [] ? 
is_eth_active_slave_of_bonding_rcu+0x70/0x70 [ib_core]
...

This occurs because vmw_pvrdma on probe stores a pointer to the netdev
that exists on function 0 of the same bus/device/slot (which represents
the vmxnet3 ethernet driver).  However, it never removes this pointer if
the vmxnet3 module is removed, leading to crashes resulting from use
after free dereferencing incidents like the one above.

The fix is pretty straightforward.  vmw_pvrdma should listen for
NETDEV_REGISTER and NETDEV_UNREGISTER events in its event listener code
block, and update the stored netdev pointer accordingly.  This solution
has been tested by myself and the reporter with successful results.
This fix also allows the pvrdma driver to find its underlying ethernet
device in the event that vmxnet3 is loaded after pvrdma, which it was
not able to do before.

Signed-off-by: Neil Horman 
Reported-by: ruq...@redhat.com
CC: Adit Ranadive 
CC: VMware PV-Drivers 
CC: Doug Ledford 
CC: Jason Gunthorpe 
CC: linux-kernel@vger.kernel.org
---
 .../infiniband/hw/vmw_pvrdma/pvrdma_main.c| 25 +--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c 
b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 0be33a81bbe6..5b4782078a74 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -699,8 +699,12 @@ static int pvrdma_del_gid(const struct ib_gid_attr *attr, 
void **context)
 }
 
 static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
+ struct net_device *ndev,
  unsigned long event)
 {
+   struct pci_dev *pdev_net;
+
+
switch (event) {
case NETDEV_REBOOT:
case NETDEV_DOWN:
@@ -718,6 +722,21 @@ static void pvrdma_netdevice_event_handle(struct 
pvrdma_dev *dev,
else
pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
break;
+   case NETDEV_UNREGISTER:
+   dev_put(dev->netdev);
+   dev->netdev = NULL;
+   break;
+   case NETDEV_REGISTER:
+   /* Paired vmxnet3 will have same bus, slot. But func will be 0 
*/
+   pdev_net = pci_get_slot(dev->pdev->bus, 
PCI_DEVFN(PCI_SLOT(dev->pdev->devfn), 0));
+   if ((dev->netdev == NULL) && (pci_get_drvdata(pdev_net) == 
ndev)) {
+   /* this is our netdev */
+   dev->netdev = ndev;
+   dev_hold(ndev);
+   }
+   pci_dev_put(pdev_net);
+   break;
+
default:
dev_dbg(>pdev->dev, "ignore netdevice event %ld on %s\n",
event, dev->ib_dev.name);
@@ -734,8 +753,9 @@ static void pvrdma_netdevice_event_work(struct work_struct 
*work)
 
mutex_lock(_device_list_lock);
list_for_each_entry(dev, _device_list, device_link) {
-   if (dev->netdev == netdev_work->event_netdev) {
-   pvrdma_netdevice_event_handle(dev, netdev_work->event);
+   if ((netdev_work->event == NETDEV_REGISTER) ||
+   (dev->netdev == netdev_work->event_netdev)) {
+   pvrdma_netdevice_event_handle(dev, 
netdev_work->event_netdev, netdev_work->event);
break;
}
}
@@ -962,6 +982,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
}
 
dev->netdev = pci_get_drvdata(pdev_net);
+   dev_hold(dev->netdev);
pci_dev_put(pdev_net);
if (!dev->netdev) {
dev_err(>dev, "failed to get vmxnet3 device\n");
-- 
2.17.1



[PATCH] vmw_pvrdma: Release netdev when vmxnet3 module is removed

2018-06-28 Thread Neil Horman
On repeated module load/unload cycles, its possible for the pvrmda
driver to encounter this crash:

...
297.032448] RIP: 0010:[]  [] 
netdev_walk_all_upper_dev_rcu+0x50/0xb0
[  297.034078] RSP: 0018:95087780bd08  EFLAGS: 00010286
[  297.034986] RAX:  RBX:  RCX: 95087a0c
[  297.036196] RDX: 95087a0c RSI: 839e44e0 RDI: 950835d0c000
[  297.037421] RBP: 95087780bd40 R08: 95087a0e0ea0 R09: abddacd03f8e0ea0
[  297.038636] R10: abddacd03f8e0ea0 R11: ef5901e9dbc0 R12: 95087a0c
[  297.039854] R13: 839e44e0 R14: 95087a0c R15: 950835d0c828
[  297.041071] FS:  () GS:95087fc0() 
knlGS:
[  297.042443] CS:  0010 DS:  ES:  CR0: 80050033
[  297.043429] CR2: ffe8 CR3: 7a652000 CR4: 003607f0
[  297.044674] DR0:  DR1:  DR2: 
[  297.045893] DR3:  DR6: fffe0ff0 DR7: 0400
[  297.047109] Call Trace:
[  297.047545]  [] netdev_has_upper_dev_all_rcu+0x18/0x20
[  297.048691]  [] is_eth_port_of_netdev+0x2f/0xa0 [ib_core]
[  297.049886]  [] ? 
is_eth_active_slave_of_bonding_rcu+0x70/0x70 [ib_core]
...

This occurs because vmw_pvrdma on probe stores a pointer to the netdev
that exists on function 0 of the same bus/device/slot (which represents
the vmxnet3 ethernet driver).  However, it never removes this pointer if
the vmxnet3 module is removed, leading to crashes resulting from use
after free dereferencing incidents like the one above.

The fix is pretty straightforward.  vmw_pvrdma should listen for
NETDEV_REGISTER and NETDEV_UNREGISTER events in its event listener code
block, and update the stored netdev pointer accordingly.  This solution
has been tested by myself and the reporter with successful results.
This fix also allows the pvrdma driver to find its underlying ethernet
device in the event that vmxnet3 is loaded after pvrdma, which it was
not able to do before.

Signed-off-by: Neil Horman 
Reported-by: ruq...@redhat.com
CC: Adit Ranadive 
CC: VMware PV-Drivers 
CC: Doug Ledford 
CC: Jason Gunthorpe 
CC: linux-kernel@vger.kernel.org
---
 .../infiniband/hw/vmw_pvrdma/pvrdma_main.c| 25 +--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c 
b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 0be33a81bbe6..5b4782078a74 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -699,8 +699,12 @@ static int pvrdma_del_gid(const struct ib_gid_attr *attr, 
void **context)
 }
 
 static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
+ struct net_device *ndev,
  unsigned long event)
 {
+   struct pci_dev *pdev_net;
+
+
switch (event) {
case NETDEV_REBOOT:
case NETDEV_DOWN:
@@ -718,6 +722,21 @@ static void pvrdma_netdevice_event_handle(struct 
pvrdma_dev *dev,
else
pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
break;
+   case NETDEV_UNREGISTER:
+   dev_put(dev->netdev);
+   dev->netdev = NULL;
+   break;
+   case NETDEV_REGISTER:
+   /* Paired vmxnet3 will have same bus, slot. But func will be 0 
*/
+   pdev_net = pci_get_slot(dev->pdev->bus, 
PCI_DEVFN(PCI_SLOT(dev->pdev->devfn), 0));
+   if ((dev->netdev == NULL) && (pci_get_drvdata(pdev_net) == 
ndev)) {
+   /* this is our netdev */
+   dev->netdev = ndev;
+   dev_hold(ndev);
+   }
+   pci_dev_put(pdev_net);
+   break;
+
default:
dev_dbg(>pdev->dev, "ignore netdevice event %ld on %s\n",
event, dev->ib_dev.name);
@@ -734,8 +753,9 @@ static void pvrdma_netdevice_event_work(struct work_struct 
*work)
 
mutex_lock(_device_list_lock);
list_for_each_entry(dev, _device_list, device_link) {
-   if (dev->netdev == netdev_work->event_netdev) {
-   pvrdma_netdevice_event_handle(dev, netdev_work->event);
+   if ((netdev_work->event == NETDEV_REGISTER) ||
+   (dev->netdev == netdev_work->event_netdev)) {
+   pvrdma_netdevice_event_handle(dev, 
netdev_work->event_netdev, netdev_work->event);
break;
}
}
@@ -962,6 +982,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
}
 
dev->netdev = pci_get_drvdata(pdev_net);
+   dev_hold(dev->netdev);
pci_dev_put(pdev_net);
if (!dev->netdev) {
dev_err(>dev, "failed to get vmxnet3 device\n");
-- 
2.17.1