[PATCH 4.15 109/128] net: sched: drop qdisc_reset from dev_graft_qdisc

2018-03-16 Thread Greg Kroah-Hartman
4.15-stable review patch.  If anyone has any objections, please let me know.

------------------

From: John Fastabend 


[ Upstream commit 7bbde83b1860c28a1cc35516352c4e7e5172c29a ]

In dev_graft_qdisc() a "new" qdisc is attached and the 'qdisc_destroy'
operation is called on the old qdisc. The destroy operation will wait
an RCU grace period and then call qdisc_rcu_free(), at which point
gso_cpu_skb is freed along with all stats, so there is no need to zero
stats and gso_cpu_skb from the graft operation itself.
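
For readers unfamiliar with the idiom, the deferred free described
above follows the standard call_rcu() pattern. The sketch below is
illustrative only (example_obj and its helpers are made-up names, not
the kernel's actual qdisc code):

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

/* Hypothetical object standing in for struct Qdisc. */
struct example_obj {
	struct rcu_head rcu;	/* storage for the deferred callback */
	/* ... stats, skb lists, ... */
};

/* Runs only after a full RCU grace period has elapsed, so no CPU can
 * still hold an RCU-protected reference to the object.
 */
static void example_rcu_free(struct rcu_head *head)
{
	struct example_obj *obj = container_of(head, struct example_obj, rcu);

	kfree(obj);
}

static void example_destroy(struct example_obj *obj)
{
	/* Do not free immediately; defer the free until after the
	 * grace period, as qdisc_destroy() defers to qdisc_rcu_free().
	 */
	call_rcu(&obj->rcu, example_rcu_free);
}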

Further, after dropping the qdisc locks we cannot call qdisc_reset
until an RCU grace period has elapsed and the qdisc is detached from
all CPUs. By removing the qdisc_reset() here we get the correct
property of waiting an RCU grace period and letting the qdisc_destroy
operation clean up the qdisc correctly.
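
In condensed form, the safe ordering that dev_deactivate_many()
implements after this patch looks like the following (a sketch reusing
the helpers from the diff below, not the verbatim code):

static void deactivate_order_sketch(struct net_device *dev)
{
	/* 1. noop_qdisc has already been grafted over every tx queue,
	 *    so no new packets can reach the old qdisc.
	 */

	/* 2. Wait one RCU grace period so any CPU that already loaded
	 *    the old qdisc pointer has finished with it.
	 */
	synchronize_net();

	/* 3. Wait out any CPU still inside qdisc_run(). */
	while (some_qdisc_is_busy(dev))
		yield();

	/* 4. Only now is it safe to unwind stale skb lists and stats. */
	netdev_for_each_tx_queue(dev, dev_qdisc_reset, NULL);
}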

Note, a refcnt greater than 1 would cause the destroy operation to be
aborted; however, if this ever happened, the reference to the qdisc
would be lost and we would have a memory leak.
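
The destroy-abort behavior referred to here is the usual refcount
idiom; a minimal sketch (illustrative, not the exact qdisc_destroy()
body):

#include <linux/refcount.h>

static void destroy_abort_sketch(struct Qdisc *qdisc)
{
	/* Bail out while other references remain.  If the caller had
	 * already dropped its own pointer before this early return,
	 * nobody could ever put the last reference: a memory leak.
	 */
	if (!refcount_dec_and_test(&qdisc->refcnt))
		return;

	/* ... last reference gone: schedule the RCU-deferred free ... */
}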

Signed-off-by: John Fastabend 
Signed-off-by: David S. Miller 
Signed-off-by: Sasha Levin 
Signed-off-by: Greg Kroah-Hartman 
---
 net/sched/sch_generic.c |   28 +++++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -762,10 +762,6 @@ struct Qdisc *dev_graft_qdisc(struct net
root_lock = qdisc_lock(oqdisc);
spin_lock_bh(root_lock);
 
-   /* Prune old scheduler */
-   if (oqdisc && refcount_read(&oqdisc->refcnt) <= 1)
-   qdisc_reset(oqdisc);
-
/* ... and graft new one */
if (qdisc == NULL)
qdisc = &noop_qdisc;
@@ -916,6 +912,16 @@ static bool some_qdisc_is_busy(struct ne
return false;
 }
 
+static void dev_qdisc_reset(struct net_device *dev,
+   struct netdev_queue *dev_queue,
+   void *none)
+{
+   struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
+
+   if (qdisc)
+   qdisc_reset(qdisc);
+}
+
 /**
  * dev_deactivate_many - deactivate transmissions on several devices
  * @head: list of devices to deactivate
@@ -926,7 +932,6 @@ static bool some_qdisc_is_busy(struct ne
 void dev_deactivate_many(struct list_head *head)
 {
struct net_device *dev;
-   bool sync_needed = false;
 
list_for_each_entry(dev, head, close_list) {
netdev_for_each_tx_queue(dev, dev_deactivate_queue,
@@ -936,20 +941,25 @@ void dev_deactivate_many(struct list_hea
 &noop_qdisc);
 
dev_watchdog_down(dev);
-   sync_needed |= !dev->dismantle;
}
 
/* Wait for outstanding qdisc-less dev_queue_xmit calls.
 * This is avoided if all devices are in dismantle phase :
 * Caller will call synchronize_net() for us
 */
-   if (sync_needed)
-   synchronize_net();
+   synchronize_net();
 
/* Wait for outstanding qdisc_run calls. */
-   list_for_each_entry(dev, head, close_list)
+   list_for_each_entry(dev, head, close_list) {
while (some_qdisc_is_busy(dev))
yield();
+   /* The new qdisc is assigned at this point so we can safely
+* unwind stale skb lists and qdisc statistics
+*/
+   netdev_for_each_tx_queue(dev, dev_qdisc_reset, NULL);
+   if (dev_ingress_queue(dev))
+   dev_qdisc_reset(dev, dev_ingress_queue(dev), NULL);
+   }
 }
 
 void dev_deactivate(struct net_device *dev)



