[PATCH v2 net-next] virtio: Fix affinity for #VCPUs != #queue pairs

2017-02-07 Thread Ben Serebrin
From: Benjamin Serebrin <sereb...@google.com>

If the number of virtio queue pairs is not equal to the
number of VCPUs, the virtio guest driver doesn't assign
any CPU affinity for the queue interrupts, nor any XPS
transmit steering.  (In contrast, the driver does assign
both if the counts of VCPUs and queues are equal, which is a good
default behavior.)

Google Compute Engine currently provides one queue pair for
every VCPU, but limits that to a maximum of 32 queue pairs.

This code extends the driver's default interrupt affinity
and transmit (XPS) affinity settings to the case where the
queue and VCPU counts do not match.  Userspace affinity
adjustment may still be needed to tune for a given workload.
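
As a rough sketch of the resulting layout (a standalone illustration, not
driver code; it assumes the online CPUs are numbered 0..ncpus-1, that there
are at least as many CPUs as queue pairs, and the 64-VCPU/32-queue numbers
from the test below):

#include <stdio.h>

/* For each queue pair i, print the CPU its interrupts are pinned to (CPU i)
 * and the CPUs its TX queue's XPS map stripes across (i, i + nqueues,
 * i + 2*nqueues, ...).  With ncpus == 2 * nqueues this puts each CPU and
 * its hyperthread twin at CPU + ncpus/2 on the same TX queue.
 */
int main(void)
{
	const int ncpus = 64, nqueues = 32;	/* assumed topology */
	int i, cpu;

	for (i = 0; i < nqueues; i++) {
		printf("queue %2d: irq -> cpu %2d, xps ->", i, i);
		for (cpu = i; cpu < ncpus; cpu += nqueues)
			printf(" cpu%d", cpu);
		printf("\n");
	}
	return 0;
}

Queue 0, for example, comes out as "irq -> cpu  0, xps -> cpu0 cpu32",
which is the hyperthread-twin sharing described in the patch comment below.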

Tested:

(on a 64-VCPU VM with debian 8, jessie-backports 4.9.2)

Without the fix we see all queues affinitized to all CPUs:

cd /proc/irq
for i in `seq 24 92` ; do sudo grep ".*" $i/smp_affinity_list;  done
0-63
[...]
0-63

and we see all TX queues' xps_cpus affinitized to no cores:

for i in `seq 0 31` ; do sudo grep ".*" tx-$i/xps_cpus; done
,
[...]
,

With the fix, we see each queue assigned to a single core,
and xps affinity set to one unique CPU per TX queue.

64 VCPU:

cd /proc/irq
for i in `seq 24 92` ; do sudo grep ".*" $i/smp_affinity_list;  done

0-63
0
0
1
1
2
2
3
3
4
4
5
5
6
6
7
7
8
8
9
9
10
10
11
11
12
12
13
13
14
14
15
15
16
16
17
17
18
18
19
19
20
20
21
21
22
22
23
23
24
24
25
25
26
26
27
27
28
28
29
29
30
30
31
31
0-63
0-63
0-63
0-63

cd /sys/class/net/eth0/queues
for i in `seq 0 31` ; do sudo grep ".*" tx-$i/xps_cpus;  done

0001,0001
0002,0002
0004,0004
0008,0008
0010,0010
0020,0020
0040,0040
0080,0080
0100,0100
0200,0200
0400,0400
0800,0800
1000,1000
2000,2000
4000,4000
8000,8000
0001,0001
0002,0002
0004,0004
0008,0008
0010,0010
0020,0020
0040,0040
0080,0080
0100,0100
0200,0200
0400,0400
0800,0800
1000,1000
2000,2000
4000,4000
8000,8000

48 VCPU:

cd /proc/irq
for i in `seq 24 92` ; do sudo grep ".*" $i/smp_affinity_list;  done
0-47
0
0
1
1
2
2
3
3
4
4
5
5
6
6
7
7
8
8
9
9
10
10
11
11
12
12
13
13
14
14
15
15
16
16
17
17
18
18
19
19
20
20
21
21
22
22
23
23
24
24
25
25
26
26
27
27
28
28
29
29
30
30
31
31
0-47
0-47
0-47
0-47

cd /sys/class/net/eth0/queues
for i in `seq 0 31` ; do sudo grep ".*" tx-$i/xps_cpus;  done

0001,0001
0002,0002
0004,0004
0008,0008
0010,0010
0020,0020
0040,0040
0080,0080
0100,0100
0200,0200
0400,0400
0800,0800
1000,1000
2000,2000
4000,4000
8000,8000
,0001
,0002
,0004
,0008
,0010
,0020
,0040
,0080
,0100
,0200
,0400
,0800
,1000
,2000
,4000
,8000

Acked-by: Willem de Bruijn <will...@google.com>
Acked-by: Jim Mattson <jmatt...@google.com>
Acked-by: Venkatesh Srinivas <venkate...@google.com>

Signed-off-by: Ben Serebrin <sereb...@google.com>
---
 drivers/net/virtio_net.c | 30 +++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 765c2d6358da..0dc3a102bfc4 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1502,20 +1502,44 @@ static void virtnet_set_affinity(struct virtnet_info *vi)
 * queue pairs, we let the queue pairs to be private to one cpu by
 * setting the affinity hint to eliminate the contention.
 */
-   if (vi->curr_queue_pairs == 1 ||
-   vi->max_queue_pairs != num_online_cpus()) {
+   if (vi->curr_queue_pairs == 1) {
virtnet_clean_affinity(vi, -1);
return;
}
 
+   /* If there are more cpus than queues, then assign the queues'
+* interrupts to the first cpus until we run out.
+*/
i = 0;
for_each_online_cpu(cpu) {
+   if (i == vi->max_queue_pairs)
+   break;
virtqueue_set_affinity(vi->rq[i].vq, cpu);
virtqueue_set_affinity(vi->sq[i].vq, cpu);
-   netif_set_xps_queue(vi->dev, cpumask_of(cpu), i);
i++;
}
 
+   /* Stripe the XPS affinities across the online CPUs.
+* Hyperthread pairs are typically assigned such that Linux's
+* CPU X and X + (numcpus / 2) are hyperthread twins, so we cause
+* hyperthread twins to share TX queues, in the case where there are
+* more cpus than queues.
+*/
+   for (i = 0; i < vi->max_queue_pairs; i++) {
+   struct cpumask mask;
+   int skip = i;

[PATCH net-next] virtio: Fix affinity for >32 VCPUs

2017-02-02 Thread Ben Serebrin
From: Benjamin Serebrin <sereb...@google.com>

If the number of virtio queue pairs is not equal to the
number of VCPUs, the virtio guest driver doesn't assign
any CPU affinity for the queue interrupts, nor any XPS
transmit steering.

Google Compute Engine currently provides one queue pair for
every VCPU, but limits that to a maximum of 32 queue pairs.

This code assigns interrupt affinity even when there are more than
32 VCPUs.

Tested:

(on a 64-VCPU VM with debian 8, jessie-backports 4.9.2)

Without the fix we see all queues affinitized to all CPUs:

cd /proc/irq
for i in `seq 24 92` ; do sudo grep ".*" $i/smp_affinity_list;  done
0-63
[...]
0-63

and we see all TX queues' xps_cpus affinitized to no cores:

for i in `seq 0 31` ; do sudo grep ".*" tx-$i/xps_cpus; done
,
[...]
,

With the fix, we see each queue assigned to a single core,
and xps affinity set to one unique CPU per TX queue.

64 VCPU:

cd /proc/irq
for i in `seq 24 92` ; do sudo grep ".*" $i/smp_affinity_list;  done

0-63
0
0
1
1
2
2
3
3
4
4
5
5
6
6
7
7
8
8
9
9
10
10
11
11
12
12
13
13
14
14
15
15
16
16
17
17
18
18
19
19
20
20
21
21
22
22
23
23
24
24
25
25
26
26
27
27
28
28
29
29
30
30
31
31
0-63
0-63
0-63
0-63

cd /sys/class/net/eth0/queues
for i in `seq 0 31` ; do sudo grep ".*" tx-$i/xps_cpus;  done

0001,0001
0002,0002
0004,0004
0008,0008
0010,0010
0020,0020
0040,0040
0080,0080
0100,0100
0200,0200
0400,0400
0800,0800
1000,1000
2000,2000
4000,4000
8000,8000
0001,0001
0002,0002
0004,0004
0008,0008
0010,0010
0020,0020
0040,0040
0080,0080
0100,0100
0200,0200
0400,0400
0800,0800
1000,1000
2000,2000
4000,4000
8000,8000

48 VCPU:

cd /proc/irq
for i in `seq 24 92` ; do sudo grep ".*" $i/smp_affinity_list;  done
0-47
0
0
1
1
2
2
3
3
4
4
5
5
6
6
7
7
8
8
9
9
10
10
11
11
12
12
13
13
14
14
15
15
16
16
17
17
18
18
19
19
20
20
21
21
22
22
23
23
24
24
25
25
26
26
27
27
28
28
29
29
30
30
31
31
0-47
0-47
0-47
0-47

cd /sys/class/net/eth0/queues
for i in `seq 0 31` ; do sudo grep ".*" tx-$i/xps_cpus;  done

0001,0001
0002,0002
0004,0004
0008,0008
0010,0010
0020,0020
0040,0040
0080,0080
0100,0100
0200,0200
0400,0400
0800,0800
1000,1000
2000,2000
4000,4000
8000,8000
,0001
,0002
,0004
,0008
,0010
,0020
,0040
,0080
,0100
,0200
,0400
,0800
,10000000
,2000
,4000
,8000

Signed-off-by: Ben Serebrin <sereb...@google.com>
Acked-by: Willem de Bruijn <will...@google.com>
Acked-by: Jim Mattson <jmatt...@google.com>
Acked-by: Venkatesh Srinivas <venkate...@google.com>

Effort: kvm
---
 drivers/net/virtio_net.c | 30 +++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 765c2d6358da..0dc3a102bfc4 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1502,20 +1502,44 @@ static void virtnet_set_affinity(struct virtnet_info *vi)
 * queue pairs, we let the queue pairs to be private to one cpu by
 * setting the affinity hint to eliminate the contention.
 */
-   if (vi->curr_queue_pairs == 1 ||
-   vi->max_queue_pairs != num_online_cpus()) {
+   if (vi->curr_queue_pairs == 1) {
virtnet_clean_affinity(vi, -1);
return;
}
 
+   /* If there are more cpus than queues, then assign the queues'
+* interrupts to the first cpus until we run out.
+*/
i = 0;
for_each_online_cpu(cpu) {
+   if (i == vi->max_queue_pairs)
+   break;
virtqueue_set_affinity(vi->rq[i].vq, cpu);
virtqueue_set_affinity(vi->sq[i].vq, cpu);
-   netif_set_xps_queue(vi->dev, cpumask_of(cpu), i);
i++;
}
 
+   /* Stripe the XPS affinities across the online CPUs.
+* Hyperthread pairs are typically assigned such that Linux's
+* CPU X and X + (numcpus / 2) are hyperthread twins, so we cause
+* hyperthread twins to share TX queues, in the case where there are
+* more cpus than queues.
+*/
+   for (i = 0; i < vi->max_queue_pairs; i++) {
+   struct cpumask mask;
+   int skip = i;
+
+   cpumask_clear(&mask);
+   for_each_online_cpu(cpu) {
+   while (skip--)
+   cpu = cpumask_next(cpu, cpu_online_mask);
+   if (cpu < num_possibl
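
To make the XPS striping concrete, here is a minimal sketch of what the
comment above describes, written against the virtnet_set_affinity() context
(vi, i and cpu) and the stock cpumask / netif_set_xps_queue() helpers; it is
an illustration of the technique, not the literal hunk:

	for (i = 0; i < vi->max_queue_pairs; i++) {
		struct cpumask mask;
		int n = 0;

		cpumask_clear(&mask);
		/* Give TX queue i every online CPU whose position in the
		 * online mask is congruent to i modulo the number of queue
		 * pairs; with twice as many CPUs as queues this lands CPU X
		 * and its twin X + ncpus/2 on the same queue.
		 */
		for_each_online_cpu(cpu) {
			if (n % vi->max_queue_pairs == i)
				cpumask_set_cpu(cpu, &mask);
			n++;
		}
		netif_set_xps_queue(vi->dev, &mask, i);
	}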