BBlack has submitted this change and it was merged.

Change subject: Add optional RSS setup to interface RPS script
......................................................................
Add optional RSS setup to interface RPS script Adds a second parameter as an RSS pattern match in /proc/interrupts for finding the RSS IRQs that match the RPS queues. If specified, it will be used to do matching RSS+RPS assignments to the CPUs. The pattern must contain a "%d" placeholder for the location of the queue number in the interrupt action name. Unfortunately I haven't found any generic way to figure out the RSS IRQ action name automagically, or to avoid text-parsing /proc/interrupts. The parameter, when used, will be driver/hardware -specific. This also disables irqbalance if RSS is in use. Change-Id: Ib51ad49935e86d858362c5f8df3d6c16bbe21a6b --- M modules/interface/files/interface-rps.py M modules/interface/manifests/rps.pp M modules/interface/templates/enable-rps.conf.erb A modules/irqbalance/manifests/disable.pp A modules/irqbalance/manifests/init.pp 5 files changed, 77 insertions(+), 15 deletions(-) Approvals: BBlack: Verified; Looks good to me, approved jenkins-bot: Verified diff --git a/modules/interface/files/interface-rps.py b/modules/interface/files/interface-rps.py index 4efccb1..8dda9a5 100755 --- a/modules/interface/files/interface-rps.py +++ b/modules/interface/files/interface-rps.py @@ -6,12 +6,17 @@ # what's common advice out there (all CPUs to all queues), as experience has # shown a tremendous difference. # -# Author: Faidon Liambotis -# Copyright (c) 2013 Wikimedia Foundation, Inc. +# Also sets up matching Receive Side Scaling (RSS) IRQ affinities if +# given a second parameter for lookups in /proc/interrupts. e.g. for +# bnx2x, this would be "eth0-fp-%d". +# +# Authors: Faidon Liambotis and Brandon Black +# Copyright (c) 2013-2014 Wikimedia Foundation, Inc. 
import os import glob import sys +import re def get_value(path): @@ -49,10 +54,35 @@ 'rx-*')) rx_queues = [int(os.path.basename(q)[3:]) for q in rx_nodes] - return rx_queues + return sorted(rx_queues) -def assign_rx_queue_to_cpus(device, rx_queue, cpus): +def get_rx_irqs(rss_pattern, rx_queues): + """Find RSS IRQs for rx queues matching rss_pattern (e.g. 'eth0-fp-%d')""" + + # create a dictionary of rxq:rx_irq, e.g. + # { 0: '128', 1: '129', 2: '130', ... } + irqs = {} + rss_pat_asre = re.sub('%d', r'(\d+)', rss_pattern) + rss_re = re.compile(r'^\s*([0-9]+):.*\s' + rss_pat_asre + r'\n$') + irq_file = open('/proc/interrupts', 'r') + for line in irq_file: + match = rss_re.match(line) + if match: + irqs[int(match.group(2))] = match.group(1) + + # If we don't get an *exact* match for the rx_queues list, give up + if len(irqs) != len(rx_queues): + raise Exception('RSS IRQ count mismatch for pattern %s' % rss_pattern) + for rxq in rx_queues: + if rxq not in irqs: + raise Exception('RSS IRQ missing for queue %d' % rxq) + + # Return a dict of rxq:rx_irq that matches rx_queues + return irqs + + +def assign_rx_queue_to_cpus(device, cpus, rx_queue, rx_irq): """Assign a device's RX queue to a CPU set""" bitmask = 0 for cpu in cpus: @@ -60,11 +90,14 @@ rx_node = os.path.join('/sys/class/net', device, 'queues', 'rx-%s' % rx_queue, 'rps_cpus') - write_value(rx_node, format(bitmask, 'x')) + if rx_irq: + irq_node = '/proc/irq/%s/smp_affinity' % rx_irq + write_value(irq_node, format(bitmask, 'x')) -def distribute_rx_queues_to_cpus(device, rx_queues, cpu_list): + +def distribute_rx_queues_to_cpus(device, cpu_list, rx_queues, rx_irqs): """Performs a smart distribution of RX queues to CPUs (or vice-versa)""" if len(rx_queues) >= len(cpu_list): # try to divide queues / CPUs and assign N CPUs per queue, isolated @@ -73,7 +106,7 @@ for i, cpu in enumerate(cpu_list): for j in range(quot): rxq = rx_queues[i*quot + j] - assign_rx_queue_to_cpus(device, rxq, [cpu]) + 
assign_rx_queue_to_cpus(device, [cpu], rxq, rx_irqs[rxq]) # if there are remainder queues, split CPU list into rem subgroups # (with trailing remainder of CPUs left out), one per queue @@ -81,7 +114,7 @@ cquot = len(cpu_list)/rem for i, rxq in enumerate(rx_queues[-rem:]): cpu_sublist = cpu_list[i * cquot:(i + 1) * cquot] - assign_rx_queue_to_cpus(device, rxq, cpu_sublist) + assign_rx_queue_to_cpus(device, cpu_sublist, rxq, rx_irqs[rxq]) else: # do the opposite division @@ -92,7 +125,7 @@ cpus = [] for j in range(quot): cpus.append(cpu_list[i*quot + j]) - assign_rx_queue_to_cpus(device, rxq, cpus) + assign_rx_queue_to_cpus(device, cpus, rxq, rx_irqs[rxq]) def main(): @@ -102,11 +135,23 @@ except IndexError: device = 'eth0' + try: + rss_pattern = sys.argv[2] + except IndexError: + rss_pattern = None + cpu_list = get_cpu_list() rx_queues = get_rx_queues(device) - distribute_rx_queues_to_cpus(device, rx_queues, cpu_list) + if rss_pattern: + if rss_pattern.count('%') != 1 or rss_pattern.count('%d') != 1: + raise Exception('The RSS pattern must contain a single %d') + rx_irqs = get_rx_irqs(rss_pattern, rx_queues) + else: + # fill in a dict of None to simplify distribution code + rx_irqs = {rxq: None for rxq in rx_queues} + distribute_rx_queues_to_cpus(device, cpu_list, rx_queues, rx_irqs) if __name__ == '__main__': main() diff --git a/modules/interface/manifests/rps.pp b/modules/interface/manifests/rps.pp index 7c09a13..3092e6a 100644 --- a/modules/interface/manifests/rps.pp +++ b/modules/interface/manifests/rps.pp @@ -1,14 +1,24 @@ # Definition: interface::rps # -# Automagically sets RPS for an interface +# Automagically sets RPS (and optionally, RSS) for an interface # # Parameters: # - $interface: # The network interface to operate on -define interface::rps { +# - $rss_pattern: +# Optional RSS IRQ name pattern +# If set (to hw-specific value), RSS will be enabled as well +# Must contain a single "%d" format character for the queue number +# (on bnx2x, this would be 
"eth0-fp-%d") +define interface::rps( $rss_pattern="" ) { require interface::rpstools $interface = $title + + # Disable irqbalance if RSS in use + if $rss_pattern != "" { + require irqbalance::disable + } file { "/etc/init/enable-rps-$interface.conf": owner => 'root', @@ -18,7 +28,7 @@ } exec { "interface-rps $interface": - command => "/usr/local/sbin/interface-rps $interface", + command => "/usr/local/sbin/interface-rps $interface $rss_pattern", subscribe => File["/etc/init/enable-rps-$interface.conf"], require => File["/etc/init/enable-rps-$interface.conf"], } diff --git a/modules/interface/templates/enable-rps.conf.erb b/modules/interface/templates/enable-rps.conf.erb index 9368149..9282196 100644 --- a/modules/interface/templates/enable-rps.conf.erb +++ b/modules/interface/templates/enable-rps.conf.erb @@ -1,9 +1,9 @@ # enable-rps -description "Enable RPS on <%= @interface %> RX queues" +description "Enable RPS/RSS on <%= @interface %> RX queues" start on filesystem task script - interface-rps <%= @interface %> + interface-rps <%= @interface %> <%= @rss_pattern %> end script diff --git a/modules/irqbalance/manifests/disable.pp b/modules/irqbalance/manifests/disable.pp new file mode 100644 index 0000000..a216212 --- /dev/null +++ b/modules/irqbalance/manifests/disable.pp @@ -0,0 +1,7 @@ + +class irqbalance::disable { + service { "irqbalance": + enable => false, + ensure => stopped, + } +} diff --git a/modules/irqbalance/manifests/init.pp b/modules/irqbalance/manifests/init.pp new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/modules/irqbalance/manifests/init.pp -- To view, visit https://gerrit.wikimedia.org/r/135931 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ib51ad49935e86d858362c5f8df3d6c16bbe21a6b Gerrit-PatchSet: 5 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: BBlack <bbl...@wikimedia.org> Gerrit-Reviewer: BBlack <bbl...@wikimedia.org> Gerrit-Reviewer: 
Faidon Liambotis <fai...@wikimedia.org>
Gerrit-Reviewer: Mark Bergsma <m...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits