Public bug reported:

Hello!

OVSHybridIptablesFirewallDriver completely hang neutron l3-agent during
a very long time when using a large number of firewall rules (400 in our
case). After debugging, it appears that we are locked in
neutron/agent/linux/iptables_manager.py in function
_generate_chain_diff_iptables_commands and exactly at line "for line in
difflib.ndiff(old_chain_rules, new_chain_rules):".

Ex.

iptables_manager.py(784):     for line in difflib.ndiff(old_chain_rules, 
new_chain_rules):
 --- modulename: difflib, funcname: compare
difflib.py(922):             for line in g:
 --- modulename: difflib, funcname: _dump
difflib.py(927):         for i in xrange(lo, hi):
difflib.py(910):         for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
difflib.py(911):             if tag == 'replace':
difflib.py(912):                 g = self._fancy_replace(a, alo, ahi, b, blo, 
bhi)
difflib.py(922):             for line in g:
 --- modulename: difflib, funcname: _fancy_replace
difflib.py(966):         best_ratio, cutoff = 0.74, 0.75
difflib.py(967):         cruncher = SequenceMatcher(self.charjunk)
 --- modulename: difflib, funcname: __init__
difflib.py(218):         self.isjunk = isjunk
difflib.py(219):         self.a = self.b = None
difflib.py(220):         self.autojunk = autojunk
difflib.py(221):         self.set_seqs(a, b)
 --- modulename: difflib, funcname: set_seqs
difflib.py(232):         self.set_seq1(a)
 --- modulename: difflib, funcname: set_seq1
difflib.py(256):         if a is self.a:
difflib.py(258):         self.a = a
difflib.py(259):         self.matching_blocks = self.opcodes = None
difflib.py(233):         self.set_seq2(b)
 --- modulename: difflib, funcname: set_seq2
difflib.py(282):         if b is self.b:
difflib.py(284):         self.b = b
difflib.py(285):         self.matching_blocks = self.opcodes = None
difflib.py(286):         self.fullbcount = None
difflib.py(287):         self.__chain_b()
 --- modulename: difflib, funcname: __chain_b
difflib.py(317):         b = self.b
difflib.py(318):         self.b2j = b2j = {}
difflib.py(320):         for i, elt in enumerate(b):
difflib.py(325):         junk = set()
difflib.py(326):         isjunk = self.isjunk
difflib.py(327):         if isjunk:
difflib.py(328):             for elt in list(b2j.keys()):  # using list() since 
b2j is modified
difflib.py(334):         popular = set()
difflib.py(335):         n = len(b)
difflib.py(336):         if self.autojunk and n >= 200:
difflib.py(347):         self.isbjunk = junk.__contains__
difflib.py(348):         self.isbpopular = popular.__contains__
difflib.py(968):         eqi, eqj = None, None   # 1st indices of equal lines 
(if any)
difflib.py(973):         for j in xrange(blo, bhi):
difflib.py(974):             bj = b[j]
difflib.py(975):             cruncher.set_seq2(bj)
 --- modulename: difflib, funcname: set_seq2
difflib.py(282):         if b is self.b:
difflib.py(284):         self.b = b

[...]

difflib.py(418):             for j in b2j.get(a[i], nothing):
difflib.py(420):                 if j < blo:
difflib.py(422):                 if j >= bhi:
difflib.py(424):                 k = newj2len[j] = j2lenget(j-1, 0) + 1
difflib.py(425):                 if k > bestsize:
difflib.py(418):             for j in b2j.get(a[i], nothing):
difflib.py(420):                 if j < blo:
difflib.py(422):                 if j >= bhi:
difflib.py(424):                 k = newj2len[j] = j2lenget(j-1, 0) + 1
difflib.py(425):                 if k > bestsize:
difflib.py(418):             for j in b2j.get(a[i], nothing):
difflib.py(420):                 if j < blo:
difflib.py(422):                 if j >= bhi:
difflib.py(424):                 k = newj2len[j] = j2lenget(j-1, 0) + 1
difflib.py(425):                 if k > bestsize:
difflib.py(418):             for j in b2j.get(a[i], nothing):
difflib.py(420):                 if j < blo:

...


In facts, it seems that performance of difflib.ndiff decrease rapidly with 
length of arrays. But, if I am right, finally we just add a -D rule_number ... 
when a rule has to be deleted and -I rule_number ... when a rule has to be 
added. So, we have to do both (-D / -I) when a new rule is different, nothing 
if rule is the same, only -D when we have less rules than before and -I when we 
have more rules than before.

Couldn't we change the algorithm with something like that (which is more
than 100x faster) or perhaps something else better ?

def _generate_chain_diff_iptables_commands(chain, old_chain_rules,
                                          new_chain_rules):
    statements = []

    for i in range(0, max(len(old_chain_rules), len(new_chain_rules))):
      if i >= len(old_chain_rules):
           rule = new_chain_rules[i][5:].split(' ', 1)[-1]
           # rule inserted at this position
           statements.append('-I %s %d %s' % (chain, i+1, rule))
      elif i >= len(new_chain_rules):
         statements.append('-D %s %d' % (chain, i+1))
      elif old_chain_rules[i] == new_chain_rules[i]:
         continue
      else:
         statements.append('-D %s %d' % (chain, i+1))
 
         rule = new_chain_rules[i][5:].split(' ', 1)[-1]
         # rule inserted at this position
         statements.append('-I %s %d %s' % (chain, i+1, rule))
    return statements

** Affects: neutron
     Importance: Undecided
         Status: New

-- 
You received this bug notification because you are a member of Yahoo!
Engineering Team, which is subscribed to neutron.
https://bugs.launchpad.net/bugs/1618430

Title:
  OVSHybridIptablesFirewallDriver could hang neutron l3-agent

Status in neutron:
  New

Bug description:
  Hello!

  OVSHybridIptablesFirewallDriver completely hang neutron l3-agent
  during a very long time when using a large number of firewall rules
  (400 in our case). After debugging, it appears that we are locked in
  neutron/agent/linux/iptables_manager.py in function
  _generate_chain_diff_iptables_commands and exactly at line "for line
  in difflib.ndiff(old_chain_rules, new_chain_rules):".

  Ex.

  iptables_manager.py(784):     for line in difflib.ndiff(old_chain_rules, 
new_chain_rules):
   --- modulename: difflib, funcname: compare
  difflib.py(922):             for line in g:
   --- modulename: difflib, funcname: _dump
  difflib.py(927):         for i in xrange(lo, hi):
  difflib.py(910):         for tag, alo, ahi, blo, bhi in 
cruncher.get_opcodes():
  difflib.py(911):             if tag == 'replace':
  difflib.py(912):                 g = self._fancy_replace(a, alo, ahi, b, blo, 
bhi)
  difflib.py(922):             for line in g:
   --- modulename: difflib, funcname: _fancy_replace
  difflib.py(966):         best_ratio, cutoff = 0.74, 0.75
  difflib.py(967):         cruncher = SequenceMatcher(self.charjunk)
   --- modulename: difflib, funcname: __init__
  difflib.py(218):         self.isjunk = isjunk
  difflib.py(219):         self.a = self.b = None
  difflib.py(220):         self.autojunk = autojunk
  difflib.py(221):         self.set_seqs(a, b)
   --- modulename: difflib, funcname: set_seqs
  difflib.py(232):         self.set_seq1(a)
   --- modulename: difflib, funcname: set_seq1
  difflib.py(256):         if a is self.a:
  difflib.py(258):         self.a = a
  difflib.py(259):         self.matching_blocks = self.opcodes = None
  difflib.py(233):         self.set_seq2(b)
   --- modulename: difflib, funcname: set_seq2
  difflib.py(282):         if b is self.b:
  difflib.py(284):         self.b = b
  difflib.py(285):         self.matching_blocks = self.opcodes = None
  difflib.py(286):         self.fullbcount = None
  difflib.py(287):         self.__chain_b()
   --- modulename: difflib, funcname: __chain_b
  difflib.py(317):         b = self.b
  difflib.py(318):         self.b2j = b2j = {}
  difflib.py(320):         for i, elt in enumerate(b):
  difflib.py(325):         junk = set()
  difflib.py(326):         isjunk = self.isjunk
  difflib.py(327):         if isjunk:
  difflib.py(328):             for elt in list(b2j.keys()):  # using list() 
since b2j is modified
  difflib.py(334):         popular = set()
  difflib.py(335):         n = len(b)
  difflib.py(336):         if self.autojunk and n >= 200:
  difflib.py(347):         self.isbjunk = junk.__contains__
  difflib.py(348):         self.isbpopular = popular.__contains__
  difflib.py(968):         eqi, eqj = None, None   # 1st indices of equal lines 
(if any)
  difflib.py(973):         for j in xrange(blo, bhi):
  difflib.py(974):             bj = b[j]
  difflib.py(975):             cruncher.set_seq2(bj)
   --- modulename: difflib, funcname: set_seq2
  difflib.py(282):         if b is self.b:
  difflib.py(284):         self.b = b

  [...]

  difflib.py(418):             for j in b2j.get(a[i], nothing):
  difflib.py(420):                 if j < blo:
  difflib.py(422):                 if j >= bhi:
  difflib.py(424):                 k = newj2len[j] = j2lenget(j-1, 0) + 1
  difflib.py(425):                 if k > bestsize:
  difflib.py(418):             for j in b2j.get(a[i], nothing):
  difflib.py(420):                 if j < blo:
  difflib.py(422):                 if j >= bhi:
  difflib.py(424):                 k = newj2len[j] = j2lenget(j-1, 0) + 1
  difflib.py(425):                 if k > bestsize:
  difflib.py(418):             for j in b2j.get(a[i], nothing):
  difflib.py(420):                 if j < blo:
  difflib.py(422):                 if j >= bhi:
  difflib.py(424):                 k = newj2len[j] = j2lenget(j-1, 0) + 1
  difflib.py(425):                 if k > bestsize:
  difflib.py(418):             for j in b2j.get(a[i], nothing):
  difflib.py(420):                 if j < blo:

  ...

  
  In facts, it seems that performance of difflib.ndiff decrease rapidly with 
length of arrays. But, if I am right, finally we just add a -D rule_number ... 
when a rule has to be deleted and -I rule_number ... when a rule has to be 
added. So, we have to do both (-D / -I) when a new rule is different, nothing 
if rule is the same, only -D when we have less rules than before and -I when we 
have more rules than before.

  Couldn't we change the algorithm with something like that (which is
  more than 100x faster) or perhaps something else better ?

  def _generate_chain_diff_iptables_commands(chain, old_chain_rules,
                                            new_chain_rules):
      statements = []

      for i in range(0, max(len(old_chain_rules), len(new_chain_rules))):
        if i >= len(old_chain_rules):
             rule = new_chain_rules[i][5:].split(' ', 1)[-1]
             # rule inserted at this position
             statements.append('-I %s %d %s' % (chain, i+1, rule))
        elif i >= len(new_chain_rules):
           statements.append('-D %s %d' % (chain, i+1))
        elif old_chain_rules[i] == new_chain_rules[i]:
           continue
        else:
           statements.append('-D %s %d' % (chain, i+1))
   
           rule = new_chain_rules[i][5:].split(' ', 1)[-1]
           # rule inserted at this position
           statements.append('-I %s %d %s' % (chain, i+1, rule))
      return statements

To manage notifications about this bug go to:
https://bugs.launchpad.net/neutron/+bug/1618430/+subscriptions

-- 
Mailing list: https://launchpad.net/~yahoo-eng-team
Post to     : yahoo-eng-team@lists.launchpad.net
Unsubscribe : https://launchpad.net/~yahoo-eng-team
More help   : https://help.launchpad.net/ListHelp

Reply via email to