sorting SNMP OIDs fast in scripting languages

Kragen Sitaker Fri, 30 Jan 2004 01:39:48 -0800

Like other things posted here without notices to the contrary, this
code is in the public domain.


Here's qd.py:

def chomp(text):
    while text[-1:] in ['\r', '\n']: text = text[:-1]
    return text

assert chomp('a\n') == 'a'
assert chomp('\n') == ''
assert chomp('') == ''

def untabify(text):
    def dumb_tab_exp(char):
        if char != '\t': return char
        return '    '
    return ''.join(map(dumb_tab_exp, text))


And here's some code that sorts a bunch of OIDs lexically, using
qd.py:

#!/usr/bin/python
import qd, sys, time
oids = map(qd.chomp, file(sys.argv[1]))
#for oid in oids: print oid
def sortem(oids):
    split_oids = [[int(num) for num in oid.split('.') if num != ''] for oid in oids]
    #split_oids.sort()
    return ['.' + '.'.join([str(edge) for edge in oid]) for oid in split_oids]
nn = 30
start = time.time()
for ii in range(nn): sortem(oids)
# This says 0.754 per second, or 1326 ms per.
# If we eliminate the sort, it's 0.884, or 1131 ms per.
# The conversion costs totally overwhelm the sorting time.
print nn / (time.time() - start)
#for oid in sortem(oids): print oid

Here's code that compares the speed of several different ways to sort
a bunch of OIDs lexically:

#!/usr/bin/perl -w
use strict;
# I can't believe this isn't included in the standard Perl distribution!!
use Time::HiRes;
use Benchmark qw(timethese);
use Data::Dumper;
use lib '../pkgs/Net-SNMP-4.1.2/lib';  # probably this is only useful to me
use Net::SNMP ();

# sorts 3877 SNMP OIDs in a bunch of different ways.

# N lg N for 3877 is 46254 or so.  The best approach I've come with so
# far is 0.33 seconds for all of that, which is 7 milliseconds per
# comparison or so.  What crappy-ass performance!

{
  no warnings 'uninitialized';
  sub _oid_cmp {
    my @achunks = split /\./, $a;
    my @bchunks = split /\./, $b;
    my $len = $#achunks < $#bchunks ? $#achunks : $#bchunks;
    my $result;
    for (0..$len) {
      $result = $achunks[$_] <=> $bchunks[$_];
      return $result if $result;
    }
    return @achunks <=> @bchunks;
  }

  sub dumbcache {
    my ($list) = @_;
    my %cache;
    return sort {
      ($cache{$a} ||= pack "w*", split /\./, $a)
        cmp
          ($cache{$b} ||= pack "w*", split /\./, $b)
        } @$list;
  }

  {
    my %cheat;
    sub cheatdumbcache {
      my ($list) = @_;
      return sort {
        ($cheat{$a} ||= pack "w*", split /\./, $a)
          cmp
            ($cheat{$b} ||= pack "w*", split /\./, $b)
          } @$list;
    }
  }

  sub grt {
    my ($list) = @_;
    my @maplist = map { pack "N*", split /\./ } @$list;
    return map { '.' . join '.', unpack "xxxxN*", $_ } sort @maplist;
  }

  sub hashgrt {
    my ($list) = @_;
    my %hash = map { ((pack "N*", split /\./) => $_) } @$list;
    #print Dumper \%hash;
    return map { $hash{$_} } sort keys %hash;
  }

  sub hashgrt2 {
    my ($list) = @_;
    my %hash;
    for (@$list) {
      $hash{pack "N*", split /\./} = $_;
    }
    return map { $hash{$_} } sort keys %hash;
  }

  sub berhashgrt {
    my ($list) = @_;
    my %hash = map { ((pack "w*", split /\./) => $_) } @$list;
    #print Dumper \%hash;
    return map { $hash{$_} } sort keys %hash;
  }

  sub berhashgrt2 {
    my ($list) = @_;
    my %hash;
    for my $oid (@$list) {
      $hash{pack 'w*', split /\./, $oid} = $oid;
    }
    return map { $hash{$_} } sort keys %hash;
  }

  sub berhashgrt2_nosort {
    my ($list) = @_;
    my %hash;
    for my $oid (@$list) {
      $hash{pack 'w*', split /\./, $oid} = $oid;
    }
    return map { $hash{$_} } keys %hash;
  }
}

my %attempts = (
                # 0.83 per second:
                orcish2 => sub {
                  my ($list) = @_;
                  my %orcish;
                  return sort { $orcish{$a}{$b} ||= _oid_cmp } @$list;
                },
                # 0.85 per second:
                orcish => sub {
                  my ($list) = @_;
                  my %orcish;
                  return sort { $orcish{"$a $b"} ||= _oid_cmp } @$list;
                },
                # 0.94 per second:
                dumb => sub {
                  my ($list) = @_;
                  return sort _oid_cmp @$list;
                  my @rv = sort _oid_cmp @$list;
                },
                # 1.4 per second:
                re_grt => sub {
                  my ($list) = @_;
                  my @maplist = map { pack "w*", /(\d+)/g } @$list;
                  return map { '.' . join '.', unpack 'w*', $_ } sort @maplist;
                },
                # 1.8 per second:
                grt => \&grt,
                # 2.4 per second:
                hashgrt => \&hashgrt,
                # 2.4 per second:
                berhashgrt => \&berhashgrt,
                # 2.7 per second:
                net_snmp => sub {
                  my ($list) = @_;
                  return Net::SNMP::oid_lex_sort(@$list);
                },
                # 2.8 per second:
                hashgrt2 => \&hashgrt2,
                # 2.9 per second:
                # that means 345 ms each, 28 ms more than not even sorting.
                berhashgrt2 => \&berhashgrt2,
                # try to make 'dumb' a little smarter
                # 3.0 per second:
                dumbcache => \&dumbcache,
                # 3.15 per second, but doesn't work:
                # that means 317 ms each.
                berhashgrt2_nosort => \&berhashgrt2_nosort,
                # 8.0/s, but doesn't actually work
                unixsort => sub {
                  my ($list) = @_;
                  open FOO, ">foo.tmp";
                  print FOO map { $_, "\n" } @$list;
                  my @results = `sort -n foo.tmp`;
                  chomp @results;
                  return @results;
                },
                # 16 per second
                # like dumbcache, but doesn't regenerate cache each time
                cheatdumbcache => \&cheatdumbcache,
);


my @testdata = (qw(
  .1.3.6.1.4.1.522.3.4.5.1.3
  .1
  .12
  .1.3.6.1.4.1.522.3.4.6.1.3
  .1.3.6.1.4.1.522.3.4.7.1.23
  .1.3.6.1.4.1.522.3.4.4.1.3
  .1.3.6.1.4.1.522.3.4.4.1.23
  .1.3.6.1.4.1.522.3.4.3.1.3
  .1.3.6.1.4.1.522.3.4.3.1.23
  .1.3.6.1.4.1.522.3.4.2.1.3
  .1.3.6.1.4.1.522.3.4.2.1.23
  .1.3.6.1.4.1.522.1.4.2.1.2.3.2.15
  .1.3.6.1.4.1.522.1.4.2.1.2.3.2.16
  .1.3.6.1.4.1.522.1.4.2.1.2.3.2.17
  .1.3.6.1.4.1.522.1.4.2.1.2.3.2.18
  .1.3.6.1.4.1.522.1.4.2.1.2.3.2.19
  .1.3.6.1.4.1.522.1.4.2.1.2.3.2.20
  .1.3.6.1.4.1.522.1.4.2.1.3.3.2.15
  .1.3.6.1.4.1.522.1.4.2.1.3.3.2.16
  .1.3.6.1.4.1.522.1.4.2.1.3.3.2.17
  .1.3.6.1.4.1.522.1.4.2.1.3.3.2.18
  .1.3.6.1.4.1.522.1.4.2.1.3.3.2.19
  .1.3.6.1.4.1.522.1.4.2.1.3.3.2.20
  .1.3.6.1.4.1.522.3.4.6.1.23
  .1.3.6.1.4.1.522.3.4.7.1.3
  .1.3.6.1.4.1.522.1.4.2.1.4.3.2.15
  .1.3.6.1.4.1.522.1.4.2.1.4.3.2.16
  .1.3.6.1.4.1.522.1.4.2.1.4.3.2.17
  .1.3.6.1.4.1.522.1.4.2.1.4.3.2.18
  .1.3.6.1.4.1.522.1.4.2.1.4.3.2.19
  .1.3.6.1.4.1.522.1.4.2.1.4.3.2.20
  .1.3.6.1.4.1.522.1.4.2.1.5.3.2.15
  .1.3.6.1.4.1.522.1.4.2.1.5.3.2.16
  .1.3.6.1.4.1.522.1.4.2.1.5.3.2.17
  .1.3.6.1.4.1.522.1.4.2.1.5.3.2.18
  .1.3.6.1.4.1.522.1.4.2.1.5.3.2.19
  .1.3.6.1.4.1.522.1.4.2.1.5.3.2.20
), map {
  my $col = $_;
  map { ".1.3.6.1.4.1.522.4.5.1.$col.108.28.31.3.4.$_" } 0..255
} (2, 4, 1, 3, 15, 14, 11, 6, 12, 5, 10, 7, 9, 13, 8));

print "sorting " . @testdata . " oids\n";
#print map { $_, "\n" } @testdata;

#print map { "$_\n" } $attempts{dumb}([EMAIL PROTECTED]);

my @sorted = $attempts{dumb}([EMAIL PROTECTED]);
algorithm: for my $key (keys %attempts) {
  print "$key: ";
  my @maybe = $attempts{$key}([EMAIL PROTECTED]);
  if (@maybe != @sorted) {
    print "len wrong (" . @maybe . " != " . @sorted . ")\n";
    next algorithm;
  }
  for (0..$#maybe) {
    if ($sorted[$_] ne $maybe[$_]) {
      print "mismatch at $_: $maybe[$_] ne $sorted[$_]\n";
      next algorithm;
    }
  }
  print "ok\n";
}

my @result;
my %time_attempts = (map { my $name = $_; ($name => sub { @result = 
$attempts{$name}->([EMAIL PROTECTED]) }) } 
  keys %attempts);
timethese(10, \%time_attempts);

sorting SNMP OIDs fast in scripting languages

Reply via email to