Like other things posted here without notices to the contrary, this
code is in the public domain.
Here's qd.py:
def chomp(text):
while text[-1:] in ['\r', '\n']: text = text[:-1]
return text
assert chomp('a\n') == 'a'
assert chomp('\n') == ''
assert chomp('') == ''
def untabify(text):
def dumb_tab_exp(char):
if char != '\t': return char
return ' '
return ''.join(map(dumb_tab_exp, text))
And here's some code that sorts a bunch of OIDs lexically, using
qd.py:
#!/usr/bin/python
import qd, sys, time
oids = map(qd.chomp, file(sys.argv[1]))
#for oid in oids: print oid
def sortem(oids):
split_oids = [[int(num) for num in oid.split('.') if num != ''] for oid in oids]
#split_oids.sort()
return ['.' + '.'.join([str(edge) for edge in oid]) for oid in split_oids]
nn = 30
start = time.time()
for ii in range(nn): sortem(oids)
# This says 0.754 per second, or 1326 ms per.
# If we eliminate the sort, it's 0.884, or 1131 ms per.
# The conversion costs totally overwhelm the sorting time.
print nn / (time.time() - start)
#for oid in sortem(oids): print oid
Here's code that compares the speed of several different ways to sort
a bunch of OIDs lexically:
#!/usr/bin/perl -w
use strict;
# I can't believe this isn't included in the standard Perl distribution!!
use Time::HiRes;
use Benchmark qw(timethese);
use Data::Dumper;
use lib '../pkgs/Net-SNMP-4.1.2/lib'; # probably this is only useful to me
use Net::SNMP ();
# sorts 3877 SNMP OIDs in a bunch of different ways.
# N lg N for 3877 is 46254 or so. The best approach I've come with so
# far is 0.33 seconds for all of that, which is 7 milliseconds per
# comparison or so. What crappy-ass performance!
{
no warnings 'uninitialized';
sub _oid_cmp {
my @achunks = split /\./, $a;
my @bchunks = split /\./, $b;
my $len = $#achunks < $#bchunks ? $#achunks : $#bchunks;
my $result;
for (0..$len) {
$result = $achunks[$_] <=> $bchunks[$_];
return $result if $result;
}
return @achunks <=> @bchunks;
}
sub dumbcache {
my ($list) = @_;
my %cache;
return sort {
($cache{$a} ||= pack "w*", split /\./, $a)
cmp
($cache{$b} ||= pack "w*", split /\./, $b)
} @$list;
}
{
my %cheat;
sub cheatdumbcache {
my ($list) = @_;
return sort {
($cheat{$a} ||= pack "w*", split /\./, $a)
cmp
($cheat{$b} ||= pack "w*", split /\./, $b)
} @$list;
}
}
sub grt {
my ($list) = @_;
my @maplist = map { pack "N*", split /\./ } @$list;
return map { '.' . join '.', unpack "xxxxN*", $_ } sort @maplist;
}
sub hashgrt {
my ($list) = @_;
my %hash = map { ((pack "N*", split /\./) => $_) } @$list;
#print Dumper \%hash;
return map { $hash{$_} } sort keys %hash;
}
sub hashgrt2 {
my ($list) = @_;
my %hash;
for (@$list) {
$hash{pack "N*", split /\./} = $_;
}
return map { $hash{$_} } sort keys %hash;
}
sub berhashgrt {
my ($list) = @_;
my %hash = map { ((pack "w*", split /\./) => $_) } @$list;
#print Dumper \%hash;
return map { $hash{$_} } sort keys %hash;
}
sub berhashgrt2 {
my ($list) = @_;
my %hash;
for my $oid (@$list) {
$hash{pack 'w*', split /\./, $oid} = $oid;
}
return map { $hash{$_} } sort keys %hash;
}
sub berhashgrt2_nosort {
my ($list) = @_;
my %hash;
for my $oid (@$list) {
$hash{pack 'w*', split /\./, $oid} = $oid;
}
return map { $hash{$_} } keys %hash;
}
}
my %attempts = (
# 0.83 per second:
orcish2 => sub {
my ($list) = @_;
my %orcish;
return sort { $orcish{$a}{$b} ||= _oid_cmp } @$list;
},
# 0.85 per second:
orcish => sub {
my ($list) = @_;
my %orcish;
return sort { $orcish{"$a $b"} ||= _oid_cmp } @$list;
},
# 0.94 per second:
dumb => sub {
my ($list) = @_;
return sort _oid_cmp @$list;
my @rv = sort _oid_cmp @$list;
},
# 1.4 per second:
re_grt => sub {
my ($list) = @_;
my @maplist = map { pack "w*", /(\d+)/g } @$list;
return map { '.' . join '.', unpack 'w*', $_ } sort @maplist;
},
# 1.8 per second:
grt => \&grt,
# 2.4 per second:
hashgrt => \&hashgrt,
# 2.4 per second:
berhashgrt => \&berhashgrt,
# 2.7 per second:
net_snmp => sub {
my ($list) = @_;
return Net::SNMP::oid_lex_sort(@$list);
},
# 2.8 per second:
hashgrt2 => \&hashgrt2,
# 2.9 per second:
# that means 345 ms each, 28 ms more than not even sorting.
berhashgrt2 => \&berhashgrt2,
# try to make 'dumb' a little smarter
# 3.0 per second:
dumbcache => \&dumbcache,
# 3.15 per second, but doesn't work:
# that means 317 ms each.
berhashgrt2_nosort => \&berhashgrt2_nosort,
# 8.0/s, but doesn't actually work
unixsort => sub {
my ($list) = @_;
open FOO, ">foo.tmp";
print FOO map { $_, "\n" } @$list;
my @results = `sort -n foo.tmp`;
chomp @results;
return @results;
},
# 16 per second
# like dumbcache, but doesn't regenerate cache each time
cheatdumbcache => \&cheatdumbcache,
);
my @testdata = (qw(
.1.3.6.1.4.1.522.3.4.5.1.3
.1
.12
.1.3.6.1.4.1.522.3.4.6.1.3
.1.3.6.1.4.1.522.3.4.7.1.23
.1.3.6.1.4.1.522.3.4.4.1.3
.1.3.6.1.4.1.522.3.4.4.1.23
.1.3.6.1.4.1.522.3.4.3.1.3
.1.3.6.1.4.1.522.3.4.3.1.23
.1.3.6.1.4.1.522.3.4.2.1.3
.1.3.6.1.4.1.522.3.4.2.1.23
.1.3.6.1.4.1.522.1.4.2.1.2.3.2.15
.1.3.6.1.4.1.522.1.4.2.1.2.3.2.16
.1.3.6.1.4.1.522.1.4.2.1.2.3.2.17
.1.3.6.1.4.1.522.1.4.2.1.2.3.2.18
.1.3.6.1.4.1.522.1.4.2.1.2.3.2.19
.1.3.6.1.4.1.522.1.4.2.1.2.3.2.20
.1.3.6.1.4.1.522.1.4.2.1.3.3.2.15
.1.3.6.1.4.1.522.1.4.2.1.3.3.2.16
.1.3.6.1.4.1.522.1.4.2.1.3.3.2.17
.1.3.6.1.4.1.522.1.4.2.1.3.3.2.18
.1.3.6.1.4.1.522.1.4.2.1.3.3.2.19
.1.3.6.1.4.1.522.1.4.2.1.3.3.2.20
.1.3.6.1.4.1.522.3.4.6.1.23
.1.3.6.1.4.1.522.3.4.7.1.3
.1.3.6.1.4.1.522.1.4.2.1.4.3.2.15
.1.3.6.1.4.1.522.1.4.2.1.4.3.2.16
.1.3.6.1.4.1.522.1.4.2.1.4.3.2.17
.1.3.6.1.4.1.522.1.4.2.1.4.3.2.18
.1.3.6.1.4.1.522.1.4.2.1.4.3.2.19
.1.3.6.1.4.1.522.1.4.2.1.4.3.2.20
.1.3.6.1.4.1.522.1.4.2.1.5.3.2.15
.1.3.6.1.4.1.522.1.4.2.1.5.3.2.16
.1.3.6.1.4.1.522.1.4.2.1.5.3.2.17
.1.3.6.1.4.1.522.1.4.2.1.5.3.2.18
.1.3.6.1.4.1.522.1.4.2.1.5.3.2.19
.1.3.6.1.4.1.522.1.4.2.1.5.3.2.20
), map {
my $col = $_;
map { ".1.3.6.1.4.1.522.4.5.1.$col.108.28.31.3.4.$_" } 0..255
} (2, 4, 1, 3, 15, 14, 11, 6, 12, 5, 10, 7, 9, 13, 8));
print "sorting " . @testdata . " oids\n";
#print map { $_, "\n" } @testdata;
#print map { "$_\n" } $attempts{dumb}([EMAIL PROTECTED]);
my @sorted = $attempts{dumb}([EMAIL PROTECTED]);
algorithm: for my $key (keys %attempts) {
print "$key: ";
my @maybe = $attempts{$key}([EMAIL PROTECTED]);
if (@maybe != @sorted) {
print "len wrong (" . @maybe . " != " . @sorted . ")\n";
next algorithm;
}
for (0..$#maybe) {
if ($sorted[$_] ne $maybe[$_]) {
print "mismatch at $_: $maybe[$_] ne $sorted[$_]\n";
next algorithm;
}
}
print "ok\n";
}
my @result;
my %time_attempts = (map { my $name = $_; ($name => sub { @result =
$attempts{$name}->([EMAIL PROTECTED]) }) }
keys %attempts);
timethese(10, \%time_attempts);