Like other things posted here without notices to the contrary, this code is in the public domain.
Here's qd.py: def chomp(text): while text[-1:] in ['\r', '\n']: text = text[:-1] return text assert chomp('a\n') == 'a' assert chomp('\n') == '' assert chomp('') == '' def untabify(text): def dumb_tab_exp(char): if char != '\t': return char return ' ' return ''.join(map(dumb_tab_exp, text)) And here's some code that sorts a bunch of OIDs lexically, using qd.py: #!/usr/bin/python import qd, sys, time oids = map(qd.chomp, file(sys.argv[1])) #for oid in oids: print oid def sortem(oids): split_oids = [[int(num) for num in oid.split('.') if num != ''] for oid in oids] #split_oids.sort() return ['.' + '.'.join([str(edge) for edge in oid]) for oid in split_oids] nn = 30 start = time.time() for ii in range(nn): sortem(oids) # This says 0.754 per second, or 1326 ms per. # If we eliminate the sort, it's 0.884, or 1131 ms per. # The conversion costs totally overwhelm the sorting time. print nn / (time.time() - start) #for oid in sortem(oids): print oid Here's code that compares the speed of several different ways to sort a bunch of OIDs lexically: #!/usr/bin/perl -w use strict; # I can't believe this isn't included in the standard Perl distribution!! use Time::HiRes; use Benchmark qw(timethese); use Data::Dumper; use lib '../pkgs/Net-SNMP-4.1.2/lib'; # probably this is only useful to me use Net::SNMP (); # sorts 3877 SNMP OIDs in a bunch of different ways. # N lg N for 3877 is 46254 or so. The best approach I've come with so # far is 0.33 seconds for all of that, which is 7 milliseconds per # comparison or so. What crappy-ass performance! { no warnings 'uninitialized'; sub _oid_cmp { my @achunks = split /\./, $a; my @bchunks = split /\./, $b; my $len = $#achunks < $#bchunks ? $#achunks : $#bchunks; my $result; for (0..$len) { $result = $achunks[$_] <=> $bchunks[$_]; return $result if $result; } return @achunks <=> @bchunks; } sub dumbcache { my ($list) = @_; my %cache; return sort { ($cache{$a} ||= pack "w*", split /\./, $a) cmp ($cache{$b} ||= pack "w*", split /\./, $b) } @$list; } { my %cheat; sub cheatdumbcache { my ($list) = @_; return sort { ($cheat{$a} ||= pack "w*", split /\./, $a) cmp ($cheat{$b} ||= pack "w*", split /\./, $b) } @$list; } } sub grt { my ($list) = @_; my @maplist = map { pack "N*", split /\./ } @$list; return map { '.' . join '.', unpack "xxxxN*", $_ } sort @maplist; } sub hashgrt { my ($list) = @_; my %hash = map { ((pack "N*", split /\./) => $_) } @$list; #print Dumper \%hash; return map { $hash{$_} } sort keys %hash; } sub hashgrt2 { my ($list) = @_; my %hash; for (@$list) { $hash{pack "N*", split /\./} = $_; } return map { $hash{$_} } sort keys %hash; } sub berhashgrt { my ($list) = @_; my %hash = map { ((pack "w*", split /\./) => $_) } @$list; #print Dumper \%hash; return map { $hash{$_} } sort keys %hash; } sub berhashgrt2 { my ($list) = @_; my %hash; for my $oid (@$list) { $hash{pack 'w*', split /\./, $oid} = $oid; } return map { $hash{$_} } sort keys %hash; } sub berhashgrt2_nosort { my ($list) = @_; my %hash; for my $oid (@$list) { $hash{pack 'w*', split /\./, $oid} = $oid; } return map { $hash{$_} } keys %hash; } } my %attempts = ( # 0.83 per second: orcish2 => sub { my ($list) = @_; my %orcish; return sort { $orcish{$a}{$b} ||= _oid_cmp } @$list; }, # 0.85 per second: orcish => sub { my ($list) = @_; my %orcish; return sort { $orcish{"$a $b"} ||= _oid_cmp } @$list; }, # 0.94 per second: dumb => sub { my ($list) = @_; return sort _oid_cmp @$list; my @rv = sort _oid_cmp @$list; }, # 1.4 per second: re_grt => sub { my ($list) = @_; my @maplist = map { pack "w*", /(\d+)/g } @$list; return map { '.' . join '.', unpack 'w*', $_ } sort @maplist; }, # 1.8 per second: grt => \&grt, # 2.4 per second: hashgrt => \&hashgrt, # 2.4 per second: berhashgrt => \&berhashgrt, # 2.7 per second: net_snmp => sub { my ($list) = @_; return Net::SNMP::oid_lex_sort(@$list); }, # 2.8 per second: hashgrt2 => \&hashgrt2, # 2.9 per second: # that means 345 ms each, 28 ms more than not even sorting. berhashgrt2 => \&berhashgrt2, # try to make 'dumb' a little smarter # 3.0 per second: dumbcache => \&dumbcache, # 3.15 per second, but doesn't work: # that means 317 ms each. berhashgrt2_nosort => \&berhashgrt2_nosort, # 8.0/s, but doesn't actually work unixsort => sub { my ($list) = @_; open FOO, ">foo.tmp"; print FOO map { $_, "\n" } @$list; my @results = `sort -n foo.tmp`; chomp @results; return @results; }, # 16 per second # like dumbcache, but doesn't regenerate cache each time cheatdumbcache => \&cheatdumbcache, ); my @testdata = (qw( .1.3.6.1.4.1.522.3.4.5.1.3 .1 .12 .1.3.6.1.4.1.522.3.4.6.1.3 .1.3.6.1.4.1.522.3.4.7.1.23 .1.3.6.1.4.1.522.3.4.4.1.3 .1.3.6.1.4.1.522.3.4.4.1.23 .1.3.6.1.4.1.522.3.4.3.1.3 .1.3.6.1.4.1.522.3.4.3.1.23 .1.3.6.1.4.1.522.3.4.2.1.3 .1.3.6.1.4.1.522.3.4.2.1.23 .1.3.6.1.4.1.522.1.4.2.1.2.3.2.15 .1.3.6.1.4.1.522.1.4.2.1.2.3.2.16 .1.3.6.1.4.1.522.1.4.2.1.2.3.2.17 .1.3.6.1.4.1.522.1.4.2.1.2.3.2.18 .1.3.6.1.4.1.522.1.4.2.1.2.3.2.19 .1.3.6.1.4.1.522.1.4.2.1.2.3.2.20 .1.3.6.1.4.1.522.1.4.2.1.3.3.2.15 .1.3.6.1.4.1.522.1.4.2.1.3.3.2.16 .1.3.6.1.4.1.522.1.4.2.1.3.3.2.17 .1.3.6.1.4.1.522.1.4.2.1.3.3.2.18 .1.3.6.1.4.1.522.1.4.2.1.3.3.2.19 .1.3.6.1.4.1.522.1.4.2.1.3.3.2.20 .1.3.6.1.4.1.522.3.4.6.1.23 .1.3.6.1.4.1.522.3.4.7.1.3 .1.3.6.1.4.1.522.1.4.2.1.4.3.2.15 .1.3.6.1.4.1.522.1.4.2.1.4.3.2.16 .1.3.6.1.4.1.522.1.4.2.1.4.3.2.17 .1.3.6.1.4.1.522.1.4.2.1.4.3.2.18 .1.3.6.1.4.1.522.1.4.2.1.4.3.2.19 .1.3.6.1.4.1.522.1.4.2.1.4.3.2.20 .1.3.6.1.4.1.522.1.4.2.1.5.3.2.15 .1.3.6.1.4.1.522.1.4.2.1.5.3.2.16 .1.3.6.1.4.1.522.1.4.2.1.5.3.2.17 .1.3.6.1.4.1.522.1.4.2.1.5.3.2.18 .1.3.6.1.4.1.522.1.4.2.1.5.3.2.19 .1.3.6.1.4.1.522.1.4.2.1.5.3.2.20 ), map { my $col = $_; map { ".1.3.6.1.4.1.522.4.5.1.$col.108.28.31.3.4.$_" } 0..255 } (2, 4, 1, 3, 15, 14, 11, 6, 12, 5, 10, 7, 9, 13, 8)); print "sorting " . @testdata . " oids\n"; #print map { $_, "\n" } @testdata; #print map { "$_\n" } $attempts{dumb}([EMAIL PROTECTED]); my @sorted = $attempts{dumb}([EMAIL PROTECTED]); algorithm: for my $key (keys %attempts) { print "$key: "; my @maybe = $attempts{$key}([EMAIL PROTECTED]); if (@maybe != @sorted) { print "len wrong (" . @maybe . " != " . @sorted . ")\n"; next algorithm; } for (0..$#maybe) { if ($sorted[$_] ne $maybe[$_]) { print "mismatch at $_: $maybe[$_] ne $sorted[$_]\n"; next algorithm; } } print "ok\n"; } my @result; my %time_attempts = (map { my $name = $_; ($name => sub { @result = $attempts{$name}->([EMAIL PROTECTED]) }) } keys %attempts); timethese(10, \%time_attempts);