#!/usr/bin/perl -- # -*- Perl -*- use strict; use Getopt::Std; use Text::DelimMatch; my $usage = "Usage: $0 file\n"; my $bbdbfile = shift; open (F, $bbdbfile) || die; chop($_ = scalar()); die "Unsupported BBDB file version: $_\n" if !/file-version: 6$/; my $userfields = ""; while () { last if !/^;/; $userfields = $1 if /^\;+ user-fields: $(.*)$/; } close (F); print "\n"; print "\n"; my %categories = (); my @categories = (); my $catCount = 0; open (F, $bbdbfile) || die; while () { chop; next if !/^\[/; die "Unexpected line: $_\n" if !/^\[(.*)\]$/s; $_ = $1; $_ =~ s/\\n/\n/sg; print "\n"; my %uf = (); $_ = &strip_field($_, 'firstname'); $_ = &strip_field($_, 'lastname'); $_ = &strip_field($_, 'aka'); $_ = &strip_field($_, 'company'); $_ = &strip_field($_, 'phone'); $_ = &strip_field($_, 'address'); $_ = &strip_field($_, 'email'); # Ok, now the first thing in there is a list of tuples my $mc = new Text::DelimMatch "\$", "\$"; $mc->quote('"'); my($pre, $match, $post) = $mc->match($_); $match = substr($match, 1, length($match)-2); # strip ( and ) ($_ = $post) =~ s/^\s*//sg; while ($match) { if ($match =~ /^$(\S+) \. \"(.*?)\"\s*$\s*/s) { $uf{$1} = &escape($2, $1 eq 'last-subj'); print ""; print &escape($2, $1 eq 'last-subj'); print "\n"; $match = $'; } elsif ($match =~ /^$(\S+) \. \#\(/) { my $name = $1; my $string = "(" . $'; my $mc2 = new Text::DelimMatch "\\(", "\$"; $mc2->quote('"'); my($pre, $xmatch, $post) = $mc2->match($string); $uf{$name} = &escape($xmatch, $name eq 'last-subj'); print ""; print &escape($xmatch, $name eq 'last-subj'); print "\n"; $match = $post; $match = $' if $match =~ /^\)\s*/; # ' } elsif (/^nil\s*$/) { last; } else { die "Failed to match: $_\n"; } } my $cat = $uf{'category'}; if (!defined($uf{'category'})) { print "Unfiled\n"; $cat = 'Unfiled'; } if (! exists $categories{$cat}) { $catCount++; $categories[$catCount] = $cat; $categories{$cat} = $catCount; } if ($_ !~ /^nil\s*$/) { die "Unexpected end-of-record: $_\n"; } print "\n\n"; } print "\n"; for (my $count = 1; $count < $catCount; $count++) { print " ", $categories[$count], "\n"; } print "\n\n"; print "\n"; sub strip_field { local $_ = shift; my $tag = shift; if (/^\"(.*?)\"\s*/s) { my $field = $1; $_ = $'; # ' if ($tag eq 'company' && $field =~ /;/) { my @cos = split(/;\s*/, $field); &feedback($cos[0], $tag); foreach my $co (@cos) { print "<$tag>", &escape($co), "\n"; } } else { &feedback($field, $tag); print "<$tag>", &escape($field), "\n"; } return $_; } if (/^nil\s*/) { &feedback(undef, $tag); return $'; # ' } if (/^$/) { my $mc = new Text::DelimMatch "\\(", "\$"; $mc->quote('"'); my($pre, $match, $post) = $mc->match($_); $match = substr($match, 1, length($match)-2); # strip ( and ) ($_ = $post) =~ s/^\s*//sg; while ($match ne "") { $match = &strip_field($match, $tag); } return $_; } if (/^\[/) { my $mc = new Text::DelimMatch "\\[", "\\]"; my($pre, $match, $post) = $mc->match($_); $match = substr($match, 1, length($match)-2); # strip [ and ] ($_ = $post) =~ s/^\s*//sg; if ($tag eq 'phone') { &parse_phone($match); } elsif ($tag eq 'address') { &parse_address($match); } else { die "Unexpected [] field in $tag.\n"; } return $_; } die "Unexpected start char: $_\n"; } sub parse_phone { local $_ = shift; if (/^\"(.*?)\"\s+\"(.*?)\"\s*$/) { print "", &escape($2), "\n"; return; } elsif (/^\"(.*?)\"\s+(\d+)\s+(\d+)\s+(\d+)\s+0\s*$/) { print ""; printf "%03d.%03d.%04d", &escape($2), &escape($3), &escape($4); print "\n"; return; } elsif (/^\"(.*?)\"\s+(.*) 0\s*$/) { print "", &escape($2), "\n"; } else { die "Unparseable phone: $_\n"; } } sub parse_address { local $_ = shift; my @lines = (); my $city = ""; my $state = ""; my $zip = ""; my $country = ""; die "Unparseable address: $_\n" if !/^\"(.*?)\"\s+(.*)$/s; $_ = $2; print "

"; if (/^$\"/) { $_ = substr($_, 1); my $done = 0; while (!$done) { if (/^\"/) { $_ = substr($_, 1); if (/^(.*?[^\\])\"/) { push(@lines, $1); $_ = $'; # ' s/^\s*//g; } else { die "Unparseable line in address: $_\n"; } } elsif (/^$/) { $_ = substr($_, 1); s/^\s*//g; $done = 1; } else { die "Unparseable list of lines in address: $_\n"; } } } elsif (/^\s*/) { # no lines $_ = $'; # ' } elsif (/^nil\s*/) { # no lines $_ = $'; # ' } else { die "Expected list of lines in address: $_\n"; } ($city, $_) = next_field($_, 'city'); ($state, $_) = next_field($_, 'state'); if ($_ =~ /^$(\S+) (\S+)\s*$\s*/) { $_ = $'; # ' # (99999 9999) or ("L1W" "34P") or ("99999" "9999") my $z1 = $1; my $z2 = $2; if ($z1 =~ /\"(.*)\"/) { $z1 = $1; } else { $z1 = sprintf("%05d", $z1); } if ($z2 =~ /\"(.*)\"/) { $z2 = $1; } else { $z2 = sprintf("%04d", $z2); } $zip = $z1 . "-" . $z2; } else { ($zip, $_) = next_field($_, 'zip'); $zip = sprintf("%05d", $zip) if $zip ne "" && length($zip) < 5; } ($country, $_) = next_field($_, 'country'); foreach my $line (@lines) { print "", &escape($line), "\n"; } print "", &escape($city), "\n" if $city ne ""; print "", &escape($state), "\n" if $state ne ""; print "", &escape($zip), "\n" if $zip ne ""; print "", &escape($country), "\n" if $country ne ""; print "

\n"; } sub next_field { local $_ = shift; my $type = shift; if (/^\"(.*?)\"\s*/s) { return($1, $'); # ' } if (/^(\d+)\s*/) { return($1, $'); # ' } die "Unparseable next field ($type): $_\n"; } sub escape { my $str = shift; my $leave_escaped_quotes = shift; #$str =~ s/\\(\d\d\d)/chr($1)/esg; $str =~ s/\\\"/\"/sg if !$leave_escaped_quotes; my(@chars) = split(//, $str); local $_; $_ = ""; while (@chars) { my $char = shift @chars; if ($char eq '&') { my $test = join("", @chars); if ($test !~ /^\#[0-9]+;/) { $_ .= "&"; } else { $_ .= "&"; } } elsif ($char eq '<') { $_ .= "<"; } elsif (ord($char) == 9 || ord($char) == 10 || ord($char) == 13) { $_ .= $char; } elsif (0 && (ord($char) == 195 || ord($char) == 196) && ord($chars[0]) > 127) { # UTF8? my $val1 = oct("0" . ord($1)); my $val2 = oct("0" . ord($chars[0])); shift @chars; my $ch1 = chr($val1); my $ch2 = chr($val2); my $repl = &XmlUtf8Decode("$ch1$ch2", 1); $_ .= $repl; } elsif (ord($char) < 32 || ord($char) > 127) { $_ .= "&#" . ord($char) . ";"; } else { $_ .= $char; } } s/\\(\d\d\d)/sprintf("&#%d;", oct($1))/esg; #s/\&\#(\d+);/sprintf("\\u%04x", $1)/esg; #s/\"/\\u0022/sg; #s/\\\'/\\u0027/sg; return $_; } sub XmlUtf8Decode { my ($str, $hex) = @_; my $len = length ($str); my $n; if ($len == 2) { my @n = unpack "C2", $str; $n = (($n[0] & 0x3f) << 6) + ($n[1] & 0x3f); } elsif ($len == 3) { my @n = unpack "C3", $str; $n = (($n[0] & 0x1f) << 12) + (($n[1] & 0x3f) << 6) + ($n[2] & 0x3f); } elsif ($len == 4) { my @n = unpack "C4", $str; $n = (($n[0] & 0x0f) << 18) + (($n[1] & 0x3f) << 12) + (($n[2] & 0x3f) << 6) + ($n[3] & 0x3f); } elsif ($len == 1) # just to be complete... { $n = ord ($str); } else { die "bad value [$str] for XmlUtf8Decode"; } $hex ? sprintf ("&#x%x;", $n) : "&#$n;"; } sub XmlUtf8Encode { my $n = shift; if ($n < 0x80) { return chr ($n); } elsif ($n < 0x800) { return pack ("CC", (($n >> 6) | 0xc0), (($n & 0x3f) | 0x80)); } elsif ($n < 0x10000) { return pack ("CCC", (($n >> 12) | 0xe0), ((($n >> 6) & 0x3f) | 0x80), (($n & 0x3f) | 0x80)); } elsif ($n < 0x110000) { return pack ("CCCC", (($n >> 18) | 0xf0), ((($n >> 12) & 0x3f) | 0x80), ((($n >> 6) & 0x3f) | 0x80), (($n & 0x3f) | 0x80)); } die "number is too large for Unicode [$n] in &XmlUtf8Encode"; } sub feedback { my $field = shift; my $tag = shift; print STDERR "$field " if ($field && ($tag eq 'firstname' || $tag eq 'lastname' || $tag eq 'company')); print STDERR "\n" if $tag eq 'company'; }