#!/usr/bin/perl -- # -*- Perl -*-

use strict;
use Getopt::Std;
use Text::DelimMatch;

my $usage = "Usage: $0 file\n";

my $bbdbfile = shift;

open (F, $bbdbfile) || die;

chop($_ = scalar(<F>));
die "Unsupported BBDB file version: $_\n" if !/file-version: 6$/;

my $userfields = "";

while (<F>) {
    last if !/^;/;
    $userfields = $1 if /^\;+ user-fields: \((.*)\)/;
}
close (F);

print "<?xml version='1.0'?><!-- -*- XML -*- -->\n";
print "<bbdb";
print " user-fields=\"$userfields\"" if $userfields;
print ">\n";

my %categories = ();
my @categories = ();
my $catCount = 0;

open (F, $bbdbfile) || die;
while (<F>) {
    chop;

    next if !/^\[/;
    die "Unexpected line: $_\n" if !/^\[(.*)\]$/s;
    $_ = $1;

    $_ =~ s/\\n/\n/sg;

    print "<record>\n";

    my %uf = ();

    $_ = &strip_field($_, 'firstname');
    $_ = &strip_field($_, 'lastname');
    $_ = &strip_field($_, 'aka');
    $_ = &strip_field($_, 'company');
    $_ = &strip_field($_, 'phone');
    $_ = &strip_field($_, 'address');
    $_ = &strip_field($_, 'email');

    # Ok, now the first thing in there is a list of tuples

    my $mc = new Text::DelimMatch "\\(", "\\)";
    $mc->quote('"');
    my($pre, $match, $post) = $mc->match($_);
    $match = substr($match, 1, length($match)-2); # strip ( and )
    ($_ = $post) =~ s/^\s*//sg;

    while ($match) {
	if ($match =~ /^\((\S+) \. \"(.*?)\"\s*\)\s*/s) {
	    $uf{$1} = &escape($2, $1 eq 'last-subj');
	    print "<user-field name=\"$1\">";
	    print &escape($2, $1 eq 'last-subj');
	    print "</user-field>\n";
	    $match = $';
	} elsif ($match =~ /^\((\S+) \. \#\(/) {
	    my $name = $1;
	    my $string = "(" . $';
	    my $mc2 = new Text::DelimMatch "\\(", "\\)";
	    $mc2->quote('"');
	    my($pre, $xmatch, $post) = $mc2->match($string);
	    $uf{$name} = &escape($xmatch, $name eq 'last-subj');
	    print "<user-field name=\"$name\">";
	    print &escape($xmatch, $name eq 'last-subj');
	    print "</user-field>\n";
	    $match = $post;
	    $match = $' if $match =~ /^\)\s*/; # '
	} elsif (/^nil\s*$/) {
	    last;
	} else {
	    die "Failed to match: $_\n";
	}
    }

    my $cat = $uf{'category'};
    if (!defined($uf{'category'})) {
	print "<user-field name=\"category\">Unfiled</user-field>\n";
	$cat = 'Unfiled';
    }

    if (! exists $categories{$cat}) {
	$catCount++;
	$categories[$catCount] = $cat;
	$categories{$cat} = $catCount;
    }

    if ($_ !~ /^nil\s*$/) {
	die "Unexpected end-of-record: $_\n";
    }

    print "</record>\n\n";
}

print "<categories>\n";
for (my $count = 1; $count < $catCount; $count++) {
    print "  <category>", $categories[$count], "</category>\n";
}
print "</categories>\n\n";

print "</bbdb>\n";

sub strip_field {
    local $_ = shift;
    my $tag = shift;

    if (/^\"(.*?)\"\s*/s) {
	my $field = $1;
	$_ = $'; # '

	if ($tag eq 'company' && $field =~ /;/) {
	    my @cos = split(/;\s*/, $field);
	    &feedback($cos[0], $tag);
	    foreach my $co (@cos) {
		print "<$tag>", &escape($co), "</$tag>\n";
	    }
	} else {
	    &feedback($field, $tag);
	    print "<$tag>", &escape($field), "</$tag>\n";
	}

	return $_;
    }

    if (/^nil\s*/) {
	&feedback(undef, $tag);
	return $'; # '
    }

    if (/^\(/) {
	my $mc = new Text::DelimMatch "\\(", "\\)";
	$mc->quote('"');
	my($pre, $match, $post) = $mc->match($_);
	$match = substr($match, 1, length($match)-2); # strip ( and )
	($_ = $post) =~ s/^\s*//sg;

	while ($match ne "") {
	    $match = &strip_field($match, $tag);
	}

	return $_;
    }

    if (/^\[/) {
	my $mc = new Text::DelimMatch "\\[", "\\]";
	my($pre, $match, $post) = $mc->match($_);
	$match = substr($match, 1, length($match)-2); # strip [ and ]
	($_ = $post) =~ s/^\s*//sg;

	if ($tag eq 'phone') {
	    &parse_phone($match);
	} elsif ($tag eq 'address') {
	    &parse_address($match);
	} else {
	    die "Unexpected [] field in $tag.\n";
	}

	return $_;
    }

    die "Unexpected start char: $_\n";
}

sub parse_phone {
    local $_ = shift;

    if (/^\"(.*?)\"\s+\"(.*?)\"\s*$/) {
	print "<phone label=\"", &escape($1), "\">", &escape($2), "</phone>\n";
	return;
    } elsif (/^\"(.*?)\"\s+(\d+)\s+(\d+)\s+(\d+)\s+0\s*$/) {
	print "<phone label=\"", &escape($1), "\">";
	printf "%03d.%03d.%04d", &escape($2), &escape($3), &escape($4);
	print "</phone>\n";
	return;
    } elsif (/^\"(.*?)\"\s+(.*) 0\s*$/) {
	print "<phone label=\"", &escape($1), "\">", &escape($2), "</phone>\n";
    } else {
	die "Unparseable phone: $_\n";
    }
}

sub parse_address {
    local $_ = shift;
    my @lines = ();
    my $city = "";
    my $state = "";
    my $zip = "";
    my $country = "";

    die "Unparseable address: $_\n" if !/^\"(.*?)\"\s+(.*)$/s;
    $_ = $2;

    print "<address name=\"", &escape($1), "\">";

    if (/^\(\"/) {
	$_ = substr($_, 1);
	my $done = 0;
	while (!$done) {
	    if (/^\"/) {
		$_ = substr($_, 1);
		if (/^(.*?[^\\])\"/) {
		    push(@lines, $1);
		    $_ = $'; # '
		    s/^\s*//g;
		} else {
		    die "Unparseable line in address: $_\n";
		}
	    } elsif (/^\)/) {
		$_ = substr($_, 1);
		s/^\s*//g;
		$done = 1;
	    } else {
		die "Unparseable list of lines in address: $_\n";
	    }
	}
    } elsif (/^\(\)\s*/) {
	# no lines
	$_ = $'; # '
    } elsif (/^nil\s*/) {
	# no lines
	$_ = $'; # '
    } else {
	die "Expected list of lines in address: $_\n";
    }

    ($city, $_) = next_field($_, 'city');
    ($state, $_) = next_field($_, 'state');

    if ($_ =~ /^\((\S+) (\S+)\s*\)\s*/) {
	$_ = $'; # '

	# (99999 9999) or ("L1W" "34P") or ("99999" "9999")
	my $z1 = $1;
	my $z2 = $2;

	if ($z1 =~ /\"(.*)\"/) {
	    $z1 = $1;
	} else {
	    $z1 = sprintf("%05d", $z1);
	}

	if ($z2 =~ /\"(.*)\"/) {
	    $z2 = $1;
	} else {
	    $z2 = sprintf("%04d", $z2);
	}

	$zip = $z1 . "-" . $z2;
    } else {
	($zip, $_) = next_field($_, 'zip');
	$zip = sprintf("%05d", $zip) if $zip ne "" && length($zip) < 5;
    }

    ($country, $_) = next_field($_, 'country');

    foreach my $line (@lines) {
	print "<line>", &escape($line), "</line>\n";
    }

    print "<city>", &escape($city), "</city>\n" if $city ne "";
    print "<state>", &escape($state), "</state>\n" if $state ne "";
    print "<zip>", &escape($zip), "</zip>\n" if $zip ne "";
    print "<country>", &escape($country), "</country>\n" if $country ne "";

    print "</address>\n";
}

sub next_field {
    local $_ = shift;
    my $type = shift;

    if (/^\"(.*?)\"\s*/s) {
	return($1, $'); # '
    }

    if (/^(\d+)\s*/) {
	return($1, $'); # '
    }

    die "Unparseable next field ($type): $_\n";
}

sub escape {
    my $str = shift;
    my $leave_escaped_quotes = shift;

    #$str =~ s/\\(\d\d\d)/chr($1)/esg;
    $str =~ s/\\\"/\"/sg if !$leave_escaped_quotes;

    my(@chars) = split(//, $str);
    local $_;

    $_ = "";
    while (@chars) {
	my $char = shift @chars;
	if ($char eq '&') {
	    my $test = join("", @chars);
	    if ($test !~ /^\#[0-9]+;/) {
		$_ .= "&amp;";
	    } else {
		$_ .= "&";
	    }
	} elsif ($char eq '<') {
	    $_ .= "&lt;";
	} elsif (ord($char) == 9 || ord($char) == 10 || ord($char) == 13) {
	    $_ .= $char;
	} elsif (0 && (ord($char) == 195 || ord($char) == 196) && ord($chars[0]) > 127) {
	    # UTF8?
	    my $val1 = oct("0" . ord($1));
	    my $val2 = oct("0" . ord($chars[0]));

	    shift @chars;

	    my $ch1 = chr($val1);
	    my $ch2 = chr($val2);
	    my $repl = &XmlUtf8Decode("$ch1$ch2", 1);
	    $_ .= $repl;
	} elsif (ord($char) < 32 || ord($char) > 127) {
	    $_ .= "&#" . ord($char) . ";";
	} else {
	    $_ .= $char;
	}
    }

    s/\\(\d\d\d)/sprintf("&#%d;", oct($1))/esg;
    #s/\&\#(\d+);/sprintf("\\u%04x", $1)/esg;
    #s/\"/\\u0022/sg;
    #s/\\\'/\\u0027/sg;

    return $_;
}

sub XmlUtf8Decode
{
    my ($str, $hex) = @_;
    my $len = length ($str);
    my $n;

    if ($len == 2)
    {
	my @n = unpack "C2", $str;
	$n = (($n[0] & 0x3f) << 6) + ($n[1] & 0x3f);
    }
    elsif ($len == 3)
    {
	my @n = unpack "C3", $str;
	$n = (($n[0] & 0x1f) << 12) + (($n[1] & 0x3f) << 6) + 
		($n[2] & 0x3f);
    }
    elsif ($len == 4)
    {
	my @n = unpack "C4", $str;
	$n = (($n[0] & 0x0f) << 18) + (($n[1] & 0x3f) << 12) + 
		(($n[2] & 0x3f) << 6) + ($n[3] & 0x3f);
    }
    elsif ($len == 1)	# just to be complete...
    {
	$n = ord ($str);
    }
    else
    {
	die "bad value [$str] for XmlUtf8Decode";
    }
    $hex ? sprintf ("&#x%x;", $n) : "&#$n;";
}

sub XmlUtf8Encode
{
    my $n = shift;
    if ($n < 0x80)
    {
	return chr ($n);
    }
    elsif ($n < 0x800)
    {
	return pack ("CC", (($n >> 6) | 0xc0), (($n & 0x3f) | 0x80));
    }
    elsif ($n < 0x10000)
    {
	return pack ("CCC", (($n >> 12) | 0xe0), ((($n >> 6) & 0x3f) | 0x80),
		     (($n & 0x3f) | 0x80));
    }
    elsif ($n < 0x110000)
    {
	return pack ("CCCC", (($n >> 18) | 0xf0), ((($n >> 12) & 0x3f) | 0x80),
		     ((($n >> 6) & 0x3f) | 0x80), (($n & 0x3f) | 0x80));
    }
    die "number is too large for Unicode [$n] in &XmlUtf8Encode";
}

sub feedback {
    my $field = shift;
    my $tag = shift;

    print STDERR "$field " if ($field
			       && ($tag eq 'firstname'
				   || $tag eq 'lastname'
				   || $tag eq 'company'));

    print STDERR "\n" if $tag eq 'company';
}
