#!/usr/bin/perl -- # -*- Perl -*-

use Getopt::Std;
use XML::DOM;
use strict;
use locale;
use POSIX qw (locale_h);

setlocale(LC_CTYPE, "en_US.iso-8859-1");

my $xmlfile = shift;

my $parser  = new XML::DOM::Parser (NoExpand => 0);
my $xmldb   = $parser->parsefile("$xmlfile");
my $root    = $xmldb->getDocumentElement();
my $reclist = $root->getElementsByTagName('record');

print ";;; file-version: 6\n";

if ($root->getAttribute('user-fields')) {
    my $fields = $root->getAttribute('user-fields');
    print ";;; user-fields: ($fields)\n";
}

for (my $count = 0; $count < $reclist->getLength(); $count++) {
    my $rec = $reclist->item($count);

    print "[";

    &print_field($rec, 'firstname');
    &print_field($rec, 'lastname');
    &print_arr_field($rec, 'aka');
    &print_field($rec, 'company');
    &print_arr_field($rec, 'phone');
    &print_arr_field($rec, 'address');
    &print_arr_field($rec, 'email');

    my $child = $rec->getFirstChild();
    my $found = 0;
    while ($child) {
	my $node = $child;
	$child = $child->getNextSibling();

	next if $node->getNodeType != ELEMENT_NODE;
	next if $node->getTagName() eq 'firstname';
	next if $node->getTagName() eq 'lastname';
	next if $node->getTagName() eq 'aka';
	next if $node->getTagName() eq 'company';
	next if $node->getTagName() eq 'phone';
	next if $node->getTagName() eq 'address';
	next if $node->getTagName() eq 'email';

	print "(" if !$found;
	$found = 1;

	if ($node->getTagName() eq 'user-field'
	    && $node->getAttribute('name') eq 'last-subj') {
	    &print_odd_field($rec, 'user-field', 'last-subj');
	} else {
	    &print_dot_field($node);
	}
    }

    print ")" if $found;

    print " nil]\n";
}

sub print_field {
    my $rec = shift;
    my $name = shift;
    my $flist = $rec->getElementsByTagName($name);

    if ($flist->getLength() == 0) {
	print "nil ";
	print STDERR "\n" if $name eq 'company';
    } elsif ($flist->getLength() == 1) {
	&print_value($flist->item(0));

	my $text = &pcdata($flist->item(0));
	print STDERR "$text " if $text && ($name eq 'firstname'
					   || $name eq 'lastname'
					   || $name eq 'company');
	print STDERR "\n" if $name eq 'company';
    } elsif ($name eq 'company') {
	# this is a special case...
	my $company = "";
	for (my $count = 0; $count < $flist->getLength(); $count++) {
	    my $co = $flist->item($count);
	    $company .= "; " if $company ne '';
	    $company .= &pcdata($co);
	    print STDERR $company if $count == 0;
	}
	print "\"$company\" ";
	print STDERR "\n";
    } else {
	die "Unexpected array of $name.\n";
    }
}

sub print_arr_field {
    my $rec = shift;
    my $name = shift;
    my $flist = $rec->getElementsByTagName($name);

    if ($flist->getLength() == 0) {
	print "nil ";
    } else {
	print "(";
	for (my $count = 0; $count < $flist->getLength(); $count++) {
	    print " " if $count > 0;
	    &print_value($flist->item($count));
	}
	print ") ";
    }
}

sub print_odd_field {
    my $rec = shift;
    my $name = shift;
    my $aname = shift;
    my $flist = $rec->getElementsByTagName($name);
    my @values = ();

    for (my $count = 0; $count < $flist->getLength(); $count++) {
	push(@values, $flist->item($count))
	    if $flist->item($count)->getAttribute('name') eq $aname;
    }

    if (!@values) {
	print "nil ";
    } else {
	print "($aname . #";
	for (my $count = 0; $count <= $#values; $count++) {
	    print " " if $count > 0;
	    my $text = &odd_pcdata($values[$count]);
	    print $text;
	}
	print ") ";
    }
}

sub print_dot_field {
    my $field = shift;
    my $name = $field->getTagName();
    $name = $field->getAttribute('name') if ($name eq 'user-field'); 

    print "($name . ";
    &print_value($field);
    print ") ";
}

sub print_value {
    my $field = shift;

    if ($field->getTagName() eq 'phone') {
	my $phone = &pcdata($field);
	print "[";
	print "\"", $field->getAttribute('label'), "\" ";
	print "\"$phone\" ";
	print "]";
    } elsif ($field->getTagName() eq 'address') {
	print "[";
	print "\"", $field->getAttribute('name'), "\" ";
	my $lines = $field->getElementsByTagName('line');
	print "(";
	for (my $count = 0; $count < $lines->getLength(); $count++) {
	    print "\"", &pcdata($lines->item($count)), "\" ";
	}
	print ") ";
	my $city = $field->getElementsByTagName('city');
	my $state = $field->getElementsByTagName('state');
	my $zip = $field->getElementsByTagName('zip');
	my $country = $field->getElementsByTagName('country');

	if ($city->getLength() > 0) {
	    print "\"", &pcdata($city->item(0)), "\" ";
	} else {
	    print "\"\" ";
	}

	if ($state->getLength() > 0) {
	    print "\"", &pcdata($state->item(0)), "\" ";
	} else {
	    print "\"\" ";
	}

	if ($zip->getLength() > 0) {
	    print "\"", &pcdata($zip->item(0)), "\" ";
	} else {
	    print "\"\" ";
	}

	if ($country->getLength() > 0) {
	    print "\"", &pcdata($country->item(0)), "\" ";
	} else {
	    print "\"\" ";
	}

	print "] ";
    } else {
	my $pcdata = &pcdata($field);

	if (0 && $pcdata =~ /^\d+$/) {
	    print $pcdata, " ";
	} else {
	    print "\"$pcdata\" ";
	}
    }
}

sub pcdata {
    my $field = shift;
    my $child = $field->getFirstChild();
    my $pcdata = "";

    my %DecodeDefaultEntity = (
			       '"' => "&quot;",
			       ">" => "&gt;",
			       "<" => "&lt;",
			       "'" => "&apos;",
			       "&" => "&amp;"
			       );

    my $default = "";

    while ($child) {
	if ($child->getNodeType() == ELEMENT_NODE) {
	    $pcdata .= &pcdata($child);
	} elsif ($child->getNodeType() == TEXT_NODE) {
	    $pcdata .= $child->getData();
	}

	$child = $child->getNextSibling();
    }

{
    $pcdata =~ s/([\xC0-\xDF].|[\xE0-\xEF]..|[\xF0-\xFF]...)|([$default])|(]]>)/
               defined($1) ? XmlUtf8Decode ($1) :
               defined($2) ? $DecodeDefaultEntity{$2} : "]]&gt;" /egs;
}

# This hideousness seems necessary to get Perl to use the right encoding
# Somehow XML strings are "tainted" UTF8 and impossible to get back as
# ISO 8859-1 strings without this nonsense.
my $foo = "";
for (my $count = 0; $count < length($pcdata); $count++) {
    $_ = substr($pcdata, $count);
    if (/^\&\#(\d+)\;/) {
        $foo .= chr($1);
        $count += length($&)-1;
    } else {
	my $och = ord(substr($pcdata, $count, 1));
	$foo .= chr($och);
    }
}

    $pcdata = $foo;

    $pcdata =~ s/^\n+//sg;
    $pcdata =~ s/\n+$//sg;

    $pcdata =~ s/&lt;/</sg;
    $pcdata =~ s/&amp;/&/sg;
    $pcdata =~ s/\n/\\n/sg;
    $pcdata =~ s/\"/\\\"/sg;

    return $pcdata;
}

sub odd_pcdata {
    # like pcdata but don't escape "

    my $field = shift;
    my $child = $field->getFirstChild();
    my $pcdata = "";

    while ($child) {
	if ($child->getNodeType() == ELEMENT_NODE) {
	    $pcdata .= &pcdata($child);
	} elsif ($child->getNodeType() == TEXT_NODE) {
	    $pcdata .= $child->getData();
	}

	$child = $child->getNextSibling();
    }

    $pcdata =~ s/^\n+//sg;
    $pcdata =~ s/\n+$//sg;

    $pcdata =~ s/&lt;/</sg;
    $pcdata =~ s/&amp;/&/sg;
    $pcdata =~ s/\n/\\n/sg;

    return $pcdata;
}

sub XmlUtf8Decode
{
    my ($str, $hex) = @_;
    my $len = length ($str);
    my $n;

    if ($len == 2)
    {
	my @n = unpack "C2", $str;
	$n = (($n[0] & 0x3f) << 6) + ($n[1] & 0x3f);
    }
    elsif ($len == 3)
    {
	my @n = unpack "C3", $str;
	$n = (($n[0] & 0x1f) << 12) + (($n[1] & 0x3f) << 6) + 
		($n[2] & 0x3f);
    }
    elsif ($len == 4)
    {
	my @n = unpack "C4", $str;
	$n = (($n[0] & 0x0f) << 18) + (($n[1] & 0x3f) << 12) + 
		(($n[2] & 0x3f) << 6) + ($n[3] & 0x3f);
    }
    elsif ($len == 1)	# just to be complete...
    {
	$n = ord ($str);
    }
    else
    {
	die "bad value [$str] for XmlUtf8Decode";
    }
    $hex ? sprintf ("&#x%x;", $n) : "&#$n;";
}
