-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
/ "Eric S. Raymond" <[EMAIL PROTECTED]> was heard to say:
| Norman Walsh <[EMAIL PROTECTED]>:
|> That's valid when the PIs are left in, but results in a non-XML
|> document when profiled. My model forces the input to be well-formed
|> XML and guarantees that the result will be well-formed.
|
| Good answer. Same one I anticipated several messages up-thgread :-)
|
| So, how would *you* implement this "specialized vocabulary"? XSLT
| doesn't have the marbles for it.
I think the right answer is a specialized XML parser that performs a
variant of the identity transformation. In fact, it does exactly what
Jirka's profiling code does except that it has a funky serializer that
outputs the <!DOCTYPE declaration and the internal subset (or ideally
only the necessary parts of it).
In fact, that's just what my code does, by way of an egregious hack.
#!/usr/bin/perl -w -- # -*- Perl -*-
use strict;
use XML::Parser::PerlSAX;
my $host = $ENV{'HTTP_HOST'} || "";
my $uri = $ENV{'REQUEST_URI'} || "";
if ($host ne 'localhost') {
&forbidden();
}
my %profile = ();
my $xmlfile = "";
my $options = "";
if ($uri =~ /^.*?profile(\/.*?)\?(.*)$/) {
$xmlfile = $1;
$options = $2;
} elsif ($uri =~ /^.*?profile(\/.*)$/) {
$xmlfile = $1;
} else {
&forbidden();
}
my @args = split(/&/, $options);
foreach $_ (@args) {
tr/+/ /;
s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
}
foreach my $cond (@args) {
next if $cond !~ /^(\S+)=(\S+)/;
if (exists $profile{$1}) {
$profile{$1} .= "|$2";
} else {
$profile{$1} = $2;
}
}
print "Content-type: application/xml\n\n";
my $xmlDecl = "<?xml version='1.0'?>";
my $internalSubset = "";
if (open (F, $xmlfile)) {
# THIS IS A HACK
read (F, $_, 16384);
if (/^\s*(\<\?xml.*?\?>)/is) {
$xmlDecl = $1;
}
if (/<!DOCTYPE\s/is) {
$_ = $& . $'; # '
if (/\]\>/is) {
$_ = $` . $&;
}
$internalSubset = $_;
}
close (F);
}
my $shandler = new SerializeHandler($xmlDecl, $internalSubset);
my $handler = new ProfileHandler($shandler, %profile);
my $parser = new XML::Parser::PerlSAX (Handler => $handler);
$parser->parse (Source => { 'SystemId' => $xmlfile });
close (STDOUT);
exit;
sub forbidden {
# FIXME: make this work on my server
# print "HTTP/1.1 403 Forbidden\n";
# print "Connection: close\n";
print "Content-Type: text/html; charset=iso-8859-1\n\n";
print "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n";
print "<HTML><HEAD>\n";
print "<TITLE>403 Forbidden</TITLE>\n";
print "</HEAD><BODY>\n";
print "<H1>Forbidden</H1>\n";
print "You don't have permission to access that resource\n";
print "on this server.<P>\n";
print "</BODY></HTML>\n";
exit 0;
}
package ProfileHandler;
sub new {
my $type = shift;
my $chain = shift;
my %profile = @_;
my @stack = ();
my $self = { 'chain' => $chain,
'profile' => \%profile,
'stack' => \@stack };
return bless $self, $type;
}
sub start_document {
my $self = shift;
$self->{'chain'}->start_document() if $self->{'chain'};
$self->include();
}
sub start_element {
my $self = shift;
my $element = shift;
# print $element->{'Name'}, ": ", $self->context(), "\n";
if ($self->context()) {
my %profile = %{$self->{'profile'}};
my %attrs = %{$element->{'Attributes'}};
my $match = 1;
foreach my $attr (keys %attrs) {
if (exists $profile{$attr}) {
my $value = $attrs{$attr};
my $prof = $profile{$attr};
$match = $match && $self->profileMatch($value,$prof);
}
}
if ($match) {
$self->include();
} else {
$self->ignore();
}
} else {
$self->ignore();
}
if ($self->context()) {
$self->{'chain'}->start_element($element) if $self->{'chain'};
}
}
sub end_element {
my $self = shift;
if ($self->context()) {
$self->{'chain'}->end_element(@_) if $self->{'chain'};
}
$self->pop();
}
sub characters {
my $self = shift;
if ($self->context()) {
$self->{'chain'}->characters(@_) if $self->{'chain'};
}
}
sub processing_instruction {
my $self = shift;
$self->{'chain'}->processing_instruction(@_) if $self->{'chain'};
}
sub comment {
my $self = shift;
if ($self->context()) {
$self->{'chain'}->comment(@_) if $self->{'chain'};
}
}
sub ignore {
my $self = shift;
# print "IGNORE\n";
push (@{$self->{'stack'}}, 0);
}
sub include {
my $self = shift;
# print "INCLUDE\n";
push (@{$self->{'stack'}}, 1);
}
sub pop {
my $self = shift;
# print "POP\n";
pop (@{$self->{'stack'}});
}
sub context {
my $self = shift;
my @stack = @{$self->{'stack'}};
# print "CONTEXT: ", $stack[$#stack], "\n";
return $stack[$#stack];
}
sub profileMatch {
my $self = shift;
my $values= shift;
my $profiles = shift;
my %profs = ();
my @profiles = split(/\|/, $profiles);
foreach my $profile (@profiles) {
$profs{$profile} = 1;
}
foreach my $value (split(/\|/, $values)) {
return 1 if $profs{$value};
}
return 0;
}
package SerializeHandler;
sub new {
my $type = shift;
my $xmlDecl = shift;
my $internalSubset = shift;
return bless { 'xmldecl' => $xmlDecl, 'subset' => $internalSubset}, $type;
}
sub start_document {
my $self = shift;
print $self->{'xmldecl'};
print "\n";
print $self->{'subset'};
print "\n" if $self->{'subset'} ne '';
}
sub start_element {
my $self = shift;
my $element = shift;
print "<", $element->{'Name'};
my %attr = %{$element->{'Attributes'}};
if (%attr) {
foreach my $name (keys %attr) {
my $value = $attr{$name};
my $quot = '"';
if ($value =~ /\"/) {
$quot = "'";
$value =~ s/\'/\'/sg;
}
print " $name=$quot$value$quot";
}
}
print ">";
}
sub end_element {
my $self = shift;
my $element = shift;
print "</", $element->{'Name'}, ">";
}
sub characters {
my $self = shift;
my $data = shift;
print $data->{'Data'};
}
sub processing_instruction {
my $self = shift;
my $pi = shift;
print "<?", $pi->{'Target'}, " ", $pi->{'Data'}, "?>";
}
sub comment {
my $self = shift;
my $comment = shift;
print "<--", $comment->{'Data'}, "-->";
}
Be seeing you,
norm
- --
Norman Walsh <[EMAIL PROTECTED]> | The art of living is more like
http://www.oasis-open.org/docbook/ | wrestling than dancing.--Marcus
Chair, DocBook Technical Committee | Aurelius
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.0.6 (GNU/Linux)
Comment: Processed by Mailcrypt 3.5.7 <http://mailcrypt.sourceforge.net/>
iD8DBQE9pz4kOyltUcwYWjsRAh3ZAJ9qfOS5Lr3C9XFmeYKdmEgSie1XJgCgr5v9
ucfZl6KWRHnBPSPsDTti56A=
=SBtk
-----END PGP SIGNATURE-----