James Purser wrote:
I need to convert SMIL files to another XML format and was wondering if
anyone else had any experience with Perls XML::Simple and could point me
at any good doco, with a focus on getting the attributes out.
I found the combination of XML::Parser and XML::Writer more useful than
XML::Simple.
I have atached two small programs from several years ago that show the use of the
modules.
Mike
--
Michael Lake
Computational Research Support Unit
Science Faculty, UTS
Ph: 9514 2238
#!/usr/bin/perl -w
# Survex format to XML format
# This program is generated from noweb documentation.
# Programs svx2xml/xml2svx: convert cave surveying data between
# Survex and CaveScript XML format.
#
# Copyright (C) 2000 Michael R. Lake
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
# Program Usage: program_name < survex_data_file > xml_data_file
use strict;
use XML::Writer; # see perldoc XML::Writer
my $i = 0;
my ($tmp, @tmp);
my $todays_date;
my $data_in;
my @data_in;
my $writer = new XML::Writer(NEWLINES => 0);
my $data_type = "normal";
my @data_type_stack = ""; # When we change the Survex data-type we push
the
# last one onto this
stack.
# Declare and initialise a hash for the data ordering.
my %data_order_default;
$data_order_default{"from"} = 0;
$data_order_default{"to"} = 1;
$data_order_default{"tape"} = 2;
$data_order_default{"compass"} = 3;
$data_order_default{"clino"} = 4;
# Copy the default data-ordering hash to the current working hash.
my %data_order_current = %data_order_default;
# Write an XML declaration and a comment that the XML file was created from
# this Survex to XML conversion program.
$writer->xmlDecl("", "no");
$writer->doctype("CAVESURVEY", "", "CaveSurvey.dtd");
# temporarily removed date function as Martin Laverty reported that it crashes
Perl
# on M$ Windows
# $tmp = "This file was generated from svx2xml on ".`date`;
$tmp = "This file was generated from svx2xml";
chomp($tmp); $writer->comment($tmp);
# Write the opening root element.
$writer->startTag("CAVESURVEY");
$writer->characters("\n");
# Read data from STDIN
@data_in = read_stdin();
# Process Survex lines (* commands are in alphabetical order)
for ($i=0; $i<=$#data_in; $i++)
{
# Survex blank line.
if ($data_in[$i] eq "")
{
$writer->characters("\n");
}
# Survex comment line.
# FIRST we check for comment lines BEFORE we check for *command lines.
# A Survex comment is a line comprising zero or more white space
followed
# by a semicolon and trailing text. Convert to XML comment.
elsif ($data_in[$i] =~ /^\s*;/)
{
svx_comment($data_in[$i]);
}
# NOW we check for *commands. This way any *commands commented out with
;
# will have been turned into XML comments. We are also checking that the
# *command is at the start of a line - superfluous but lets do it.
# See the comments made about this in the full noweb documentation.
# Survex "*begin series_name" line to XML
elsif ($data_in[$i] =~ /^\*begin/i) {
svx_begin($data_in[$i]);
}
# Survex "*calibrate instrument value" line to XML
elsif ($data_in[$i] =~ /^\*calibrate/i) {
svx_calibrate($data_in[$i]);
}
# Survex "*data type ordering" line to XML
elsif ($data_in[$i] =~ /^\*data/i) {
svx_datatype($data_in[$i]);
}
# Survex "*end series_name" line to XML
elsif ($data_in[$i] =~ /^\*end/i) {
svx_end($data_in[$i]);
}
# Survex "*equate station1 station2" line to a XML
elsif ($data_in[$i] =~ /^\*equate/i) {
svx_equate($data_in[$i]);
}
# Survex "*fix station" line to XML.
elsif ($data_in[$i] =~ /^\*fix/i) {
svx_fix($data_in[$i]);
}
# Survex "*include filename.svx" line to XML.
elsif ($data_in[$i] =~ /^\*include/i) {
svx_include($data_in[$i]);
}
# Survex "*sd instrument value units" line to XML
elsif ($data_in[$i] =~ /^\*sd/i) {
svx_sd($data_in[$i]);
}
# At this point we assume it's valid survey data in a Survex format.
# Default order is "From To Tape Compass Clino" but a diff order
# can be handled by the *data command.
else {
svx_survey_data($i, $data_in[$i]);
}
}
$writer->endTag("CAVESURVEY");
# Finish the XML document. This method will
# check that the document has exactly one document
# element, and that all start tags are closed:
$writer->end();
###################################
### Survex specific subroutines ###
###################################
sub svx_comment
{
# Survex comment line to XML comment line.
my @tmp = split (/;/, $_[0]); # split the original string again!
$writer->comment($tmp[1]); # write out the second half.
$writer->characters("\n");
}
sub svx_begin
{
# Survex "*begin survey_name ; comment" line.
# Save away the current data_type (ie normal or diving) so
# it can be restored on meeting an *end
push(@data_type_stack, $data_type);
my @tmp = split(/\s+/, $_[0]);
if (!$#tmp)
# Case 1: "*begin " ie. no name or trailing comments
{
$writer->startTag("SERIES");
}
else
# Case 2: "*begin something [something else]"
{
# ie. @tmp array will be (*begin, name [; comments...] ) so
# $tmp[0] will be *begin always
# $tmp[1] will be either ";" or will be a series "name" but NOT
null
# $tmp[2] will be either ";" or comment characters or else ""
ie null.
if ($tmp[1] eq ";")
{
$writer->startTag("SERIES");
}
else
{
$writer->startTag("SERIES", "NAME" => $tmp[1]);
}
}
# Cope with case of a trailing comment.
if ($_[0] =~ /;/)
{
svx_comment($_[0]);
}
else
{
$writer->characters("\n");
}
}
sub svx_calibrate
{
# Survex "*calibrate instrument value" line to XML line
# <instrument ZERO_CORRECT="value" SCALE_CORRECT="value"> tag.
# or
# Survex "*calibrate declination value" line to XML line
# <AREA DECLINATION="value" > tag.
my @tmp = split(/\s+/, $_[0]);
# ie. @tmp array will be either, using tape as an example...
# (*calibrate, tape, 0.1 [scale]) or (*calibrate, declination, 12.5
[1])
# $tmp[1] will be either the instrument we are calibrating or
declination
# $tmp[2] will be it's value.
# An instrument may have a scale value as well. If it doesn't just
# set it to 1.
# $tmp[3] its scale value if its an instrument.
$tmp[1] =~ tr/a-z/A-Z/; # Make sure element name is UPPERCASE.
if ( $tmp[1] =~ /DECLINATION/)
{
# If there is a scale given ie. $tmp[3] exists we don't write
an
# attribute for it as in the case of an instrument as it has no
# meaning for a declination.
$writer->emptyTag("AREA", "DECLINATION" => $tmp[2]);
}
else
{
if (!$tmp[3]) # if there is no scale then set its value to 1
{
$tmp[3] = 1;
}
$writer->startTag("INSTRUMENT");
$writer->emptyTag($tmp[1], "ZERO_CORRECT" => $tmp[2],
"SCALE_CORRECT" => $tmp[3]);
$writer->endTag("INSTRUMENT");
}
# Cope with case of a trailing comment.
if ($_[0] =~ /;/)
{
svx_comment($_[0]);
}
else
{
$writer->characters("\n");
}
}
sub svx_datatype
{
# Survex "*data <type> <ordering>" line to XML line
# Previously initialised using the hash %data_order_current.
my $i;
my @tmp = split(/\s+/, $_[0]);
# Cases might be:
# *data normal from to tape compass clino
# tmp[0] tmp[1] tmp[2] tmp[3] tmp[4] tmp[5] tmp[6]
# *data diving from to tape compass fromdepth todepth
# tmp[0] tmp[1] tmp[2] tmp[3] tmp[4] tmp[5] tmp[6] tmp[7]
$tmp[1] =~ tr/A-Z/a-z/; # Make sure element name if 'NORMAL' or
# 'DIVING' is converted
to lowercase.
if ( $tmp[1] =~ /normal/)
{
$data_type = "normal";
# We only want $tmp[2] through $tmp[6] ie the 5 fields
# from to tape compass clino
for ($i=2; $i<=6; $i++)
{
$data_order_current{$tmp[$i]} = $i-2;
# ie. $data_order_current{"from"} = 0
# $data_order_current{"to"} = 1 etc...
# debug line
# print STDERR "Data is $tmp[1] key=", $tmp[$i], "
value=", $i, "\n";
}
}
elsif ( $tmp[1] =~ /diving/)
{
$data_type = "diving";
# We only want $tmp[2] through $tmp[7] ie the 6 fields
# from to tape compass fromdepth todepth
for ($i=2; $i<=7; $i++)
{
$data_order_current{$tmp[$i]} = $i-2;
# debug line
# print STDERR "Data is $tmp[1] key ", $tmp[$i], "
value=", $i, "\n";
}
}
else
{
print STDERR "The *data type ", $tmp[1], " is not
supported.\n";
print STDERR "Correct line: ", $_[0], "\n";
}
# Cope with case of a trailing comment.
if ($_[0] =~ /;/)
{
svx_comment($_[0]);
}
else
{
$writer->characters("\n");
}
}
sub svx_end
{
# Survex "*end name ; comment" line to XML line.
# Might also be just "*end ".
my @tmp = split(/\s+/, $_[0]);
$writer->endTag("SERIES");
# Restore the default data-type and data-ordering.
$data_type = pop(@data_type_stack);
%data_order_current = %data_order_default;
# TODO restore other environments as we have only restored the default
# data type.
# DATA TYPE & ORDERING, DEFAULT CALIBRATION CASE SD INFER SET etc
#$data_order = pop(@data_order_stack);
# Cope with case of a trailing comment.
if ($_[0] =~ /;/)
{
svx_comment($_[0]);
}
else
{
$writer->characters("\n");
}
}
sub svx_equate
{
# Survex "*equate stn1 stn2 ; comment" line to XML line.
my @tmp;
@tmp = split(/\s+/, $_[0]);
$writer->startTag("EQUATE", "STN1" => $tmp[1],
"STN2" =>
$tmp[2]);
$writer->characters("\n");
$writer->endTag("EQUATE");
# Cope with case of a trailing comment.
if ($_[0] =~ /;/)
{
svx_comment($_[0]);
}
else
{
$writer->characters("\n");
}
}
sub svx_fix
{
# Survex *fix line to XML line.
# eg. "*fix 3a 1000 2000 700" fixes station 3a to 1000m East, 2000m
North
# and 700m Height.
# $tmp[0] is *fix, $tmp[1] is the station name,
# $tmp[2] is E, $tmp[3] is N, $tmp[4] is H.
my @tmp = split(/\s+/, $_[0]);
$writer->emptyTag("STN", "NAME" => $tmp[1],
"EAST" => $tmp[2],
"NORTH" => $tmp[3],
"HEIGHT" => $tmp[4]);
# Cope with case of a trailing comment.
if ($_[0] =~ /;/)
{
svx_comment($_[0]);
}
else
{
$writer->characters("\n");
}
}
sub svx_include
{
# Survex *include line to XML line.
# eg. "*include extension.svx
# $tmp[0] is "*include", $tmp[1] is "extension.svx"
my @tmp = split(/\s+/, $_[0]);
$writer->emptyTag("INCLUDE", "filename" => $tmp[1]);
# Cope with case of a trailing comment.
if ($_[0] =~ /;/)
{
svx_comment($_[0]);
}
else
{
$writer->characters("\n");
}
}
sub svx_sd
{
# Survex "*sd instrument value ; comment" line to XML line.
my @tmp = split(/\s+/, $_[0]);
# ie. @tmp array will be (*sd, instrument, value, units, ;, comments)
# $tmp[1] will be the instrument name
# $tmp[2] it's value
# $tmp[3] it's units.
$tmp[1] =~ tr/a-z/A-Z/; # Make sure instrument name is
UPPERCASE.
$writer->startTag("INSTRUMENT");
$writer->emptyTag($tmp[1], "SD" => $tmp[2],
"UNITS" => $tmp[3]);
$writer->endTag("INSTRUMENT");
# Cope with case of a trailing comment.
if ($_[0] =~ /;/)
{
svx_comment($_[0]);
}
else
{
$writer->characters("\n");
}
}
sub svx_survey_data
{
my ($i, $data_in) = @_;
my $line_no;
my @tmp = split(/\s+/, $data_in);
# ie. @tmp arrray might be (1, 2, 355, +24, 10.6 ; comments...)
# Final check - if data type=normal there must be at least 5 elements
# to this array.
# From...Elev = 5 elements and if there is a comment following
# then possibly more than 5 elements.
# TODO handle comment 1st then check for number of elements left
# for better error pickup.
if ( ($data_type eq "normal") && ($#tmp >= 4) )
{
# Notice that we access the fields with a hash. See the
# subroutine 'svx_datatype'.
$writer->emptyTag("SHOT",
"FROM" => $tmp[$data_order_current{"from"}],
"TO" => $tmp[$data_order_current{"to"}],
"DIST" => $tmp[$data_order_current{"tape"}],
"AZIM" => $tmp[$data_order_current{"compass"}],
"ELEV" => $tmp[$data_order_current{"clino"}]);
}
elsif ( ($data_type eq "diving") && ($#tmp >= 5) )
{
$writer->emptyTag("SHOT",
"FROM" => $tmp[$data_order_current{"from"}],
"TO" => $tmp[$data_order_current{"to"}],
"DIST" => $tmp[$data_order_current{"tape"}],
"AZIM" => $tmp[$data_order_current{"compass"}],
"FROMDEPTH" => $tmp[$data_order_current{"fromdepth"}],
"TODEPTH" => $tmp[$data_order_current{"todepth"}]);
}
elsif ( ($data_type eq "topofil") && ($#tmp >= 5) )
{
# FROM TO FROMCOUNT TOCOUNT [BACK]BEARING [BACK]GRADIENT
$writer->emptyTag("SHOT",
"FROM" => $tmp[$data_order_current{"from"}],
"TO" => $tmp[$data_order_current{"to"}],
"FROMCOUNT" => $tmp[$data_order_current{"fromcount"}],
"TOCOUNT" => $tmp[$data_order_current{"tocount"}],
"AZIM" => $tmp[$data_order_current{"compass"}],
"CLINO" => $tmp[$data_order_current{"clino"}]);
}
else
{
# Bail out and write data as an XML comment.
$line_no = $i + 1;
$tmp = "WARNING line $line_no in the Survex file has less than
";
$tmp = $tmp."5 data values.\n";
$tmp = $tmp."The line has been commented out in the XML
file!.\n";
print STDERR $tmp;
$writer->comment($tmp);
$writer->characters("\n");
$writer->comment($data_in[$i]);
$writer->characters("\n");
}
# Cope with case of a trailing comment.
if ($_[0] =~ /;/)
{
svx_comment($_[0]);
}
else
{
$writer->characters("\n");
}
}
#################################
### Miscellaneous Subroutines ###
#################################
sub read_stdin
{
my @data_in;
while (<>)
{
s/^\s*//; # get rid of all leading spaces
chomp; # get rid of newlines
push (@data_in, $_);
}
return @data_in;
}
# Function for debugging only.
sub print_array
{
# Print the array.
my ($i, $array_size, @array);
@array = @_;
for ($i=0; $i<=$#array; $i++)
{
printf "$array[$i]\n";
}
$array_size = @array;
return $array_size;
}
#!/usr/bin/perl -w
# XML to Survex converter
# This program is generated from noweb documentation.
# Programs svx2xml/xml2svx: convert cave surveying data between
# Survex and CaveScript XML format.
#
# Copyright (C) 2000 Michael R. Lake
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
# Usage is: xml2svx xml_file > svx_file
use strict;
use XML::Parser; # see perldoc XML::Parser
my $p1; # Instance of an XML parser
my $string;
my $file_xml = shift;
# The stack of station names built up as we encounter series after series
# of cave surveys.
my (@series, $last_series);
my $semi_needed = 1;
# First check that we have a well formed document.
# If no style is specified the parser will just check for well-formedness.
$p1 = new XML::Parser();
if (!$p1->parsefile($file_xml)) {
print "Document $file_xml not well-formed!\n";
exit(0);
}
# Set style to Subs.
$p1 = new XML::Parser(Style => 'Subs');
# XML declarations and Doctypes aren't needed by Survex but we will
# save them with the Survex file for version information.
#$p1->setHandlers( XMLDecl => \&handle_decl,
# Doctype => \&handle_doctype);
$p1->setHandlers( Comment => \&handle_xmlcomment);
$p1->setHandlers(Char => \&handle_char);
#$p1->setHandlers(Default => \&handle_default);
# temp remove date as it crashes Perl on M$ Windows
# print "; This file was generated from xml2svx on ".`date`;
print "; This file was generated from xml2svx;
$p1->parsefile($file_xml);
####################################################
# Functions for handling tags in Cave Script XML
####################################################
sub handle_decl
{
my ($p, $Version, $Encoding, $Standalone) = @_;
print "; XML: Ver=$Version\n";
}
sub handle_doctype
{
my ($p, $Name, $Sysid, $Pubid, $Internal) = @_;
print "; Sys=$Sysid\n";
}
sub handle_xmlcomment
{
my ($p, $string) = @_;
$string = trim_whitespace($string);
$string = trim_semicolon($string);
print "; $string\n";
}
sub handle_char
{
my ($p, $string) = @_;
# remove leading and trailing whitespace - including newlines
$string = trim_whitespace($string);
# If the string was just white space it will now be null.
# If the string is not null print it.
if ($string ne "") {
if ($semi_needed == 1) {
print " ; $string"; # Needs a semicolon.
}
elsif ($semi_needed == 0) {
print " $string"; # Doesn't need a
semicolon.
}
else {
# we should never be here
print "\nError in handler_char()";
}
}
# else string is null so don't even create a new line.
}
sub handle_default
{
# covers situation where there is no registered handler
my ($p, $string) = @_;
if ($string eq "") { return; }
my $line = $p->current_line;
print "\n; No support for ", $string;
}
sub CAVESURVEY
{
$semi_needed = 1;
}
sub CAVESURVEY_
{
$semi_needed = 0;
print "\n";
}
sub AREA
{
my ($p, $element, %attr) = @_;
print "\n; AREA";
if ($attr{"NAME"}) {
print "\n; Area Name: ", $attr{"NAME"};
$semi_needed = 0;
}
if ($attr{"DECLINATION"}) {
print "\n*calibrate declination ", $attr{"DECLINATION"};
$semi_needed = 1;
}
}
sub AREA_
{
print "\n";
}
sub CAVE
{
my ($p, $element, %attr) = @_;
print "\n; CAVE";
if ($attr{"NAME"}) {
print "\n; Cave Name: ", $attr{"NAME"};
}
if ($attr{"TAG"}) {
print "\n; Cave Tag: ", $attr{"TAG"};
}
$semi_needed = 0;
}
sub CAVE_
{
print "\n";
}
sub DATE
{
print "\n; DATE";
}
sub DATE_
{
print "\n";
}
sub SURVEYDATE
{
my ($p, $element, %attr) = @_;
print "\n; Survey Date: ";
date_format(%attr);
}
sub SURVEYDATE_
{
#print "\n";
}
sub CREATIONDATE
{
my ($p, $element, %attr) = @_;
print "\n; Creation Date: ";
date_format(%attr);
}
sub CREATIONDATE_
{
#print "\n";
}
sub MODIFICATIONDATE
{
my ($p, $element, %attr) = @_;
print "\n; Modification Date: ";
date_format(%attr);
}
sub MODIFICATIONDATE_
{
#print "\n";
}
sub SURVEYORS
{
print "\n; SURVEYORS";
}
sub SURVEYORS_
{
print "\n";
}
sub SURVEYOR
{
my ($p, $element, %attr) = @_;
if ($attr{"NAME"}) {
print "\n; Surveyor: ", $attr{"NAME"};
}
if ($attr{"AFFILIATION"}) {
print "\n; Affiliation: ", $attr{"AFFILIATION"};
}
$semi_needed = 1;
}
sub SURVEYOR_
{
#print "\n";
}
sub INSTRUMENT
{
print "\n; INSTRUMENT";
}
sub INSTRUMENT_
{
print "\n";
}
sub instrument_type
{
my ($instrument, %attr) = @_;
if ($attr{"ID"}) {
print "\n; $instrument ID: ", $attr{"ID"};
}
if ($attr{"UNITS"}) {
print "\n; $instrument units: ", $attr{"UNITS"};
}
if ($attr{"USED"}) {
print "\n; $instrument used: ", $attr{"USED"};
}
if ($attr{"ZERO_CORRECT"}) {
print "\n*calibrate $instrument ", $attr{"ZERO_CORRECT"};
}
if ($attr{"SD"}) {
print "\n*sd $instrument ", $attr{"SD"};
}
if (!keys(%attr)) {
print "\n; $instrument: ";
}
$semi_needed=1;
}
sub TAPE
{
my ($p, $element, %attr) = @_;
instrument_type("Tape", %attr);
}
sub TAPE_
{
#print "\n";
}
sub COMPASS
{
my ($p, $element, %attr) = @_;
instrument_type("Compass", %attr);
}
sub COMPASS_
{
#print "\n";
}
sub CLINO
{
my ($p, $element, %attr) = @_;
instrument_type("Clino", %attr);
}
sub CLINO_
{
#print "\n";
}
sub SERIES
{
my ($p, $element, %attr) = @_;
if ($attr{"NAME"}) {
print "\n*begin ", $attr{"NAME"};
push(@series, $attr{"NAME"});
}
else {
print "\n*begin";
push(@series, " ");
}
$semi_needed=1;
}
sub SERIES_
{
$last_series = pop(@series);
print "\n*end ", $last_series;
}
sub STN
{
my ($p, $element, %attr) = @_;
print "\n; Stn: ", $attr{"NAME"}, " ";
$semi_needed=0;
}
sub STN_
{
#print "\n";
}
sub SHOT
{
my ($p, $element, %attr) = @_;
print "\n", $attr{"FROM"}, "\t", $attr{"TO"}, "\t";
print $attr{"DIST"}, "\t", $attr{"AZIM"}, "\t", $attr{"ELEV"}, " ";
$semi_needed=1;
}
sub SHOT_
{
#print "\n";
}
sub EQUATE
{
my ($p, $element, %attr) = @_;
print "\n*equate ",$attr{"STN1"}, " ", $attr{"STN2"}, " ";
$semi_needed=1;
}
sub EQUATE_
{
#print "\n";
}
####################################################
# Miscellaneous functions
####################################################
sub trim_semicolon
{
my $string = $_[0];
$string =~ s/^;//; # remove leading semicolon
return $string;
}
sub trim_whitespace
{
my $string = $_[0];
$string =~ s/^\s*//; # remove leading whitespace
$string =~ s/\s*$//; # remove trailing whitespace+newline
# This will also have removed the trailing newline.
return $string;
}
sub date_format
{
my %attr = @_;
if ($attr{"DAY"}) {
print $attr{"DAY"};
}
else {
print "??";
}
print "-";
if ($attr{"MONTH"}) {
print $attr{"MONTH"};
}
else {
print "??";
}
print "-";
if ($attr{"YEAR"}) {
print $attr{"YEAR"};
}
else {
print "????";
}
print " (dd-mm-yyyy)"
}
####################################
### Not used
####################################
sub handle_start
{
my ($p, $element, %attr) = @_;
my $line = $p->current_line; print "$line START $element\n";
}
sub handle_end
{
my ($p, $element) = @_;
my $line = $p->current_line; print "$line END $element \n";
}
_______________________________________________
coders mailing list
[email protected]
http://lists.slug.org.au/listinfo/coders