>Hello
>
>
>This is off topic, so I would be grateful for a pointer to a module or
>an appropriate mailing list.
>
>I want to read an HTML form into a hash, but I don't want to use
>HTML::Tree or similar DOM object trees. I simply need all the
>form-relevant information as a human-readable hash. Example:
Write your own, like I did ;-)
#!/usr/bin/perl
use strict;
use Data::Dumper;
# Read the complete HTML document from standard input (the script is run
# as "perl script.pl < page.html").  Empty input yields an empty string.
my $htmlsource = do { local $/; <STDIN> };
$htmlsource = '' unless defined $htmlsource;

# $formfields maps each field name to a record describing that field.  The
# special key 'printorder' holds the field names in document order.
my $formfields = {};

# Splitting on angle brackets yields an alternating list of text chunks and
# tag bodies; a tag body is recognised below by its leading element name.
my @tags = split(/[\<\>]/, $htmlsource);

# A C-style loop is used on purpose: the select/textarea branches advance
# $i themselves to consume everything up to the matching closing tag.
for (my $i = 0; $i < @tags; $i++) {
    my $tag = $tags[$i];
    next unless $tag =~ /^(input|select|textarea)\b/i;

    my $type = lc($1);
    my $attr = parse($tag);

    # Unnamed fields are filed under the empty string, both in the print
    # order and as the hash key, so the two always agree.
    my $name = defined $attr->{'name'} ? $attr->{'name'} : '';
    push(@{ $formfields->{'printorder'} }, $name);

    my $rec = {
        'tag'  => $type,
        'attr' => $attr,
        'orig' => $tag,
    };

    if ($type eq 'select') {
        $rec->{'options'} = [];

        # Collect every <option> up to </select>.  The bounds check stops
        # the scan at end of input when the closing tag is missing.
        $i++;
        while ($i < @tags && $tags[$i] !~ /^\/select/i) {
            if ($tags[$i] =~ /^option/i) {
                my $opt = parse($tags[$i]);

                # The chunk right after the option tag is its visible label.
                $opt->{'label'} = $tags[$i + 1];
                push(@{ $rec->{'options'} }, $opt);
                $i++;
            }
            $i++;
        }
    }
    elsif ($type eq 'textarea') {
        $rec->{'options'} = [];

        # The value is the text between <textarea ...> and </textarea>.
        # Start after the opening tag so its own text is not included.
        my $val = '';
        $i++;
        while ($i < @tags && $tags[$i] !~ /^\/textarea/i) {
            $val .= $tags[$i];
            $i++;
        }
        $rec->{'value'} = $val;
    }

    $formfields->{$name} = $rec;
}

print Dumper($formfields);
# parse($tag) - extract the attributes of a single HTML tag.
#
# $tag is the text between "<" and ">", e.g. 'input type="text" name=q'.
# Returns a hash reference mapping lower-cased attribute names to values.
#
#   * Values may be double-quoted, single-quoted or unquoted; the quotes
#     themselves are not part of the stored value.
#   * Only the first "=" separates name from value, so values that contain
#     "=" are kept intact.
#   * "selected" and "checked" get the value 1 when no true value is given.
#   * The "/" of a self-closing tag is not treated as an attribute.
#   * Unquoted whitespace always ends an attribute, so 'name = "x"' (with
#     spaces around "=") is not understood as one attribute.
sub parse {
    my $tag = shift;
    return {} unless defined $tag;

    $tag =~ s/^\w+//;    # drop the element name
    $tag =~ s/^\s+//;
    return {} unless length $tag;

    my $attr  = {};
    my $quote = '';      # the quote character we are inside of, '' if none
    my $pair  = '';      # the "name=value" text collected so far

    # Record one collected "name=value" chunk in $attr.
    my $store = sub {
        my ($text) = @_;
        return if !length($text) || $text eq '/';
        my ($k, $v) = split(/=/, $text, 2);
        return unless defined $k && length $k;
        $v ||= 1 if $k =~ /^(?:selected|checked)$/i;
        $attr->{ lc($k) } = $v;
        return;
    };

    for my $char (split(//, $tag)) {
        if ($quote) {
            # Inside a quoted value everything is literal up to the
            # matching closing quote, including whitespace.
            if ($char eq $quote) {
                $quote = '';
            }
            else {
                $pair .= $char;
            }
            next;
        }
        if ($char eq '"' || $char eq "'") {
            $quote = $char;
            next;
        }
        if ($char =~ /\s/) {
            $store->($pair);
            $pair = '';
            next;
        }
        $pair .= $char;
    }

    # The last attribute is not followed by whitespace; flush it here.
    $store->($pair);

    return $attr;
}
Call it script.pl.
$ perl script.pl < html_page_to_parse.html
Note that it doesn't differentiate between multiple forms on a single
page. I'll leave that as an exercise for the reader.
Enjoy,
Rob
p.s. This hasn't been fully tested.
--
When I used a Mac, they laughed because I had no command prompt. When
I used Linux, they laughed because I had no GUI.