>Hello
>
>
>This is off topic, so I'd be grateful for a direct pointer to a module
>or to an appropriate mailing list.
>
>I want to read an HTML form into a hash, but I don't want to use
>HTML::Tree or similar DOM object trees. I simply need all form-relevant
>information as a human-readable hash. Example:


Write your own.... like me ;-)

#!/usr/bin/perl

# Read an HTML document from STDIN and print, via Data::Dumper, a hash
# describing its form fields (<input>, <select> and <textarea>).
#
# Layout of the dumped structure:
#   printorder => [ field names in document order ]
#   <name>     => {
#       tag     => 'input' | 'select' | 'textarea',
#       attr    => hash of the tag's attributes (as returned by parse()),
#       orig    => the tag text without the surrounding angle brackets,
#       options => [ attribute hashes of the <option>s, each with a
#                    'label' ]                  (select; empty for textarea),
#       value   => raw content of the element   (textarea only),
#   }
#
# Known limitations (unchanged from the original design): fields sharing a
# name (e.g. radio buttons) overwrite each other, a field literally named
# "printorder" collides with the order list, and multiple forms on a page
# are not told apart.

use strict;
use warnings;
use Data::Dumper;

# Slurp the whole document in one read.
my $htmlsource = do { local $/; <STDIN> };
$htmlsource = '' unless defined $htmlsource;

# Tokenise with a capturing separator: split() then returns text and tags
# alternately, so every odd index holds a "<...>" tag and every even index
# holds the text between tags.  Keeping the two apart prevents ordinary
# text that happens to start with "input" or "select" from being mistaken
# for a form field.
my @tokens = split(/(<[^>]*>)/, $htmlsource);

my $formfields = {};

for (my $i = 1; $i < @tokens; $i += 2) {
    # Tag text without the angle brackets, e.g. 'input name="q"'.
    my $tag = substr($tokens[$i], 1, -1);
    next unless $tag =~ /^(input|select|textarea)\b/i;
    my $type = lc($1);

    my $attr = parse($tag);

    # Use one consistent key for both the order list and the record, so a
    # missing name maps to '' and a name of "0" is kept as "0".
    my $name = defined $attr->{'name'} ? $attr->{'name'} : '';
    push(@{$formfields->{'printorder'}}, $name);

    my $rec = {
        'tag'  => $type,
        'attr' => $attr,
        'orig' => $tag,
    };

    if ($type eq 'select') {
        $rec->{'options'} = [];

        # Visit the following tags until </select>.  The bound check stops
        # the scan at end of input when the closing tag is missing.
        for ($i += 2; $i < @tokens; $i += 2) {
            my $inner = substr($tokens[$i], 1, -1);
            last if $inner =~ m{^/select\b}i;
            next unless $inner =~ /^option\b/i;

            my $opt = parse($inner);

            # The option's label is the text token right after its tag.
            my $label = $tokens[$i + 1];
            $opt->{'label'} = defined $label ? $label : '';
            push(@{$rec->{'options'}}, $opt);
        }
    }
    elsif ($type eq 'textarea') {
        $rec->{'options'} = [];

        # Collect everything after the opening tag up to </textarea> (or
        # end of input); the opening tag itself is not part of the value.
        my $val = '';
        for ($i++; $i < @tokens; $i++) {
            last if $i % 2 && $tokens[$i] =~ m{^</textarea\b}i;
            $val .= $tokens[$i];
        }
        $rec->{'value'} = $val;
    }

    $formfields->{$name} = $rec;
}

print Dumper($formfields);

# parse($tag)
#
# Turn the text of an HTML start tag (without the angle brackets, e.g.
# 'input type="text" name="q"') into a hash reference of its attributes.
#
#   - Attribute names are lower-cased.
#   - Values may be unquoted, double-quoted or single-quoted; the quotes
#     are removed and whitespace inside quotes is kept.
#   - Attributes whose name matches /selected|checked/ and that carry no
#     (or a false) value are stored as 1.
#   - Other attributes without "=" are stored with an undefined value.
#   - A trailing XHTML-style "/" ("<input ... />") is ignored.
#
# Returns a reference to an empty hash when the tag has no attributes.
sub parse {
    my $tag = shift;
    return {} unless defined $tag;

    $tag =~ s/^\w+//;    # drop the element name
    $tag =~ s/^\s+//;

    # Drop a self-closing slash, but only when it stands alone or follows a
    # quote/whitespace, so a slash ending an unquoted value is preserved.
    $tag =~ s{(?:\A|(?<=["'\s]))/\s*\z}{};

    return {} unless length $tag;

    my $attr  = {};
    my $quote = '';      # the quote character we are inside, or ''
    my $pair  = '';      # the "name=value" text collected so far

    for my $char (split(//, $tag)) {
        if ($quote) {
            # Inside quotes everything is literal until the same quote
            # character closes the value.
            if ($char eq $quote) {
                $quote = '';
            }
            else {
                $pair .= $char;
            }
            next;
        }
        if ($char eq '"' || $char eq "'") {
            $quote = $char;
            next;
        }
        if ($char =~ /\s/) {
            # Unquoted whitespace ends the current pair.
            _add_attr($attr, $pair);
            $pair = '';
            next;
        }
        $pair .= $char;
    }

    # The last pair has no trailing whitespace to flush it.
    _add_attr($attr, $pair);

    return $attr;
}

# Store one "name=value" (or bare "name") pair in the attribute hash.
sub _add_attr {
    my ($attr, $pair) = @_;
    return unless length $pair;

    # Limit of 2 keeps any further "=" characters inside the value.
    my ($k, $v) = split(/=/, $pair, 2);
    return unless defined $k && length $k;

    # Boolean attributes are recorded as 1 when given without a value.
    $v ||= 1 if ($k =~ /selected|checked/i);
    $attr->{lc($k)} = $v;
    return;
}


Call it script.pl.

$ perl script.pl < html_page_to_parse.html

Note that it doesn't differentiate between multiple forms on a single 
page.  I'll leave that as an exercise to the reader.

Enjoy,

Rob

p.s.  This hasn't been fully tested.


--
When I used a Mac, they laughed because I had no command prompt. When 
I used Linux, they laughed because I had no GUI.  

Reply via email to