>Hello
>
>
>This is off topic, thanks for a direct hint to a module or an
>appropriate mailing list.
>
>I want to read an HTML form into a hash. But I don't want to use
>HTML::Tree or similar DOM Object trees. I need simply all form relevant
>information as a hash which is human readable. Example:
# Write your own.... like me ;-)

#!/usr/bin/perl
#
# Reads an HTML document on STDIN and prints its form controls (<input>,
# <select> with its <option>s, and <textarea>) as a Data::Dumper dump of a
# nested hash:
#
#   printorder => [ control names in document order ('' when unnamed) ],
#   <name>     => { tag => ..., attr => {...}, orig => ...,
#                   options => [...],      # select (always empty for textarea)
#                   value   => ... },      # textarea only
#
# Known limitations: controls that share a name (e.g. a radio group)
# overwrite each other, a control literally named "printorder" collides with
# the order list, and a '>' inside a quoted attribute value still ends the tag.
use strict;
use warnings;
use Data::Dumper;

# Run only when executed as a script, so the subs below can be loaded and
# exercised without touching STDIN.
main() unless caller;

# Slurp STDIN, extract the form fields and dump them to STDOUT.
sub main {
    my $htmlsource = do { local $/; <STDIN> };
    $htmlsource = '' unless defined $htmlsource;

    # Sorted keys make the dump reproducible from run to run.
    local $Data::Dumper::Sortkeys = 1;
    print Dumper(extract_form_fields($htmlsource));
    return;
}

# Split HTML into a list of tokens and return it as an array ref.
# Each token is { type => 'tag' | 'text' | 'comment', content => ... }.
# For tags, content is the text between '<' and '>'; for comments it is the
# raw "<!-- ... -->" markup. Keeping tags and text apart is what stops body
# text such as "input your name" from being mistaken for an <input> tag.
sub tokenize_html {
    my ($html) = @_;
    my @tokens;
    return \@tokens unless defined $html;

    # Every alternative consumes at least one character, so the scan always
    # advances and stops at the end of the string.
    while ($html =~ m{\G(?:(<!--.*?-->)|<([^>]*)>|([^<]+)|(<))}gcs) {
        my ($comment, $tag, $text, $stray) = ($1, $2, $3, $4);

        if (defined $tag) {
            push @tokens, { type => 'tag', content => $tag };
            next;
        }
        if (defined $comment) {
            push @tokens, { type => 'comment', content => $comment };
            next;
        }

        # A '<' that never gets its '>' is ordinary text; glue it to any
        # neighbouring text so a label is never split into pieces.
        my $chunk = defined $text ? $text : $stray;
        if (@tokens && $tokens[-1]{type} eq 'text') {
            $tokens[-1]{content} .= $chunk;
        }
        else {
            push @tokens, { type => 'text', content => $chunk };
        }
    }
    return \@tokens;
}

# Build the form-field hash (see the header for its shape) from an HTML
# string. Unterminated <select>/<textarea> elements simply run to the end of
# the document instead of looping forever.
sub extract_form_fields {
    my ($html) = @_;
    my $tokens     = tokenize_html($html);
    my $formfields = {};
    my $i          = 0;

    while ($i < @$tokens) {
        my $token = $tokens->[ $i++ ];
        next unless $token->{type} eq 'tag';
        next unless $token->{content} =~ /\A(input|select|textarea)\b/i;

        my $type = lc $1;
        my $tag  = $token->{content};
        my $attr = parse($tag);
        my $name = defined $attr->{name} ? $attr->{name} : '';

        push @{ $formfields->{printorder} }, $name;
        my $rec = { tag => $type, attr => $attr, orig => $tag };

        if ($type eq 'select') {
            $rec->{options} = [];
            while ($i < @$tokens) {
                my $inner = $tokens->[ $i++ ];
                next unless $inner->{type} eq 'tag';
                last if $inner->{content} =~ m{\A/select\b}i;
                next unless $inner->{content} =~ /\Aoption\b/i;

                # The label is the text that directly follows the <option>.
                my $option = parse($inner->{content});
                $option->{label} = '';
                if ($i < @$tokens && $tokens->[$i]{type} eq 'text') {
                    $option->{label} = $tokens->[ $i++ ]{content};
                }
                push @{ $rec->{options} }, $option;
            }
        }
        elsif ($type eq 'textarea') {
            # Kept (always empty) so the record has the same keys as before.
            $rec->{options} = [];

            # Everything up to </textarea> is the value; nested markup is put
            # back together with its angle brackets.
            my $value = '';
            while ($i < @$tokens) {
                my $inner = $tokens->[ $i++ ];
                if ($inner->{type} eq 'tag') {
                    last if $inner->{content} =~ m{\A/textarea\b}i;
                    $value .= "<$inner->{content}>";
                }
                else {
                    $value .= $inner->{content};
                }
            }
            $rec->{value} = $value;
        }

        $formfields->{$name} = $rec;
    }
    return $formfields;
}

# Parse the inside of a tag ("input type=text name=q", without the angle
# brackets) and return a hash ref of its attributes.
#   - keys are always lowercased
#   - values may be double-quoted, single-quoted or bare, and may contain '='
#   - a self-closing '/' at the end of the tag is ignored
#   - valueless boolean attributes (selected, checked, ...) get the value 1;
#     any other valueless attribute is stored with an undefined value
sub parse {
    my $tag = shift;
    return {} unless defined $tag;

    $tag =~ s/\A\s*\w+\s*//;                    # drop the tag name
    $tag =~ s{(?:\A|(?<=["'\s]))/\s*\z}{};      # drop an XHTML-style "/"

    my $attr = {};

    # [\s=]* skips separators and any stray '=' so one malformed attribute
    # does not hide the ones after it.
    while ($tag =~ m{\G[\s=]*([^\s=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S*)))?}gc) {
        my ($key, @candidates) = ($1, $2, $3, $4);
        my ($value) = grep { defined } @candidates;

        if ($key =~ /\A(?:selected|checked|disabled|readonly|multiple|required)\z/i) {
            $value ||= 1;
        }
        $attr->{ lc $key } = $value;
    }
    return $attr;
}

1;

# Call it script.pl.
#
#   $ perl script.pl < html_page_to_parse.html
#
# Note that it doesn't differentiate between multiple forms on a single page.
I'll leave that as an exercise for the reader.

Enjoy,
Rob

P.S. This hasn't been fully tested.

--
When I used a Mac, they laughed because I had no command prompt. When I used Linux, they laughed because I had no GUI.