hi... starting to explore libxml.. i have a question (the 1st of many, i'm sure!!)
i'm trying to parse the following.. and trying to get the text after the <strong></strong>... but i'm not sure how to access it.. i can get the information within the <strong></strong>.. any help would be useful... thanks... the sample html is: <tr> <td valign="top" colspan=4 class="sectionheading" bgcolor="#99ffcc"> ACCT 209 - 501 SURVEY OF ACCT PRIN <br /></td> </tr> <tr> <td valign="top" bgcolor="#99ffcc"> <strong>Instructor:</strong> STASNY M <br /> <strong>Total Seats:</strong> 450<br /> <strong>Available Seats</strong> 066<p /> </td> <td valign="top" bgcolor="#99ffcc"> NON-BUSINESS, NON-AGRIBUSINESS MAJORS ONLY<br /><br /> </td> <td valign="top" bgcolor="#99ffcc"> <a href="http://www.tamu.edu/map/building/overview/WCBA.html"> MWF 09:10AM-10:00AM WCBA 159</a><br /> </td> <td valign="top" bgcolor="#99ffcc"> CR 3 </td> </tr> <tr> <td colspan=4 bgcolor="#e2e2e2" valign="top" class="sectionheading"> ACCT 209 - 502 SURVEY OF ACCT PRIN </td> </tr> <tr> <td bgcolor="#e2e2e2" valign="top"> <strong>Instructor:</strong> STASNY M <br /> <strong>Total Seats:</strong> 325 <br /> <strong>Available Seats</strong> 001<p /> </td> <td bgcolor="#e2e2e2" valign="top"> NON-BUSINESS, NON-AGRIBUSINESS MAJORS ONLY<br /><br /> </td> <td bgcolor="#e2e2e2" valign="top"> <a href="http://www.tamu.edu/map/building/overview/KLCT.html"> MWF 12:40PM-01:30PM KLCT 115</a><br /> </td> <td bgcolor="#e2e2e2" valign="top"> CR 3 </td> </tr> <tr> <td valign="top" colspan=4 class="sectionheading" bgcolor="#99ffcc"> ACCT 209 - 503 SURVEY OF ACCT PRIN <br /></td> </tr> <tr> <td valign="top" bgcolor="#99ffcc"> <strong>Instructor:</strong> STRAWSER R <br /> <strong>Total Seats:</strong> 450<br /> <strong>Available Seats</strong> 194<p /> </td> <td valign="top" bgcolor="#99ffcc"> NON-BUSINESS, NON-AGRIBUSINESS MAJORS ONLY<br /><br /> </td> <td valign="top" bgcolor="#99ffcc"> <a href="http://www.tamu.edu/map/building/overview/WCBA.html"> TR 08:00AM-09:15AM WCBA 159</a><br /> </td> <td valign="top" bgcolor="#99ffcc"> CR 3 </td> </tr> the sample perl that i'm using is: --------------------------------------------------------------- #!/usr/bin/perl -w use HTML::TreeBuilder; use LWP::UserAgent; use WWW::Mechanize; use XML::LibXML; my $base_url = "http://courses.tamu.edu/ViewSections.aspx?department=ACCT&term=C&course=209 &year=2004&activity=00"; $ua = new LWP::UserAgent; $ua->timeout(30); $ua->agent("AgentName/0.1 " . $ua->agent); $section_url = $base_url; #testing for now... $req = new HTTP::Request GET => $section_url; $res = $ua->request($req); $q1c = $res->content; # print $q1c; $am_tree = HTML::TreeBuilder->new_from_content($res->content); # empty tree # trigger off the <td to get the class names.. $_ndx=0; @_section_tbl = $am_tree->look_down("_tag"=>"table", "cellpadding"=>"0"); #print "html = " .$_section_tbl[1]->dump() ."\n"; #die; $test_html = $_section_tbl[1]->as_HTML(); my $doc = XML::LibXML ->new({recover=>1}) ->parse_html_string($test_html); #or parse_html_file #print $doc->findvalue('//text()'); @nodes = $doc->findnodes('//[EMAIL PROTECTED]"sectionheading"]'); print "cnt = " . scalar @nodes ."\n"; print $nodes[0]->findvalue('./text()')."\n"; print $nodes[1]->findvalue('./text()')."\n"; print $nodes[2]->findvalue('./text()')."\n"; #<<<<<<<<<<<<<<<<<<<<>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> # # this is the area i'm having issues/questions about... # #<<<<<<<<<<<<<<<<<<<<>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> @nodes2 = $doc->findnodes('//td/descendant::strong'); print "cnt2 = " . scalar @nodes2 ."\n"; print $nodes2[0]->findvalue('./text()')."\n"; print $nodes2[1]->findvalue('./text()')."\n"; print $nodes2[2]->textContent ."\n"; print $nodes2[3]->findvalue('.')."\n"; print $nodes2[4]->findvalue('.')."\n"; print $nodes2[5]->findvalue('./text()')."\n"; print $nodes2[6]->findvalue('./text()')."\n"; print $nodes2[7]->findvalue('./text()')."\n"; print $nodes2[8]->findvalue('./text()')."\n"; die; thanks...