joe....

the following sample perl might be useful... the pieces of code
might/should help... i'm including the entire apps so you can get a feel for
what they're doing.. the 1st app gets into the UT Austin (utexas.edu) site and
extracts information. this site requires the use of a user/passwd..

the code is ugly, as it has comments, etc.. and can easily be tightened, but
it gives you an idea of what's going on...


#############################################################
#
# utaustin.pl. test perl app to extract/parse class schedule
#       from the utaustin.edu registrar site...
#
#
#
#############################################################
#!/usr/bin/perl -w


use HTML::TreeBuilder;
use LWP::UserAgent;
use WWW::Mechanize;

# Plain LWP client used for the public (no login) schedule pages.
# $ua is left as a package variable because later top-level code reuses it.
$ua = LWP::UserAgent->new;
$ua->timeout(30);
$ua->agent("AgentName/0.1 " . $ua->agent);

#
# get the top level university class site
#
my $base_url = "http://www.utexas.edu/student/registrar/schedules/";

# A GET carries no body, so no Content-Type header is set on the request.
my $req = HTTP::Request->new(GET => $base_url);
my $res = $ua->request($req);

# Stop on a transport/HTTP failure instead of parsing an error page.
die "GET $base_url failed: " . $res->status_line . "\n"
   unless $res->is_success;

my $q1c = $res->content;
print $q1c;

# get 046/049/etc...
@_sessions = getsessions($q1c);



#
# we'll parse the returned contents and use them to get to the
# next page...
#
   # Fetch the schedule index page for one session.
   # NOTE(review): the session code is hard-coded to "046" even though a
   # session list is extracted from the top-level page earlier -- confirm
   # whether the extracted list should drive this request instead.
   my $sched_url = $base_url . "046/";
   $req = HTTP::Request->new(GET => $sched_url);
   $res = $ua->request($req);
   die "GET $sched_url failed: " . $res->status_line . "\n"
      unless $res->is_success;
   $q1c = $res->content;
   print $q1c;

   my $ut_tree = HTML::TreeBuilder->new_from_content($q1c);

   # should be only one.. need to get the schedules..
   # Take the first grey cell and the first link inside it; bail out with a
   # readable message if either is missing rather than calling a method on
   # an undefined value.
   my ($sem_td) = $ut_tree->look_down("_tag"=>"td", "bgcolor"=>"#eeeeee");
   my ($sem_a)  = $sem_td ? $sem_td->look_down("_tag"=>"a") : ();
   die "no semester link found on $sched_url\n" unless $sem_a;

   # Left as a package variable: the login section below reads it.
   $sem_page_url = $sem_a->attr('href');
   $ut_tree->delete;

   print "href# $sem_page_url\n";

#
# now it get's tricky.. we parse the returned contents and use
# them to get to the next page. however, the next page is now the
# ut logon page.
#
# this requires that we get the returned results of the logon page
# and that we fill in the user/password...
#
# we should probably use mechanize.. if it works..
#
#Configuration:#########################
# Fixed URL kept for reference only; the live URL is the one scraped from
# the schedule page.
#my $ut_url="https://utdirect.utexas.edu/registrar/clavnew/clav3.WBX?s_semester=su2004";
my $ut_url=$sem_page_url;
my $username="foo";     # <<<<<<<<<< these would be your own!!!!!!! >>>>>>>>
my $password="cat";     # <<<<<<<<<< these would be your own!!!!!!! >>>>>>>>
#End Configuration######################
use Crypt::SSLeay;

# Mechanize agent used for the login and for every request after it.
my $agent = WWW::Mechanize->new();

# Retrieve main page
$agent->get($ut_url);

# Select the logon form by name; without it there is nothing to submit,
# so fail loudly (non-zero exit) instead of exiting as if all was well.
my $login = $agent->form_name('logonform2');
if ( ! $login ) {
    die "Nothing came back";
}

$agent->set_fields('LOGON' => $username,
                   'PASSWORDS' => $password
                   );
$agent->click_button('value' => 'Log In');


print "qqqqqqqq..\n";
# for now, we assume that this is the returned javascript dialog...
# it should be checked.. to see if it really is
# the app should include error checking...
#
print $agent->content();

# Assuming the dialog above means the login succeeded, request the
# registration page again; this second request is what reaches the course
# schedule page that the extraction below works on.
$agent->get($ut_url);
print "7777777777777777777777..\n\n\n\n";
print $agent->content();

# Left as a package variable: the dept/level loop further down reuses it.
$filename = "out.html";

# Save the page to disk because the parsing helpers take a file name.
# Three-arg open with a lexical handle, and every step is checked so a
# full disk or bad directory is reported instead of silently ignored.
open(my $out_fh, '>', $filename) or die "cannot write $filename: $!";
print {$out_fh} $agent->content();
close($out_fh) or die "cannot close $filename: $!";

#
# go ahead and get the levels (grad/under/etc...
# and the list of depts...
#
@_level_array = getlevels($filename);
@_dept_array = getdepts($filename);


#
# begin to parse the top section to get a list of the classes..
#
# Pieces of the class-listing URL. The full URL is assembled as
#   $ut_base_class_url . $dept . <dept> . $class_str
#     . $semester . <semester> . $level . <level>
# Each literal is kept on one line so no newline ends up inside the URL.
my $ut_base_class_url = "https://utdirect.utexas.edu/registrar/clavnew/clav3_results.wb";
my $dept = "?c-key=";
my $class_str = "&c-crs=%20&c-unique-single=&c-unique-begin=&c-unique-end=&c-cmd=lc";
my $semester = "&c-sem=";
my $level = "&c-sub=";

####
#test vars.. these vars will be in a loop..
#####
my $semester_var = "su2004";
my $level_var = "l";
my $dept_var ="ACC";

 #
 # start the loop for depts/levels
 #
 #
 print "level = ". scalar(@_level_array) ."\n";
 print "dept = " . scalar(@_dept_array) ."\n";

 $filename = "out.html";

 # Visit every (level, dept) combination, save the result page and hand it
 # to getclasses() for extraction.
 foreach my $level_code (@_level_array)
 {
   foreach my $dept_code (@_dept_array)
   {
     #####
     #
     # the url--> ut_base_class + $dept.$dept_var + $class_str +
     #            $semester + $semester_var + $level + $level_var
     #
     ######
     my $class_url = join('',
                          $ut_base_class_url,
                          $dept, $dept_code,
                          $class_str,
                          $semester, $semester_var,
                          $level, $level_code);

     print "$class_url \n";

     # get() may either die or return a failed response depending on the
     # agent's autocheck setting; handle both and move on to the next
     # combination rather than parsing an error page.
     my $response = eval { $agent->get($class_url) };
     unless ($response && $response->is_success)
     {
       my $why = $@ || ($response ? $response->status_line : 'no response');
       warn "skipping $class_url: $why\n";
       next;
     }

     print "5555555555555555..\n\n\n\n";
     print $agent->content();

     open(my $out_fh, '>', $filename) or die "cannot write $filename: $!";
     print {$out_fh} $agent->content();
     close($out_fh) or die "cannot close $filename: $!";

     @_class_array = getclasses($filename);
   }
 }

print "we're here...\n";
# Normal end of the run: leave with a success status.
exit 0;



#################################
#
# getsessions($html)
#
# Parses the top-level schedule page and returns the list of session codes
# (the href of every link in the first centred cell of the "tbg" table,
# with all "." and "/" characters removed).
#
# Returns an empty list, with a warning, when the expected table or cell
# is not present in the page.
#
#################################
sub getsessions
{
   my ($res) = @_;

   my $ut_tree = HTML::TreeBuilder->new_from_content($res);

   # should be only one.. need to get the schedules..
   my ($semester_tbl) = $ut_tree->look_down("_tag"=>"table", "class"=>"tbg");
   unless ($semester_tbl)
   {
      warn "getsessions: no table with class 'tbg' found\n";
      $ut_tree->delete;
      return;
   }
   print "tbl ".$semester_tbl->dump()."\n";

   my ($semester_td) = $semester_tbl->look_down("_tag"=>"td", "align"=>"center");
   unless ($semester_td)
   {
      warn "getsessions: no centred cell found in the session table\n";
      $ut_tree->delete;
      return;
   }

   my @semester_links = $semester_td->look_down("_tag"=>"a");

   # Debug dump of the second link, only when there is one.
   print "<a ".$semester_links[1]->dump()."\n" if $semester_links[1];

   # Collect the hrefs into a fresh lexical list on every call so results
   # from an earlier call can never leak into this one.
   my @sessions;
   foreach my $link (@semester_links)
   {
      my $href = $link->attr('href');
      next unless defined $href;

      #quickly strip extraneous chars...
      $href =~ tr{./}{}d;
      push @sessions, $href;
   }

   # Release the parse tree once everything needed has been copied out.
   $ut_tree->delete;

   return(@sessions);
}


#
# getclasses($filename)
#
# Extracts class information from a saved class-listing page.
#
# The file is first rewritten in place so that every '<p class="tbtx">'
# is followed by a ",", which the text extraction relies on as a field
# separator. The rewrite is done in-process rather than by shelling out,
# so the file name is never passed through a shell.
#
# Prints the class number/name and the row text for every section row and
# returns a list of hash refs with the keys 'number', 'name' and 'text'
# (empty list when the listing table is not found).
#
sub getclasses
{
   my ($filename) = @_;

   # add a "," to aid in processing...
   #
   #<p class="tbtx"> --> '<p class="tbtx">,'<<<<<
   #
   open(my $in_fh, '<', $filename)
      or die "getclasses: cannot read $filename: $!";
   my $html = do { local $/; <$in_fh> };
   close($in_fh);
   $html = '' unless defined $html;
   $html =~ s{<p class="tbtx">}{<p class="tbtx">,}g;

   open(my $out_fh, '>', $filename)
      or die "getclasses: cannot write $filename: $!";
   print {$out_fh} $html;
   close($out_fh) or die "getclasses: cannot close $filename: $!";

   my $tree = HTML::TreeBuilder->new(); # empty tree
   $tree->parse_file($filename);

   my ($listing) = $tree->look_down("_tag"=>"table", "cellpadding"=>"2",
                                    "bgcolor"=>"#cccccc");
   unless ($listing)
   {
      $tree->delete;
      return;
   }

   # Most recently seen class header; section rows that follow belong to it.
   my ($classnumber, $classname) = ('', '');
   my @classes;

   #iterate through the list...
   #get the class name/section information
   foreach my $row ($listing->look_down("_tag"=>"tr"))
   {
      my $row_html = $row->as_HTML();

      if ($row_html =~ /class\=\"tbh\"/)
      {
         # table header row.. go ahead and skip
         next;
      }
      elsif ($row_html =~ /class\=\"em\"/)
      {
         # Class header row: only trust the captures when the pattern
         # really matched, so a malformed row cannot hand on stale values.
         if ($row_html =~ /em\"\>([^\<]+)\<[^\>]+\>([^\<]+)/)
         {
            ($classnumber, $classname) = ($1, $2);
            $classnumber =~ s/&nbsp;//;
         }
         else
         {
            warn "getclasses: could not parse class header row\n";
            ($classnumber, $classname) = ('', '');
         }
      }
      elsif ($row_html =~ /\<a href\=\"clavcdet/)
      {
         # Section row for the current class.
         my $row_text = $row->as_text;
         print "number = $classnumber   name = $classname\n";
         print "q = $row_text\n";
         push @classes, { 'number' => $classnumber,
                          'name'   => $classname,
                          'text'   => $row_text };
      }
   }

   $tree->delete;
   return(@classes);
}



# }

# getlevels($filename)
#
# get the levels (under/grad/etc...)
#       Reads the saved search page and returns the 'value' attribute of
#       every radio button named "s_course_level", in document order.
#       Returns an empty list (with a warning) if the file cannot be read.
#
sub getlevels
{
   my ($filename) = @_;

   my $tree = HTML::TreeBuilder->new(); # empty tree
   unless ($tree->parse_file($filename))
   {
      warn "getlevels: cannot parse $filename: $!\n";
      $tree->delete;
      return;
   }

   my @radio_buttons = $tree->look_down("_tag"=>"input", "type"=>"radio",
                                        "name"=>"s_course_level");

   # Copy the values out before the tree is released.
   my @level_array = map { scalar $_->attr('value') } @radio_buttons;

   $tree->delete;
   return(@level_array);
}

#
# getdepts($filename)
#
# get the depts (/etc...)
#       Reads the saved search page and returns the 'value' of every
#       option in the "s_dept_abbr_select" drop-down, with spaces replaced
#       by "%20" so the value can be placed directly in a URL.
#
#       The file is first rewritten in place so that every
#       '<p class="tbtx">' is followed by a "," (same preparation step as
#       the class extraction uses). This is done in-process, so the file
#       name is never passed through a shell.
#
#       Returns an empty list (with a warning) when the drop-down is not
#       found in the page.
#
sub getdepts
{
   my ($filename) = @_;

   # prepare file for use
   #
   # we insert a "," to aid in data extraction
   #
   #<p class="tbtx">  ==> '<p class="tbtx">,'
   open(my $in_fh, '<', $filename)
      or die "getdepts: cannot read $filename: $!";
   my $html = do { local $/; <$in_fh> };
   close($in_fh);
   $html = '' unless defined $html;
   $html =~ s{<p class="tbtx">}{<p class="tbtx">,}g;

   open(my $out_fh, '>', $filename)
      or die "getdepts: cannot write $filename: $!";
   print {$out_fh} $html;
   close($out_fh) or die "getdepts: cannot close $filename: $!";

   my $tree = HTML::TreeBuilder->new(); # empty tree
   $tree->parse_file($filename);

   my ($dept_select) = $tree->look_down("_tag"=>"select",
                                        "name"=>"s_dept_abbr_select",
                                        "size"=>"1");
   unless ($dept_select)
   {
      warn "getdepts: department drop-down not found in $filename\n";
      $tree->delete;
      return;
   }

   my @dept_array;
   foreach my $option ($dept_select->look_down("_tag"=>"option"))
   {
      my $value = $option->attr('value');
      $value = '' unless defined $value;
      $value =~ s: :%20:g;   # sub/replace for " "s...

      print "-----> $value \n";
      push @dept_array, $value;
   }

   $tree->delete;
   return(@dept_array);
}



=========================================================================
=========================================================================
=========================================================================
=========================================================================


this is a cookie app......
#############################################################
#
# auburn.pl. test perl app to extract/parse class
#       schedule from the auburn.edu
#       registrar site...
#
#GET /ia-bin/tsrvweb.exe?
#
#
# todo.. need to create the getinfo routine to parse the
#        class information, and write it out
#
#############################################################
#!/usr/bin/perl -w

use strict;

use HTML::TreeBuilder;
use LWP::UserAgent;
use URI::Escape;
use WWW::Mechanize;
use LWP::Simple;
use URI::URL;
use HTTP::Cookies;



#
# get the top level university class site
#
  # Site constants. Each query string is kept on a single line so that no
  # newline ends up inside a URL.
  # NOTE(review): $registrar_url points at ua.edu and neither it nor
  # $host_url is used in the code visible here -- confirm before removing.
  my $registrar_url = "https://tideguide.ua.edu/";
  my $host_url = "https://oasis.auburn.edu";
  my $base_url = "https://oasis.auburn.edu/ia-bin/tsrvweb.exe?";

  # Query that brings up the term (semester) selection page.
  my $semester_query = "tserve_trans_config=rclsterm-l.cfg&tserve_host_code=HostZero&tserve_tiphost_code=TipZero";

  # Query prefix for the department page; the caller appends "=<term>".
  my $dept_url1 = "tserve_host_code=HostZero&tserve_tiphost_code=TipZero&WID=W&ReqNum=1&tserve_tip_write=%7C%7CWID%7CTerm%7CReqNum&tserve_trans_config=rclsavl1-l.cfg&Term";


   # go ahead and get the selection page within the frame..
   # use www::mechanize
   # This file-scope agent (and its in-memory cookie jar) is shared by the
   # subs below, so the whole crawl runs in one session.
   my $agent = WWW::Mechanize->new();
   $agent->cookie_jar({});

   $agent->get($base_url.$semester_query);


#
# get term
# get subject/dept
# get course
# get class/sections...
#
#
#


#
# iterate through the terms/depts/classes
   # Walk the site top-down: every term, every department in that term,
   # every course in that department; each course's class page is handed
   # to getClassInformation().

   # semester
   my ($s, $f) = getSemesterAttribs($base_url.$semester_query);
   my @semesters = @$s;
   my @FormAttribs = @$f;

   foreach my $semester (@semesters)
   {
      my $semester_val = $semester->{'val'};

      # Disabled alternative: submit the term form instead of building the
      # department URL by hand.
#      my $url = $base_url.$semester_query;
#      my $dept_url = getSemesterForm(\$semester_val, \@FormAttribs, \$url);

      #use dept list
      # The department page URL is built directly from the term value.
      my $dept_page_url = $base_url.$dept_url1."=".$semester_val;

      my ($d, $f1) = getDeptAttribs($dept_page_url);
      my @depts = @$d;
      my @deptFormAttribs = @$f1;

      foreach my $dept (@depts)
      {
         my $dept_val = $dept->{'val'};

         # Submitting the department form yields the query string for the
         # course-selection page.
         my $course_url = getDeptForm(\$dept_val, \@deptFormAttribs,
                                      \$dept_page_url);

         #use course list
         my $course_page_url = $base_url.$course_url;
         my ($c, $f2) = getCourseAttribs($course_page_url);
         my @courses = @$c;
         my @courseFormAttribs = @$f2;

         foreach my $course (@courses)
         {
            my $course_val = $course->{'val'};

            my $content = getCourseForm(\$course_val, \@courseFormAttribs,
                                        \$course_page_url);

            print "content = $content\n";
            print "semester  $semester_val   dept  $dept_val  course $course_val\n";

            getClassInformation($content);
         }
      }
   }
print "we're here...\n";
# Normal end of the run: leave with a success status.
exit 0;



#################################
#
# getSemesterAttribs($url)
#
# Fetches $url with the shared $agent and reads the first form on the page.
#
# Returns two array refs:
#   1. one { val, name } hash per <option> of the "Term" select
#   2. one { val, name } hash per hidden <input> of that form
# Both arrays are empty (with a warning) when the form or the select is
# not found.
#
#################################
sub getSemesterAttribs
{
   my ($url) = @_;

   my (@semesterArray, @nameArray);

   $agent->get($url);

print "semester url $url\n";

   my $tree = HTML::TreeBuilder->new_from_content($agent->content);

   #
   # get the form
   #
   # The tag name is given in lowercase because that is how parsed
   # elements store it, whatever the case used in the page source.
   my ($form) = $tree->look_down("_tag"=>"form");

   #
   # get the semester list information
   #
   my ($term_select) = $form
      ? $form->look_down("_tag"=>"select", "name"=>"Term")
      : ();

   if ($term_select)
   {
      foreach my $option ($term_select->look_down("_tag"=>"option"))
      {
         push(@semesterArray, { 'val'  => scalar $option->attr('value'),
                                'name' => scalar $option->as_text() });
      }
   }
   else
   {
      warn "getSemesterAttribs: no Term select found at $url\n";
   }

   #
   # get the hidden name information for the form
   #
   if ($form)
   {
      foreach my $input ($form->look_down("_tag"=>"input", "type"=>"hidden"))
      {
         push(@nameArray, { 'val'  => scalar $input->attr('value'),
                            'name' => scalar $input->attr('name') });
      }
   }

   # Everything needed has been copied out of the tree.
   $tree->delete;

   return(\@semesterArray, \@nameArray);
}


#################################
#
# getSemesterForm(\$term, \@hidden_fields, \$url)
#
# Loads $url in a fresh Mechanize agent, fills the first form with the
# given hidden fields plus the "Term" value, submits it, and returns the
# content of the request that was sent.
#
# Author's note, kept from the original: the semester form's method gave
# the "mech" object trouble, so this path is a stop-gap to be revisited.
#
# Exits the program (after a warning) when the page has no form.
#
#################################
sub getSemesterForm
{
   my ($term_ref, $fields_ref, $url_ref) = @_;

   my $term   = ${$term_ref};
   my @fields = @{$fields_ref};
   my $url    = ${$url_ref};

   # A separate agent is created here; the file-scope agent is not used.
   my $mech = WWW::Mechanize->new();

   $mech->get($url);
   print "url = $url\n";

   unless ($mech->form_number(1))
   {
      warn "Nothing came back";
      exit;
   }

   # Copy each hidden field into the form, echoing it as we go.
   for my $field (@fields)
   {
      $mech->field($field->{'name'}, $field->{'val'});
      print "name = ".$field->{'name'}." val = ".$field->{'val'}."\n";
   }

   $mech->field("Term", $term);
   print " ...............\n";
   $mech->submit();

   print "--+++++ ".$mech->content(). "\n";

   # Content of the request that was just submitted.
   my $url2 = $mech->response()->request()->content();
   print "url = ".$url2."\n";

   return $url2;
}



#################################
#
# getDeptAttribs($url)
#
# Fetches $url with the shared $agent and reads the first form on the page
# whose method is "post".
#
# Returns two array refs:
#   1. one { val, name } hash per <option> of the "Subject" select
#   2. one { val, name } hash per hidden <input> of that form
# Both arrays are empty (with a warning) when the form or the select is
# not found.
#
#################################
sub getDeptAttribs
{
   my ($url) = @_;

   my (@deptArray, @nameArray);

   $agent->get($url);

   my $tree = HTML::TreeBuilder->new_from_content($agent->content);

   #
   # get the form
   #
   my ($form) = $tree->look_down("_tag"=>"form", "method"=>"post");

   #
   # get the dept list information
   #
   my ($subject_select) = $form
      ? $form->look_down("_tag"=>"select", "name"=>"Subject")
      : ();

   if ($subject_select)
   {
      foreach my $option ($subject_select->look_down("_tag"=>"option"))
      {
         push(@deptArray, { 'val'  => scalar $option->attr('value'),
                            'name' => scalar $option->as_text() });
      }
   }
   else
   {
      warn "getDeptAttribs: no Subject select found at $url\n";
   }

   #
   # get the hidden name information for the form
   #
   if ($form)
   {
      foreach my $input ($form->look_down("_tag"=>"input", "type"=>"hidden"))
      {
         push(@nameArray, { 'val'  => scalar $input->attr('value'),
                            'name' => scalar $input->attr('name') });
      }
   }

   # Everything needed has been copied out of the tree.
   $tree->delete;

   return(\@deptArray, \@nameArray);
}


#################################
#
# getDeptForm(\$subject, \@hidden_fields, \$url)
#
# Loads $url with the shared $agent, fills the first form with the given
# hidden fields plus the "Subject" value, submits it, and returns the
# content of the request that was sent.
#
# Exits the program (after a warning) when the page has no form.
#
#################################
sub getDeptForm
{
   my ($subject_ref, $fields_ref, $url_ref) = @_;

   my $subject = ${$subject_ref};
   my @fields  = @{$fields_ref};
   my $url     = ${$url_ref};

   # go ahead and get the selection page within the frame..
   $agent->get($url);
   print "url = $url\n";

   unless ($agent->form_number(1))
   {
      warn "Nothing came back";
      exit;
   }

   # Copy every hidden field into the form, then pick the department.
   $agent->field($_->{'name'}, $_->{'val'}) for @fields;
   $agent->field("Subject", $subject);

   $agent->submit();

   print "-- ".$agent->content(). "\n";

   # Content of the request that was just submitted.
   my $url2 = $agent->response()->request()->content();
   print "url = ".$url2."\n";

   return $url2;
}


#################################
#
# getCourseAttribs($url)
#
# Fetches $url with the shared $agent and reads the first form on the page
# whose method is "post".
#
# Returns two array refs:
#   1. one { val, name } hash per <option> of the "CourseID" select
#   2. one { val, name } hash per hidden <input> of that form
# Both arrays are empty (with a warning) when the form or the select is
# not found.
#
#################################
sub getCourseAttribs
{
   my ($url) = @_;

   my (@courseArray, @nameArray);

   $agent->get($url);

print "semester url $url\n";

   my $tree = HTML::TreeBuilder->new_from_content($agent->content);

   #
   # get the form
   #
   my ($form) = $tree->look_down("_tag"=>"form", "method"=>"post");

   #
   # get the course list information
   #
   my ($course_select) = $form
      ? $form->look_down("_tag"=>"select", "name"=>"CourseID")
      : ();

   if ($course_select)
   {
      foreach my $option ($course_select->look_down("_tag"=>"option"))
      {
         push(@courseArray, { 'val'  => scalar $option->attr('value'),
                              'name' => scalar $option->as_text() });
      }
   }
   else
   {
      warn "getCourseAttribs: no CourseID select found at $url\n";
   }

   #
   # get the hidden name information for the form
   #
   if ($form)
   {
      foreach my $input ($form->look_down("_tag"=>"input", "type"=>"hidden"))
      {
         push(@nameArray, { 'val'  => scalar $input->attr('value'),
                            'name' => scalar $input->attr('name') });
      }
   }

   # Everything needed has been copied out of the tree.
   $tree->delete;

   return(\@courseArray, \@nameArray);
}


#################################
#
# getCourseForm(\$course, \@hidden_fields, \$url)
#
# Loads $url with the shared $agent, fills the first form with the given
# hidden fields plus the "CourseID" value, submits it, and returns the
# content of the page that comes back.
#
# Exits the program (after a warning) when the page has no form.
#
#################################
sub getCourseForm
{
   my ($course_ref, $fields_ref, $url_ref) = @_;

   my $course = ${$course_ref};
   my @fields = @{$fields_ref};
   my $url    = ${$url_ref};

   # go ahead and get the selection page within the frame..
   $agent->get($url);
   print "url = $url\n";

   unless ($agent->form_number(1))
   {
      warn "Nothing came back";
      exit;
   }

   # Copy every hidden field into the form, then pick the course.
   $agent->field($_->{'name'}, $_->{'val'}) for @fields;
   $agent->field("CourseID", $course);

   $agent->submit();

   print "-- ".$agent->content(). "\n";

   # Echo the content of the request that was just submitted.
   my $url2 = $agent->response()->request()->content();
   print "url = ".$url2."\n";

   return $agent->content();
}





#######################
#
# getClassInformation($content)
#
# Parses the class/section listing out of a course result page.
#
# Looks at the first table with class "display", skips its first two
# rows, and reads the section, session, title, date, time and professor
# cells (columns 0, 1, 2, 7, 8 and 11) of every remaining row that has at
# least one cell. Prints the section and title of each row.
#
# Returns a list of hash refs with the keys section, session,
# course_title, date, time and prof; an empty list when the page has no
# such table. A field is '' when its column is missing from the row.
#
#######################
sub getClassInformation
{
   my ($content) = @_;

   my $tree = HTML::TreeBuilder->new_from_content($content);

   #
   # get each class via the "table"
   #
   my ($class_table) = $tree->look_down("_tag"=>"table", "class"=>"display");

   # return if we don't have any class information
   unless ($class_table)
   {
      $tree->delete;
      return;
   }

   my @rows = $class_table->look_down("_tag"=>"tr");

   # Field name => column index within a row.
   my %column_of = (
      'section'      => 0,
      'session'      => 1,
      'course_title' => 2,
      'date'         => 7,
      'time'         => 8,
      'prof'         => 11,
   );

   my @classes;

   # skip the 1st/2nd tr...
   for my $row_index (2 .. $#rows)
   {
      my @cells = $rows[$row_index]->look_down("_tag"=>"td");

      # A row without cells has nothing to report.
      next unless @cells;

      # Start every row from a clean record so a short row never shows
      # values left over from the row before it.
      my %class;
      for my $field (keys %column_of)
      {
         my $cell = $cells[ $column_of{$field} ];
         $class{$field} = $cell ? $cell->as_text() : '';
      }

      print "section = $class{'section'} \n  title = $class{'course_title'}\n";
      push(@classes, \%class);
   }

   $tree->delete;
   return(@classes);
}


#
# extractContent($html)
#
# extracts the content from the class/section page for
# the given class and prints it.
#
# basic flow...
#   find every <td height="30" class="text8" align="left"> cell; each one
#   holds a course name. The cells to its right, in order, are:
#   section, course #, class name, prof, seats, free, bldg, room, date,
#   start time.
#
#   For each such cell, print the course name followed by the section,
#   course #, name, prof, date and start time. Printing for a row stops
#   early if the row has fewer cells than expected.
#
sub extractContent
{
   my ($res) = @_;
print "inside ex...\n";

print "res = $res\n";

   my $tree = HTML::TreeBuilder->new_from_content($res);

   my @name_cells = $tree->look_down("_tag"=>"td", "height"=>"30",
                                     "class"=>"text8", "align"=>"left");

   print "td count = ".scalar(@name_cells)."\n";

   # [ position among the right-hand sibling elements, label to print ]
   my @printed_columns = (
      [ 0, "section "   ],
      [ 1, "course # "  ],
      [ 2, "name  "     ],
      [ 3, "prof "      ],
      [ 8, "date "      ],
      [ 9, "starttime " ],
   );

   for my $name_cell (@name_cells)
   {
      print "course name ".$name_cell->as_text()."\n";  #class/course name

      # In list context right() returns all right-hand siblings; keep only
      # element nodes so plain text between cells is never treated as one.
      my @siblings = grep { ref $_ } $name_cell->right();

      for my $column (@printed_columns)
      {
         my ($position, $label) = @{$column};
         my $cell = $siblings[$position];

         # Short row: nothing further to the right, stop here.
         last unless $cell;

         print $label.$cell->as_text()."\n";
      }
   }

   $tree->delete;
   return;
}


good luck... hope this helps!!!!





-----Original Message-----
From: [EMAIL PROTECTED] [mailto:[EMAIL PROTECTED]
Sent: Monday, August 23, 2004 1:49 AM
To: [EMAIL PROTECTED]
Subject: LWP::UserAgent and cookie_jar


I am trying to use LWP::UserAgent and cookie_jar, obviously according to the
subject of this email.
Anyhow, I cannot get my code to work, can someone look at it and tell me if
I am just using it wrong?

I'll show my code and the response I get...

I would really appreciate any help someone can offer.

Code: (Of course I changed all real domains and user/membership info...)

my $ua = LWP::UserAgent->new;
$ua->agent("Mozilla/8.0"); # pretend we are very capable browser

# requests_redirectable is a get/set accessor: calling it with no arguments
# changes nothing. Add POST to the list so a redirect answering the login
# form is followed the way a browser would follow it.
push @{ $ua->requests_redirectable }, 'POST';

# ignore_discard => 1 keeps cookies that are marked to be discarded (i.e.
# session cookies) when the jar is written to the file. Note that autosave
# writes the file when the jar object is destroyed, so later requests
# should reuse this same $ua rather than build a new agent that re-reads
# the file.
$ua->cookie_jar(HTTP::Cookies->new(file => "lwpcookies.txt",
                                   autosave => 1,
                                   ignore_discard => 1));

my $req = HTTP::Request->new(POST =>
'http://www1.somedomain.com/weborder/xt_shopper_lookup.asp');
$req->content_type('application/x-www-form-urlencoded');
# The form body must be one unbroken line; a newline inside it would be
# sent to the server as part of the last field's value.
$req->content('shopper_memno=12345124&shopper_password=somepass&submit1=Continue');
$req->referer('http://www1.somedomain.com/weborder/xt_shopper_lookup.asp');

# Pass request to the user agent and get a response back
my $res = $ua->request($req);

# Check the outcome of the response
if ($res->is_success) {
my $ua = LWP::UserAgent->new;
$ua->agent("Mozilla/8.0"); # pretend we are very capable browser
$ua->cookie_jar(HTTP::Cookies->new(file => "lwpcookies.txt",
autosave => 1));
$req = HTTP::Request->new(GET =>
"http://ftp.thedomainsftpsite.com/default.asp?memno=12345124";);
$req->referer('http://www1.somedomain.com/weborder/xt_shopper_lookup.asp');

# Pass request to the user agent and get a response back
my $res = $ua->request($req);

if($res->is_success) { # Check to see if I can access the FTP site...
    # Yep, it appears to have let me login... This is where I'm getting the
error that, "Login Required"...
    # When I go to the website and login manually, I can just click on the
FTP site to go download files from there, like txt descriptions and stuff,
one at a time.
    # So this SHOULD work with the cookie jar...
    $sth = $dbh->prepare (qq{ SELECT * FROM `my_items` ORDER BY `id` });
    $sth->execute();
    my $_cntr = 0;
    while($rs = $sth->fetchrow_hashref()) {
        $_pid = $rs->{pid};
        $file_name = "/home/userid/files/Descriptions/" . $_pid . ".txt";
        if(!-e "/$file_name/i") {# Ok it does not already exist, so go get
it...
            $_sub_folder = Return_Sub_Folder($_pid); # Get the folder it
would be in, based upon the ID number of it...
            my $ua = LWP::UserAgent->new;
            $ua->agent("Mozilla/8.0"); # pretend we are very capable browser
            $ua->cookie_jar(HTTP::Cookies->new(file => "lwpcookies.txt",
                                                autosave => 1));
            $req = HTTP::Request->new(GET =>
"http://ftp.thedomainsftpsite.com/items/descriptions/$_sub_folder/$_pid.txt";
);
                # $req->header('Accept' => 'text/plain'); # Don't want to
use this right now...

$req->referer("http://ftp.thedomainsftpsite.com/items/descriptions/$_sub_fol
der/");
                # send request
            $res = $ua->request($req);
            if ($res->is_success) { # Check if it can get the .txt file
description to save it to my account for later use...
                    # COOL IT WORKED!!!
                $ctypeb = $res->content;
                $_output_filename = "/home/userid/files/Descriptions/" .
$_pid . ".txt";
                open (OUTFILE,">$_output_filename");
                    print OUTFILE $ctypeb; # SAVE IT!
                close(OUTFILE);
            } else {
                    #Nope still did not work. Maybe it's .TXT instead of
.txt lets try again with the .TXT extension
                my $ua = LWP::UserAgent->new;
                $ua->agent("Mozilla/8.0"); # pretend we are very capable
browser
                $ua->cookie_jar(HTTP::Cookies->new(file => "lwpcookies.txt",
                                                       autosave => 1));
                $req = HTTP::Request->new(GET =>
"http://ftp.thedomainsftpsite.com/items/descriptions/$_sub_folder/$_pid.TXT";
);
                    # $req->header('Accept' => 'text/plain'); # Don't want
to use this right now...

$req->referer("http://ftp.thedomainsftpsite.com/items/descriptions/$_sub_fol
der/");
                    # send request
                $res1 = $ua->request($req);
                if ($res1->is_success) {
                    $ctypeb = $res1->content;
                    $_output_filename = "/home/userid/files/Descriptions/" .
$_pid . ".txt";
                    open (OUTFILE,">$_output_filename");
                        print OUTFILE $ctypeb;
                    close(OUTFILE);
                } else {# Nope still did not work, please send me the error
via email:
                    $_mail_message .= qq~I could not find $_pid.txt on
ourmemebersite.com so I could not add it to the directory.\n\nRun number 3:
Content says:\n$res1->content\n\nStatus Line says:\n$res->status_line\n\n~;
                }
            }
        }
....


Response:
bash$ perl text_desc_dl.cgi
Content-type: text/html/

Login Failed....


Thank you.
Richard

Reply via email to