hi!

failing to find a yosucker equivalent for gmail led me to creating this
small program. hope you find it useful.

#!/usr/bin/perl -w
# Fetch gmail mails and store it into an mbox.
# by Sherwin Daganato <[EMAIL PROTECTED]>, 20040903
#
# This program won't be possible without tcpdump and stealing some ideas
# from Mail::Webmail::Gmail, WWW::GMail, and libgmail. :-)

use strict;

# --- BEGIN config

my $USER       = 'your_user';
my $PASS       = 'your_password';
my $MBOX       = '/home/foo/Mail/IN.gmail';
my $REPOSITORY = '/home/foo/.fetchgmail/your_user.repository';
my $VERBOSE    = 1;

# --- END config

# use Crypt::SSLeay's internal proxy support which sends
# CONNECT request as expected by proxy servers
$ENV{HTTPS_PROXY} = 'http://your_proxy:your_port';

$|++;                                 # autoflush buffer

use LWP::UserAgent;
use HTTP::Cookies;
use HTTP::Request::Common qw(POST GET);
use Mail::Header;
use Date::Parse qw(str2time);
use Data::Dumper;


print "Loading repository\n" if $VERBOSE;

# mimic *do* with file locking
our $repository;
open(REPO, "+>>$REPOSITORY") or die "Can't open $REPOSITORY: $!";
flock(REPO, 2) or die "Can't flock $REPOSITORY: $!";
seek(REPO, 0, 0);                     # beginning of file
my $temp;
{ local $/; $temp = <REPO> }
eval $temp if $temp;

my $UA = LWP::UserAgent->new;
$UA->agent('Mozilla/5.0 (compatible;)');
my $cookie_jar = HTTP::Cookies->new;
$UA->cookie_jar($cookie_jar);


print "Logging in\n" if $VERBOSE;

my $req = POST 'https://www.google.com/accounts/ServiceLoginBoxAuth',
  [ continue => 'https://gmail.google.com/gmail', service => 'mail',
    Email => $USER, Passwd => $PASS, null => 'Sign in' ];
my $res = $UA->request($req);
die "Can't login (status ", $res->code, " ", $res->message, ")\n"
  unless ($res->is_success);

if ($res->content =~ /top.location = "(.*?)";/) {
  $req = GET "https://www.google.com/accounts/$1";;
  $res = $UA->request($req);
  die "Can't login (status ", $res->code, " ", $res->message, ")\n"
    unless ($res->is_success);
} else {
  die "Can't login (gmail interface has changed!)\n"
}


print "Retrieving thread list\n" if $VERBOSE;

$req = GET 'https://gmail.google.com/gmail?search=inbox&start=0&view=tl&init=1';
$res = $UA->request($req);
die "Can't view inbox (status ", $res->code, " ", $res->message, ")\n"
    unless ($res->is_success);
my $inbox = $res->content;
my $href = parse_page($inbox) or 
  die "Can't parse inbox (gmail interface has changed!)\n";


print "Saving messages in each threads into mbox\n" if $VERBOSE;

open(MBOX, ">>$MBOX") or die "Can't open $MBOX: $!";
flock(MBOX, 2) or die "Can't flock $MBOX: $!";

foreach my $t (@{ $href->{t} }) {
  foreach my $t_elt (@{ $t }) {

    # get the number of message in each thread
    my ($thnum) = $t_elt->[4] =~ /\((\d+)\)$/;
    $thnum ||= 1;

    # skip if that number didn't change
    next if exists $repository->{$t_elt->[0]} and $repository->{$t_elt->[0]} == $thnum;

    $req = GET "https://gmail.google.com/gmail?search=inbox&view=cv&th=$t_elt->[0]";
    $res = $UA->request($req);
    die "Can't view thread for $t_elt->[0] (status ", $res->code, " ", $res->message, 
")\n"
      unless ($res->is_success);
    my $thread = $res->content;
    my $href = parse_page($thread) or 
      die "Can't parse thread for $t_elt->[0] (gmail interface has changed!)\n";

    # iterate through all messages in each thread
    foreach my $mi (@{ $href->{mi} }) {
      next if exists $repository->{$mi->[2]};
      print '.' if $VERBOSE;
      if (my $msg = get_raw_message($mi->[2])) {
        print MBOX $msg;
        print "\b#" if $VERBOSE;
      }
      $repository->{$mi->[2]} = 1 if $mi->[2] ne $t_elt->[0];
    }
    $repository->{$t_elt->[0]} = $thnum;
  }
}
close MBOX;


print "\nSaving repository\n" if $VERBOSE;

seek(REPO, 0, 0);
truncate(REPO, 0);
print REPO Data::Dumper->Dump([ $repository ], [ 'repository' ]);
close REPO;


print "DONE\n" if $VERBOSE;

exit;


sub get_raw_message {
  my $msgid = shift;

  my $req = GET "https://gmail.google.com/gmail?view=om&th=$msgid";;
  my $res = $UA->request($req);
  unless ($res->is_success) {
    warn "Can't get raw message for $msgid (status ", $res->code, " ", $res->message, 
")\n";
    return;
  }
  my $raw_msg = $res->content;
  $raw_msg =~ s/^\s+//;               # remove leading spaces in header
  $raw_msg =~ s/\015?\012/\n/g;       # won't this affect attachment?

  # create a postmark line
  my $head = Mail::Header->new(Modify => 0);
  $head->extract([ split(/\n/, (split(/\n\n/, $raw_msg, 2))[0]) ]);
  my $from = $head->get('return-path') || $head->get('from') || '';
  my $stamp = str2time($head->get('date')) || time;
  my $sender = $from =~ /<?([EMAIL PROTECTED]>]+)>?/ ? $1 : 'unknown';

  # TODO - consider escaping "From " in body

  "From $sender " . (localtime $stamp) . "\n$raw_msg\n";
}

sub parse_page {
  my $page = shift;
  my %hash;
  while ($page =~ /D\((.*?)\);/sg) {
    (my $s = $1) =~ s/(\$|\@)/\\$1/g; # avoid substitution of scalars and lists
    $s = eval $s;
    if (defined $s && ref $s eq 'ARRAY') {
      push @{ $hash{ shift @{ $s } } }, $s;
    }
  }
  \%hash;
}

--
Philippine Linux Users' Group (PLUG) Mailing List
[EMAIL PROTECTED] (#PLUG @ irc.free.net.ph)
Official Website: http://plug.linux.org.ph
Searchable Archives: http://marc.free.net.ph
.
To leave, go to http://lists.q-linux.com/mailman/listinfo/plug
.
Are you a Linux newbie? To join the newbie list, go to
http://lists.q-linux.com/mailman/listinfo/ph-linux-newbie

Reply via email to