Can someone see anything wrong with my code? Sometimes it failed short
when the list of urls to check is long. Is ithread stable ? Here is
the output of perl -V :
Summary of my perl5 (revision 5.0 version 8 subversion 3) configuration:
Platform:
osname=linux, osvers=2.4.21-4.elsmp, archname=i386-linux-thread-multi
uname='linux tweety.devel.redhat.com 2.4.21-4.elsmp #1 smp fri oct
3 17:52:56 edt 2003 i686 i686 i386 gnulinux '
config_args='-des -Doptimize=-O2 -g -pipe -march=i386 -mcpu=i686
-Dversion=5.8.3 -Dmyhostname=localhost [EMAIL PROTECTED]
-Dcc=gcc -Dcf_by=Red Hat, Inc. -Dinstallprefix=/usr -Dprefix=/usr
-Darchname=i386-linux -Dvendorprefix=/usr -Dsiteprefix=/usr -Duseshrplib
-Dusethreads -Duseithreads -Duselargefiles -Dd_dosuid -Dd_semctl_semun
-Di_db -Ui_ndbm -Di_gdbm -Di_shadow -Di_syslog -Dman3ext=3pm -Duseperlio
-Dinstallusrbinperl -Ubincompat5005 -Uversiononly -Dpager=/usr/bin/less
-isr -Dinc_version_list=5.8.2 5.8.1 5.8.0'
hint=recommended, useposix=true, d_sigaction=define
usethreads=define use5005threads=undef useithreads=define
usemultiplicity=define
useperlio=define d_sfio=undef uselargefiles=define usesocks=undef
use64bitint=undef use64bitall=undef uselongdouble=undef
usemymalloc=n, bincompat5005=undef
Compiler:
cc='gcc', ccflags ='-D_REENTRANT -D_GNU_SOURCE -DTHREADS_HAVE_PIDS
-DDEBUGGING -fno-strict-aliasing -I/usr/local/include
-D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -I/usr/include/gdbm',
optimize='-O2 -g -pipe -march=i386 -mcpu=i686',
cppflags='-D_REENTRANT -D_GNU_SOURCE -DTHREADS_HAVE_PIDS
-DDEBUGGING -fno-strict-aliasing -I/usr/local/include -I/usr/include/gdbm'
ccversion='', gccversion='3.3.3 20040412 (Red Hat Linux 3.3.3-7)',
gccosandvers=''
intsize=4, longsize=4, ptrsize=4, doublesize=8, byteorder=1234
d_longlong=define, longlongsize=8, d_longdbl=define, longdblsize=12
ivtype='long', ivsize=4, nvtype='double', nvsize=8, Off_t='off_t',
lseeksize=8
alignbytes=4, prototype=define
Linker and Libraries:
ld='gcc', ldflags =' -L/usr/local/lib'
libpth=/usr/local/lib /lib /usr/lib
libs=-lnsl -lgdbm -ldb -ldl -lm -lcrypt -lutil -lpthread -lc
perllibs=-lnsl -ldl -lm -lcrypt -lutil -lpthread -lc
libc=/lib/libc-2.3.3.so, so=so, useshrplib=true, libperl=libperl.so
gnulibc_version='2.3.3'
Dynamic Linking:
dlsrc=dl_dlopen.xs, dlext=so, d_dlsymun=undef, ccdlflags='-rdynamic
-Wl,-rpath,/usr/lib/perl5/5.8.3/i386-linux-thread-multi/CORE'
cccdlflags='-fPIC', lddlflags='-shared -L/usr/local/lib'
Characteristics of this binary (from libperl):
Compile-time options: DEBUGGING MULTIPLICITY USE_ITHREADS
USE_LARGE_FILES PERL_IMPLICIT_CONTEXT
Built under linux
Compiled at Apr 15 2004 13:09:17
%ENV:
PERL5LIB="/home/khai/code/perllib"
@INC:
and here is the code:
#!/usr/bin/perl
use strict;
use threads;
use Data::Dumper;
use HTML::Parser;
use LWP::UserAgent;
use HTML::Tagset;
use Compress::Zlib;
use Getopt::Long;
use MIME::Lite ();
use Time::HiRes qw(gettimeofday tv_interval nanosleep usleep);
use lib "/code/perllib";
use MG::Utils;
use LWP::ConnCache();
my $seed = 0;
my $releaseTag = "";
my $originalOnly = 0;
my $port = "";
my $email = "";
my $level = "";
GetOptions('seed' => \$seed, 'releaseTag=s' => \$releaseTag,
'originalOnly' => \$originalOnly,'port=s' => \$port, 'email=s' => \$email);
my $self = MG::Utils->new();
$self->{"for"} = "rew";
$self->{"environment"} = "development";
my $userAgent = "Mozilla/4.0 (compatible, MSIE 6.0; Windows NT 5.1; SV1;
Maxthon; .NET CLR 1.";
my $accept = "image/gif, image/x-bitmap, image/jpeg, image/pjpeg,
application/vnd.ms-excel, application/vnd.ms-powerpoint,
application/msword, application/x-shockwave-flash, */*";
my $acceptLanguage = "en-us";
my $acceptEncoding = "gzip, deflate";
my $demoflag = "";
my $link_attr = {};
while (my($k,$v) = each (%HTML::Tagset::linkElements)) {
$v = { map {$_ => 1} @{$v} };
$link_attr->{$k} = $v;
}
my $urls = [
'http://www.webex.com/',
'http://www.webex.com/fileadmin/includes/searchtrack.js',
'http://www.webex.com/media/scripts/jsfunc.layermenu.js',
'http://www.webex.com/fileadmin/includes/webex_home.css',
'http://www.webex.com/fileadmin/includes/tabs.js',
'http://www.webex.com/overview/online-meetings.html',
'http://www.webex.com/overview/technology.html',
'http://www.webex.com/overview/technology-white-papers.html',
'http://www.webex.com/solutions/online-events.html',
'http://www.webex.com/solutions/e-learning-svc.html',
'http://www.webex.com/resources/quick-tours.html',
'http://www.webex.com/resources/trials-redirect.html',
'http://www.webex.com/support/training-elearning-overview.html',
'http://www.webex.com/support/client-overview.html',
'http://www.webex.com/webex/careers.html',
'http://www.webex.com/webex/partner-overview.html',
'http://www.webex.com/webex/contact.html',
'http://www.webex.com/solutions/conferencing-overview.html',
'http://www.webex.com/support/support-services.html',
'http://www.webex.com/solutions/small_business.html',
'http://www.webex.com/solutions/system-management.html',
'http://www.webex.com/pr/pr378.html',
'http://www.webex.com/web-seminars/?sid=webx1',
'http://www.webex.com/web-seminars/view_event/661889526?sid=webx1',
'http://www.webex.com/web-seminars?sid=webx1',
'http://www.webex.com/pr/pr287.html',
'http://www.webex.com/webex/awards.html',
'http://www.webex.com/pr/pr381.html',
'http://www.webex.com/pr/pr374.html',
'http://www.webex.com/go/demo',
'http://www.webex.com/go/choosetrial',
'http://www.webex.com/go/buy',
'http://support.webex.com/support/support-services.html',
'http://www.webex.com/webexhome.html',
'http://www.webex.com/sitemap.html',
'http://www.webex.com/webex_privacy.html',
'http://www.webex.com/webex_terms.html',
'http://www.webex.com/webex_copyright.html',
'http://support.webex.com/search/default.html?country=United+States',
'http://www.webex.com/overview/web-conferencing.html',
'http://www.webex.com/solutions/audio-conference.html',
'http://www.webex.com/overview/video-conferencing.html',
'http://www.webex.com/go/webtrust',
'http://www.webex.com/javascript/cookieCode.js',
'http://www.webex.com/fileadmin/includes/hbx.js',
'http://www.webex.com/fileadmin/includes/webex_styles.css',
];
my @workHorse = ();
my $numThreads = 30;
my $eachThread = int(scalar(@{$urls}) / $numThreads);
my $workLoad = [];
my $i = 0;
while (scalar(@{$urls})) {
my $url = shift @{$urls};
push @{$workLoad},$url;
if (scalar(@{$workLoad}) == $eachThread) {
$workHorse[$i] = threads->new(\&worker,$workLoad);
$workLoad = [];
$i++;
}
}
$workHorse[$i] = threads->new(\&worker,$workLoad);
foreach my $thr (@workHorse) {
$thr->join();
}
sub worker {
my $urls = shift;
my $environment = $self->getEnvironment();
my $h = HTTP::Headers->new();
$h->header("User-Agent", $userAgent);
$h->header("Accept-Encoding", $acceptEncoding);
$h->header("Accept-Language", $acceptLanguage);
my $customerID = "";
my $activityID = "";
my $contactID = "";
if (-e "/code/conf/environment") {
$customerID = 56;
$activityID = 2731;
$contactID = 18289;
} else {
}
my $repeat = 1;
my %toRun = ();
my $ua = LWP::UserAgent->new( timeout => 45);
$ua->cookie_jar({});
$ua->parse_head(0);
$ua->max_redirect(3);
$ua->agent($userAgent);
$ua->conn_cache(LWP::ConnCache->new( total_capacity => 300));
my $all = scalar(@{$urls});
my $i = -1;
while(scalar(@{$urls})) {
$i++;
my $url = shift @{$urls};
my $rwURL = "";
if ($seed) {
$repeat = 1;
my $host = $self->makeOurHostName({fromURL => "$url",
conformTo => $self->{conformTo} });
my $uri = URI->new($url);
$uri->host($host);
$uri->port($port) if ($port ne "");
$rwURL = "$uri";
if ($rwURL =~ /\?/) {
$rwURL .= "&mgseed=1&mgdemo=1";
} else {
$rwURL .= "?mgseed=1&mgdemo=1";
}
&fetch($ua,$h,$rwURL);
} else {
$repeat = 1; # once is good enough since we have a lot of
urls, and we are not fetching from the origin server
my $host = $self->makeOurHostName({fromURL => "$url",
conformTo => $self->{conformTo} });
my $uri = URI->new($url);
$uri->host($host);
$uri->port($port) if ($port ne "");
foreach $demoflag (1) { # 1,3,4,5
$rwURL = "$uri";
my $ch = "";
if ($rwURL =~ /\?/) { $ch = "&"; }
else { $ch = "?"; }
$rwURL .= $ch .
"mgdemo=$demoflag&mgcustid=$customerID&mgpid=$activityID&mgcid=$contactID&mgbench=1&mgReleaseTag=$releaseTag";
&fetch($ua,$h,$rwURL);
}
}
}
}
sub fetch {
my ($ua,$headers,$url) = @_;
print "URL: $url\n";
my $request = HTTP::Request->new("GET",$url,$headers,"");
my $response = $ua->request($request);
}
if ($seed) { exit(0); }
Is there any known issues with just including a non-thread-safe module
(but not using it)? Is the regex engine thread safe now?
Thanks
Khai