On Thursday 25 May 2000, at 10 h 48, the keyboard of Stephane Bortzmeyer
<[EMAIL PROTECTED]> wrote:
> > 4) How can I create a "top list" of searched words?
>
> By running a program on the log file.
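Here is my modest contribution: a Perl script that reads the htsearch log and prints the most frequently searched words and the number of results per query.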
#!/usr/bin/perl
#
# Build a "top list" of searched words from an htsearch log file.
#
# Usage: <this script> logfile
#
# Each log line is expected to look like a syslog record written by
# htsearch: date, machine name, "htsearch[pid]:", client, [config file],
# (operator), [words], [logical words], (matches/per-page), "-", page number.
# Lines that do not match are reported on STDERR and skipped.
#
# Output (STDOUT):
#   1. every search word with the number of times it was requested,
#      most requested first;
#   2. every distinct query with the number of matches it returned,
#      most matches first.
use strict;
use warnings;
use English qw(-no_match_vars);

my $logfile = shift @ARGV;
if (!defined $logfile or $logfile eq '') {
    die "Usage: $0 logfile\n";
}

# %results: query string => number of matches logged for it (the last
#           first-page request for a given query wins).
# %numbers: single search word => number of occurrences over all
#           first-page requests.
# Both stay file-scoped because the sort comparators by_results and
# by_numbers read them directly.
my (%results, %numbers);

# Characters allowed inside a search word.  The original literal listed
# accented letters explicitly, but they were destroyed by a re-encoding of
# this message; accepting any byte >= 0x80 lets accented words parse whether
# the log is Latin-1 or UTF-8 (NOTE(review): confirm this is wide enough /
# not too wide for your logs).
my $characters = '\w\x80-\xFF';

# One htsearch log line.  Only three fields are captured:
#   capture 1 = the words as typed, capture 2 = number of matches,
#   capture 3 = page number.
# Under /x an escaped space ("\ ") and a space inside [...] are literal.
my $log_line = qr{
    ^[a-z]{3}\ +\d+\ \d+:\d+:\d+\                # Date; syslog pads days 1-9 with an extra space
    [a-z0-9\-\.]+\                               # Machine name, hyphens and dots allowed
    htsearch\[\d+\]:\                            # Program name and PID
    [a-z0-9\-\.]+\                               # Client name or address
    \[[a-z\-]+\]\                                # Configuration file
    \((?:and|or|boolean)\)\                      # Operator
    \[([$characters\'\"\-\?\!\&,;\+\* ]+)\]\     # Words, capture 1
    \[[$characters\'\"\-\?\!\&,;\+\*\(\) ]+\]\   # Logical words
    \((\d+)/\d+\)\                               # Results: matches, capture 2, then matches per page
    \-\                                          # Separator
    (\d+)\                                       # Page number, capture 3
}xi;

# Three-argument open with a lexical handle: the file name can no longer be
# interpreted as an open mode, and the handle is not a package global.
open(my $log, '<', $logfile)
    or die "Cannot read $logfile: $OS_ERROR";

while (my $line = <$log>) {
    chomp $line;

    my ($words, $matches, $page) = $line =~ $log_line;
    if (!defined $page) {
        warn "Cannot parse \"$line\"";
        next;
    }

    # Count results only for the first page, presumably so that paging
    # through one result set is not counted as several requests.
    next if $page != 1;

    $results{$words} = $matches;

    for my $word (split /[\s,]/, $words) {
        # Skip empty fields produced by consecutive separators.  Test for
        # the empty string rather than truthiness so the word "0" is kept.
        next if $word eq '';
        # Skip boolean operators and a few French/English stop words.
        next if $word =~ /\A(?:and|et|or|ou|de|le)\z/i;
        $numbers{$word}++;
    }
}
close($log);

print "\n-- NUMBER OF REQUESTS PER WORD --\n";
for my $word (reverse sort by_numbers keys %numbers) {
    print "$word: $numbers{$word}\n";
}

print "\n-- NUMBER OF RESULTS PER QUESTION --\n";
for my $request (reverse sort by_results keys %results) {
    print "$request: $results{$request}\n";
}
# Sort comparator over keys of %results (query strings).
# Primary key: the match count stored in %results, ascending.
# Tie-break: query string in descending order, so that equal counts no longer
# come out in hash order, which differs between runs.  The report uses
# "reverse sort by_results", which turns this into highest count first with
# ties listed alphabetically.
sub by_results {
    return $results{$a} <=> $results{$b}
        || $b cmp $a;
}
# Sort comparator over keys of %numbers (search words).
# Primary key: the request count stored in %numbers, ascending.
# Tie-break: the word itself in descending order, so that equal counts no
# longer come out in hash order, which differs between runs.  The report uses
# "reverse sort by_numbers", which turns this into most requested first with
# ties listed alphabetically.
sub by_numbers {
    return $numbers{$a} <=> $numbers{$b}
        || $b cmp $a;
}
------------------------------------
To unsubscribe from the htdig mailing list, send a message to
[EMAIL PROTECTED]
You will receive a message to confirm this.