Update of /cvsroot/fink/pdb
In directory sc8-pr-cvs17.sourceforge.net:/tmp/cvs-serv17398
Modified Files:
create-finkdb.pl
Log Message:
move to lwp for faster posting; also more robust obsolete entry cleanup
Index: create-finkdb.pl
===================================================================
RCS file: /cvsroot/fink/pdb/create-finkdb.pl,v
retrieving revision 1.11
retrieving revision 1.12
diff -u -d -r1.11 -r1.12
--- create-finkdb.pl 28 Nov 2007 15:19:21 -0000 1.11
+++ create-finkdb.pl 30 Nov 2007 16:00:45 -0000 1.12
@@ -58,7 +58,11 @@
use Data::Dumper;
use IO::File;
use XML::Writer;
-use XML::Simple;
+use LWP::UserAgent;
+use HTTP::Request::Common qw(POST);
+use URI;
+use URI::QueryParam;
+use XML::DOM;
use Encode;
use Text::Iconv;
@@ -77,17 +81,21 @@
$iconv
$releases
+ $solr_url
$disable_cvs
$disable_indexing
$disable_solr
$disable_delete
+
+ $ua
);
$csv = Text::CSV_XS->new({ binary => 1 });
$debug = 0;
$trace = 0;
$iconv = Text::Iconv->new("UTF-8", "UTF-8");
+$solr_url = 'http://localhost:8983/solr';
$tempdir = $topdir . '/work';
$xmldir = $tempdir . '/xml';
@@ -96,6 +104,8 @@
$disable_solr = 0;
$disable_delete = 0;
+$ua = LWP::UserAgent->new();
+
# process command-line
GetOptions(
'help' => \$wanthelp,
@@ -104,6 +114,8 @@
'verbose' => \$debug,
'trace' => \$trace,
+ 'url', => \$solr_url,
+
'disable-cvs' => \$disable_cvs,
'disable-indexing' => \$disable_indexing,
'disable-solr' => \$disable_solr,
@@ -179,7 +191,7 @@
unless ($disable_delete)
{
print "- removing obsolete $release files\n";
- remove_obsolete_xml_files($releases->{$release});
+ remove_obsolete_entries($releases->{$release});
}
}
@@ -436,7 +448,7 @@
);
}
-sub remove_obsolete_xml_files
+sub remove_obsolete_entries
{
my $release = shift;
my $release_id = $release->{'id'};
@@ -458,14 +470,13 @@
return unless (defined($doc_id) and
defined($infofile));
- my $infofile = $basepath . '/fink/dists/' .
$infofile;
- print "infofile = $infofile\n" if ($trace);
- if (defined $infofile and -f $infofile)
+ my $infofilename = $basepath . '/fink/dists/' .
$infofile;
+ if (-f $infofilename)
{
print " - package $name is still valid
($infofile)\n" if ($trace);
} else {
- # print "- removing obsolete package
$name\n" if ($debug);
- post_to_solr('<delete><query>+doc_id:'
. $doc_id . '</query></delete>');
+ print " - package $name is obsolete
($infofile)\n" if ($debug);
+ post_to_solr('<delete><query>+doc_id:"'
. $doc_id . '"</query></delete>');
unlink($file);
}
},
@@ -473,6 +484,21 @@
},
$xmlpath,
);
+
+ # second pass; in theory this should never be an issue, but it's
possible
+ # to have stale stuff in the index if it gets out-of-sync
+ my $packages = get_packages_from_solr( '+rel_id:' . $release_id,
'doc_id,pkg_id,infofile,name' );
+ for my $package (@{$packages})
+ {
+ my $infofilename = $basepath . '/fink/dists/' .
$package->{'infofile'};
+ if (-f $infofilename)
+ {
+ print " - package ", $package->{'name'}, " is still
valid (", $package->{'infofile'}, ")\n" if ($trace);
+ } else {
+ print " - package ", $package->{'name'}, " is obsolete
(", $package->{'infofile'}, ")\n" if ($debug);
+ post_to_solr('<delete><query>+doc_id:"' .
$package->{'doc_id'} . '"</query></delete>');
+ }
+ }
}
# get the name of a CVS tag given the version
@@ -587,6 +613,99 @@
return [];
}
+
+sub post_to_solr
+{
+ my $data = shift;
+
+ my $req = HTTP::Request->new(POST => $solr_url . '/update');
+ $req->content_type('text/xml; charset=utf-8');
+
+ # post the data
+ if (-f $data)
+ {
+ $req->content(scalar read_file($data));
+ } else {
+ $req->content($data);
+ }
+
+ my $response = $ua->request($req);
+ if ($response->is_error())
+ {
+ die "failed to post update: " . $response->status_line() .
"\ncontent was:\n" . $req->content;
+ }
+
+ # commit the data
+ $req->content('<commit />');
+ $response = $ua->request($req);
+ if ($response->is_error())
+ {
+ die "failed to commit update: " . $response->status_line();
+ }
+}
+
+sub get_packages_from_solr
+{
+ my $query = shift;
+ my $fields = shift;
+
+ my $uri = URI->new($solr_url . '/select');
+ $uri->query_param( q => $query );
+ $uri->query_param( version => '2.2' );
+ $uri->query_param( start => 0 );
+ $uri->query_param( rows => 100000 );
+ $uri->query_param( indent => 'on' );
+
+ if (defined $fields)
+ {
+ $uri->query_param( fl => $fields );
+ }
+
+ my $req = HTTP::Request->new(GET => $uri);
+ my $response = $ua->request($req);
+ if ($response->is_error())
+ {
+ die "failed to get $query: " . $response->status_line();
+ }
+
+ my $parser = XML::DOM::Parser->new();
+ my $xml = $parser->parse($response->decoded_content());
+
+ my $return = [];
+
+ my $documents = $xml->getElementsByTagName("doc");
+ my $num_docs = $documents->getLength();
+ print " - get_packages_from_solr($query) found $num_docs documents\n"
if ($debug);
+ for (my $i = 0; $i < $num_docs; $i++)
+ {
+ my $package = {};
+
+ my $doc = $documents->item($i);
+ for my $field ($doc->getChildNodes())
+ {
+ my $field_name;
+ for my $attr ($field->getAttributes())
+ {
+ next unless (defined $attr);
+ if ($attr->getNamedItem("name"))
+ {
+ $field_name =
$attr->getNamedItem("name")->getNodeValue();
+ }
+ }
+ for my $child ($field->getChildNodes())
+ {
+ if ($child->getNodeValue() ne "")
+ {
+ $package->{$field_name} =
$child->getNodeValue();
+ }
+ }
+ }
+ push(@{$return}, $package);
+ }
+
+ return $return;
+}
+
sub die_with_usage
{
die <<EOMSG;
@@ -597,6 +716,7 @@
--verbose verbose output
--trace extremely verbose output
+ --url=<path> where SOLR's root is (default:
http://localhost:8983/solr)
--tempdir=<path> where to put temporary files
--xmldir=<path> where to write the .xml files
@@ -608,26 +728,4 @@
EOMSG
}
-
-sub post_to_solr
-{
- my $contents = shift;
-
- my @curl = ( 'curl', 'http://localhost:8983/solr/update', '-s', '-o',
'/dev/null', '-H', 'Content-type:text/xml; charset=utf-8', '--data-binary' );
- my @command;
-
- if (-f $contents)
- {
- @command = (@curl, '@' . $contents);
- } else {
- @command = (@curl, $contents);
- }
-
- print " - posting $contents\n" if ($debug);
- system(@command) == 0 or die ("unable to post update ($contents) to
solr: $!");
-
- print " - committing $contents\n" if ($debug);
- system(@curl, '<commit />') == 0 or die ("unable to commit update
($contents): $!");
-}
-
# vim: ts=4 sw=4 noet
-------------------------------------------------------------------------
SF.Net email is sponsored by: The Future of Linux Business White Paper
from Novell. From the desktop to the data center, Linux is going
mainstream. Let it simplify your IT future.
http://altfarm.mediaplex.com/ad/ck/8857-50307-18918-4
_______________________________________________
Fink-commits mailing list
[email protected]
http://news.gmane.org/gmane.os.apple.fink.cvs