Prior to this patch, rebuild_zebra.pl -z was effectively
hanging on to a lock on the zebraqueue table, preventing
other scripts from inserting new entries into the table.
This had the effect of causing circulation operations
to time out.

Refactored by having rebuild_zebra.pl pull the active
queue into memory, then mark entries done by zebraqueue.id.
Consequently, rebuild_zebra.pl should no longer
block adding new entries into zebraqueue.
---
 misc/migration_tools/rebuild_zebra.pl |  118 +++++++++++++++++++-------------
 1 files changed, 70 insertions(+), 48 deletions(-)

diff --git a/misc/migration_tools/rebuild_zebra.pl 
b/misc/migration_tools/rebuild_zebra.pl
index 132f68c..38054c3 100755
--- a/misc/migration_tools/rebuild_zebra.pl
+++ b/misc/migration_tools/rebuild_zebra.pl
@@ -106,18 +106,14 @@ if ($do_munge) {
     munge_config();
 }
 
-$dbh->{AutoCommit} = 0; # don't autocommit - want a consistent view of the 
zebraqueue table
-
 if ($authorities) {
     index_records('authority', $directory, $skip_export, $process_zebraqueue, 
$as_xml, $noxml, $do_not_clear_zebraqueue);
-    $dbh->commit(); # commit changes to zebraqueue, if any
 } else {
     print "skipping authorities\n";
 }
 
 if ($biblios) {
     index_records('biblio', $directory, $skip_export, $process_zebraqueue, 
$as_xml, $noxml, $do_not_clear_zebraqueue);
-    $dbh->commit(); # commit changes to zebraqueue, if any
 } else {
     print "skipping biblios\n";
 }
@@ -163,21 +159,21 @@ sub index_records {
         mkdir "$directory" unless (-d $directory);
         mkdir "$directory/$record_type" unless (-d "$directory/$record_type");
         if ($process_zebraqueue) {
-            my $sth = select_zebraqueue_records($record_type, 'deleted');
+            my $entries = select_zebraqueue_records($record_type, 'deleted');
             mkdir "$directory/del_$record_type" unless (-d 
"$directory/del_$record_type");
-            $num_records_deleted = generate_deleted_marc_records($record_type, 
$sth, "$directory/del_$record_type", $as_xml);
-            mark_zebraqueue_done($record_type, 'deleted');
-            $sth = select_zebraqueue_records($record_type, 'updated');
+            $num_records_deleted = generate_deleted_marc_records($record_type, 
$entries, "$directory/del_$record_type", $as_xml);
+            mark_zebraqueue_batch_done($entries);
+            $entries = select_zebraqueue_records($record_type, 'updated');
             mkdir "$directory/upd_$record_type" unless (-d 
"$directory/upd_$record_type");
-            $num_records_exported = export_marc_records($record_type, $sth, 
"$directory/upd_$record_type", $as_xml, $noxml);
-            mark_zebraqueue_done($record_type, 'updated');
+            $num_records_exported = 
export_marc_records_from_list($record_type, 
+                                                                  $entries, 
"$directory/upd_$record_type", $as_xml, $noxml);
+            mark_zebraqueue_batch_done($entries);
         } else {
             my $sth = select_all_records($record_type);
+            $num_records_exported = export_marc_records_from_sth($record_type, 
$sth, "$directory/$record_type", $as_xml, $noxml);
             unless ($do_not_clear_zebraqueue) {
-                mark_zebraqueue_done($record_type, 'deleted');
-                mark_zebraqueue_done($record_type, 'updated');
+                mark_all_zebraqueue_done($record_type);
             }
-            $num_records_exported = export_marc_records($record_type, $sth, 
"$directory/$record_type", $as_xml, $noxml);
         }
     }
     
@@ -205,44 +201,37 @@ sub select_zebraqueue_records {
     my $server = ($record_type eq 'biblio') ? 'biblioserver' : 
'authorityserver';
     my $op = ($update_type eq 'deleted') ? 'recordDelete' : 'specialUpdate';
 
-    my $sth = $dbh->prepare("SELECT DISTINCT biblio_auth_number 
+    my $sth = $dbh->prepare("SELECT id, biblio_auth_number 
                              FROM zebraqueue
                              WHERE server = ?
                              AND   operation = ?
-                             AND   done = 0");
+                             AND   done = 0
+                             ORDER BY id DESC");
     $sth->execute($server, $op);
-    return $sth;
+    my $entries = $sth->fetchall_arrayref({});
 }
 
-sub mark_zebraqueue_done {
-    my ($record_type, $update_type) = @_;
+sub mark_all_zebraqueue_done {
+    my ($record_type) = @_;
 
     my $server = ($record_type eq 'biblio') ? 'biblioserver' : 
'authorityserver';
-    my $op = ($update_type eq 'deleted') ? 'recordDelete' : 'specialUpdate';
 
-    if ($op eq 'recordDelete') {
-        my $sth = $dbh->prepare("UPDATE zebraqueue SET done = 1
-                                 WHERE id IN (
-                                    SELECT id FROM (
-                                        SELECT z1.id
-                                        FROM zebraqueue z1
-                                        JOIN zebraqueue z2 ON 
z2.biblio_auth_number = z1.biblio_auth_number
-                                        WHERE z1.done = 0
-                                        AND   z1.server = ?
-                                        AND   z2.done = 0
-                                        AND   z2.server = ?
-                                        AND   z1.operation = ?
-                                    ) d2
-                                 )
-                                ");
-        $sth->execute($server, $server, $op); # if we've deleted a record, any 
prior specialUpdates are void
-    } else {
-        my $sth = $dbh->prepare("UPDATE zebraqueue SET done = 1
-                                 WHERE server = ?
-                                 AND   operation = ?
-                                 AND   done = 0");
-        $sth->execute($server, $op); 
+    my $sth = $dbh->prepare("UPDATE zebraqueue SET done = 1
+                             WHERE server = ?
+                             AND done = 0");
+    $sth->execute($server);
+}
+
+sub mark_zebraqueue_batch_done { # set done = 1 on every zebraqueue row listed in $entries (arrayref of hashrefs, each with an 'id' key)
+    my ($entries) = @_;
+
+    $dbh->{AutoCommit} = 0; # group the per-row updates into a single transaction
+    my $sth = $dbh->prepare("UPDATE zebraqueue SET done = 1 WHERE id = ?");
+    foreach my $id (map { $_->{id} } @$entries) {
+        $sth->execute($id);
     }
+    $dbh->commit(); # commit explicitly once the updates have run; do not rely on the AutoCommit change below to do it
+    $dbh->{AutoCommit} = 1;
 }
 
 sub select_all_records {
@@ -262,7 +251,7 @@ sub select_all_biblios {
     return $sth;
 }
 
-sub export_marc_records {
+sub export_marc_records_from_sth {
     my ($record_type, $sth, $directory, $as_xml, $noxml) = @_;
 
     my $num_exported = 0;
@@ -287,13 +276,41 @@ sub export_marc_records {
     return $num_exported;
 }
 
+sub export_marc_records_from_list { # write each distinct record named in $entries to $directory/exported_records; returns the number written
+    my ($record_type, $entries, $directory, $as_xml, $noxml) = @_;
+
+    my $num_exported = 0;
+    open (OUT, ">:utf8 ", "$directory/exported_records") or die $!;
+    my $i = 0;
+    my %found = (); # record numbers already seen in this call
+    foreach my $record_number ( map { $_->{biblio_auth_number} }
+                                grep { !$found{ $_->{biblio_auth_number} }++ } # keep only the first entry for each biblio_auth_number
+                                @$entries ) {
+        print ".";
+        print "\r$i" unless ($i++ %100); # refresh the running counter on every 100th iteration, starting with the first
+        my ($marc) = get_corrected_marc_record($record_type, $record_number, 
$noxml);
+        if (defined $marc) { # entries for which no record came back are skipped and not counted
+            # FIXME - when more than one record is exported and $as_xml is 
true,
+            # the output file is not valid XML - it's just multiple <record> 
elements
+            # strung together with no single root element.  zebraidx doesn't 
seem
+            # to care, though, at least if you're using the GRS-1 filter.  It 
does
+            # care if you're using the DOM filter, which requires valid XML 
file(s).
+            print OUT ($as_xml) ? $marc->as_xml_record() : $marc->as_usmarc();
+            $num_exported++;
+        }
+    }
+    print "\nRecords exported: $num_exported\n";
+    close OUT;
+    return $num_exported;
+}
+
 sub generate_deleted_marc_records {
-    my ($record_type, $sth, $directory, $as_xml) = @_;
+    my ($record_type, $entries, $directory, $as_xml) = @_;
 
     my $num_exported = 0;
     open (OUT, ">:utf8 ", "$directory/exported_records") or die $!;
     my $i = 0;
-    while (my ($record_number) = $sth->fetchrow_array) {
+    foreach my $record_number (map { $_->{biblio_auth_number} } @$entries ) {
         print "\r$i" unless ($i++ %100);
         print ".";
 
@@ -348,21 +365,26 @@ sub get_raw_marc_record {
             $fetch_sth->execute($record_number);
             if (my ($blob) = $fetch_sth->fetchrow_array) {
                 $marc = MARC::Record->new_from_usmarc($blob);
+                $fetch_sth->finish();
             } else {
-                warn "failed to retrieve biblio $record_number";
+                return; # failure to find a bib is not a problem -
+                        # a delete could have been done before
+                        # trying to process a record update
             }
-            $fetch_sth->finish();
         } else {
             eval { $marc = GetMarcBiblio($record_number); };
             if ($@) {
-                warn "failed to retrieve biblio $record_number";
+                # here we do warn since catching an exception
+                # means that the bib was found but failed
+                # to be parsed
+                warn "error retrieving biblio $record_number";
                 return;
             }
         }
     } else {
         eval { $marc = GetAuthority($record_number); };
         if ($@) {
-            warn "failed to retrieve authority $record_number";
+            warn "error retrieving authority $record_number";
             return;
         }
     }
-- 
1.5.5.GIT

_______________________________________________
Koha-patches mailing list
[email protected]
http://lists.koha.org/mailman/listinfo/koha-patches

Reply via email to