Hi Dan,
Thanks for taking a look :-)
Here are the updated files. I *really* appreciate the distinction you made
between TCN and record_id - that was something that gave me no end of
confusion :-)
As far as Vandelay... just bad timing on my part :-/ I'm just now hearing
about it (that will teach me to stick my head in the sand!); I'm quite
excited by the prospect. If nothing else, though, monkeying with these
batch-loading scripts helped me to understand the system more :-D
Thanks again - I appreciate the feedback.
-David Christensen
On Thu, Oct 23, 2008 at 5:14 PM, Dan Wells <[EMAIL PROTECTED]> wrote:
> Hello David,
>
> I've only given the code a once-over at this point, but I expect to try it
> out soon, so thanks so much for taking this on! It looks very promising. I
> am also quite curious how "Vandelay" is going to affect our need for the
> various load scripts, but that is for another day (Oct. 30, to be exact).
>
> I did notice a few small things related to a recent update to marc2bre.pl.
> I am going to guess, in the marc2bre.pl section of eg_batch_loader.pl, that
> you meant this line:
>
> if (defined $config->{source_record}->{record_id}->{subfield}) {
>
> to say this:
>
> if (defined $config->{source_record}->{tcn}->{subfield}) {
>
> since record_id currently doesn't exist in your config file. It would be
> nice if it did, though, to support both internal ids and tcns, something
> like:
> ...
> push @args, "--tcnfield"; push @args, "'" .
> $config->{source_record}->{tcn}->{tag} . "'";
> if (defined $config->{source_record}->{tcn}->{subfield}) {
> push @args, " --tcnsubfield"; push @args, "'" .
> $config->{source_record}->{tcn}->{subfield} . "'";
> }
> if (defined $config->{source_record}->{record_id}->{tag}) {
> push @args, " --idfield"; push @args, "'" .
> $config->{source_record}->{record_id}->{tag} . "'";
> if (defined
> $config->{source_record}->{record_id}->{subfield}) {
> push @args, " --idsubfield"; push @args, "'" .
> $config->{source_record}->{record_id}->{subfield} . "'";
> }
> }
> ...
>
> with the config file changed to:
> ...
> <source_record>
> <tcn>
> <!-- Which field has the title control number?
> Commonly used are 001 (leave 'subfield' blank)
> or 035 subfield 'a'
> Note that the Evergreen loader will create record IDs, which are
> a different beast (see below).
> -->
> <tag>001</tag>
> <subfield></subfield>
> </tcn>
> <record_id>
> <!-- Optionally specify this field if you want your
> Evergreen internal record IDs to match your old
> system ids (for easier linking to other data)
> -->
> <tag>035</tag>
> <subfield>a</subfield>
> </record_id>
> ...
>
> I may have made a typo or two, but you get the idea. I was also
> uncomfortable with calling TCNs "system-unique ids", since they should by
> design be inherent to the record itself, not unique to any particular
> system, but I have no real intention of arguing semantics if things work :)
>
> Hope this helps, and thanks again,
> DW
>
>
--
WARNING: You are logged into reality as root....
#!/usr/bin/perl -W
use strict;
use Data::Dumper;
use Getopt::Long;
use XML::Simple qw(:strict);
use Log::Dispatch;
use Log::Dispatch::File;
use Log::Dispatch::Screen;
use File::Inplace;
use DBI;
use Cwd;
# Ordered list of pipeline stages; --start/--stop select a contiguous
# sub-range of this list.
my @stages = qw(m2b b2i i2s edsql load qmm hold clrstg inshold gencop meta);
# Dispatch table: stage name -> handler sub.  Each handler returns 0 on
# success and non-zero on failure.
my %fcntable = ("m2b" => \&marc_to_bre,
"b2i" => \&bre_to_ingest,
"i2s" => \&ingest_to_sql,
"edsql" => \&edit_sql,
"load" => \&loader_v2,
"qmm" => \&quick_metamap_v2,
"hold" => \&parse_holdings,
"clrstg" => \&clear_staging_table,
"inshold" => \&insert_holdings,
"gencop" => \&generate_copies_v2,
"meta" => \&add_metarecords_v2
);
my $flag_clear = 0;
my $flag_help = 0;
my $records_file = "test.mrc";
my $start_at_stage = "m2b";
# NOTE: "end" never matches a stage name, so by default every stage runs.
my $stop_at_stage = "end";
# NOTE(review): GetOptions' result is captured but never checked, and a
# misspelled --start/--stop value silently skips stages rather than
# erroring — consider validating against @stages.
my $result = GetOptions("file=s" => \$records_file,
"start=s" => \$start_at_stage,
"stop=s" => \$stop_at_stage,
"clear" => \$flag_clear, # clear database before load
"help" => \$flag_help,
);
if ($flag_help) {
print "usage: $0 --file filename [--start start-stage] [--stop stop-stage] [--clear] [--help]\n";
print "\tLoad a file of MARC records into your Evergreen system.\n";
print "\tconfig file is eg_batch_loader_cfg.xml\n";
print "\n";
print "\toptions:\n";
print "\t\t--start: specify which processing stage to start at (defaults to 'm2b')\n";
print "\t\t--stop : specify which processing stage to stop at (defaults to 'end')\n";
print "\t\t--clear: WARNING! This will delete all bibs and holdings from your database!\n";
print "\n";
print "\tstages of processing:\n";
print "\t\tm2b : marc to bre conversion\n";
print "\t\tb2i : bre to ingest conversion\n";
print "\t\ti2s : ingest to sql conversion\n";
print "\t\tedsql : edit that sql file to allow it to commit\n";
print "\t\tload : import the records into postgres\n";
print "\t\tqmm : do the quick metarecord map\n";
print "\t\thold : parse holdings from your original marc file\n";
print "\t\tclrstg : clear the holdings staging table\n";
print "\t\tinshold: insert holdings into staging table\n";
print "\t\tgencop : generate copies\n";
print "\t\tmeta : add metarecord information to db\n";
print "\n";
print "\ttransactions are logged to the file (and at the level) specified in your config file\n";
print "\n";
exit 0;
}
# Configuration is read from a fixed filename in the current directory.
my $xs = XML::Simple->new( ForceArray => 0,
KeyAttr => [],
SuppressEmpty => undef,
);
my $config = $xs->XMLin('eg_batch_loader_cfg.xml');
# Log everything to both a file and the screen; the minimum level for each
# destination comes from the <logging> section of the config.
my $dispatcher = Log::Dispatch->new;
$dispatcher->add( Log::Dispatch::File->new( name => 'file1',
min_level => $config->{logging}->{file}->{loglevel},
filename => $config->{logging}->{file}->{logfile}
)
);
$dispatcher->add( Log::Dispatch::Screen->new( name => 'screen',
min_level => $config->{logging}->{screen}->{loglevel},
stderr => 1
)
);
$dispatcher->log( level => 'info',
message => "INFO: Database: name [" . $config->{database}->{db_name} . "]\n" );
$dispatcher->log( level => 'debug',
message => "DEBUG: " . Dumper($config) );
# --clear wipes bibs/holdings BEFORE any stage runs.
if ($flag_clear) { clear_database() };
# Run the selected stages in order.  NOTE: the stop stage itself is NOT
# executed ("last" fires before dispatch), i.e. --stop is exclusive.
my $started = 0;
foreach my $stage (@stages) {
last if ($stop_at_stage eq $stage);
if (($started) || ($start_at_stage eq $stage)) {
$started = 1;
# Any stage failure aborts the whole run with exit status 1.
if ( 0 != $fcntable{$stage}->() ) {
$dispatcher->log( level => 'error',
message => "ERR: $stage failed.\n");
exit(1)
};
} else {
$dispatcher->log( level => 'notice',
message => "NOTICE: Skipping $stage\n" );
}
}
exit(0);
#
# Stage 1
#
sub marc_to_bre {
    # Stage 1: shell out to marc2bre.pl to turn the raw MARC file into a
    # "$records_file.bre" file of biblio.record_entry rows.
    # Returns 0 on success, non-zero on failure.
    my $msg_command_name = "MARC to BRE conversion";
    $dispatcher->log( level => 'info',
                      message => "INFO: Beginning $msg_command_name\n");
    # New records are numbered starting just past the current MAX(id).
    my $starting_id = get_starting_id();
    if (defined $starting_id) {
        $dispatcher->log( level => 'debug',
                          message => "DEBUG: starting id: " . $starting_id . "\n" );
    } else {
        $dispatcher->log( level => 'error',
                          message => "ERR: unable to get starting id\n" );
        return 1;
    }
    my $command = $config->{paths}->{marc2bre} . "/marc2bre.pl";
    my @args;
    push @args, "--marctype"; push @args, "USMARC";
    push @args, "--db_name"; push @args, $config->{database}->{db_name};
    push @args, "--db_user"; push @args, $config->{database}->{db_user};
    push @args, "--db_pw" ; push @args, $config->{database}->{db_pw};
    push @args, "--startid"; push @args, $starting_id;
    push @args, "--tcnfield"; push @args, "'" . $config->{source_record}->{tcn}->{tag} . "'";
    if (defined $config->{source_record}->{tcn}->{subfield}) {
        # CONSISTENCY FIX: these option names carried a stray leading space
        # (" --tcnsubfield", " --idfield", " --idsubfield"); harmless when
        # interpolated into the shell command, but inconsistent with the
        # other pushes above.
        push @args, "--tcnsubfield"; push @args, "'" . $config->{source_record}->{tcn}->{subfield} . "'";
    }
    # Optional: force internal record IDs to match the legacy system's IDs.
    if (defined $config->{source_record}->{record_id}->{tag}) {
        push @args, "--idfield"; push @args, "'" . $config->{source_record}->{record_id}->{tag} . "'";
        if (defined $config->{source_record}->{record_id}->{subfield}) {
            push @args, "--idsubfield"; push @args, "'" . $config->{source_record}->{record_id}->{subfield} . "'";
        }
    }
    push @args, "--trash"; push @args, $config->{source_record}->{holdings}->{tag}; # holdings aren't generated from the BRE file.
    push @args, $records_file;
    $command .= " @args >$records_file.bre";
    $dispatcher->log( level => 'info',
                      message => "INFO: marc2bre command: " . $command . "\n" );
    return run_command( $command, $msg_command_name );
}
#
# Stage 2
#
sub bre_to_ingest {
    # Stage 2: run direct_ingest.pl over the .bre file, producing the
    # .ingest file.  Equivalent to:
    #   perl direct_ingest.pl sample_marc.bre > sample_marc.ingest
    # Returns 0 on success, non-zero on failure.
    my $stage_name = "BRE to INGEST conversion";
    $dispatcher->log( level => 'info',
                      message => "INFO: Beginning $stage_name\n");
    my $cmd = sprintf( "%s/direct_ingest.pl %s.bre >%s.ingest",
                       $config->{paths}->{direct_ingest},
                       $records_file,
                       $records_file );
    $dispatcher->log( level => 'info',
                      message => "INFO: direct_ingest command: " . $cmd . "\n" );
    return run_command( $cmd, $stage_name );
}
#
# Stage 3
#
sub ingest_to_sql {
    # Stage 3: feed the .ingest file to parallel_pg_loader.pl, which writes
    # per-table SQL/COPY files named "$records_file.<table>.sql".
    # Returns 0 on success, non-zero on failure.
    my $msg_command_name = "INGEST to SQL conversion";
    $dispatcher->log( level => 'info',
                      message => "INFO: Beginning $msg_command_name\n");
    my $command = $config->{paths}->{parallel_pg_loader} . "/parallel_pg_loader.pl";
    my @args;
    # Load order matters: bre rows must exist before the metabib rows that
    # reference them.
    my @tables = qw(bre mrd mfr mtfe mafe msfe mkfe msefe);
    foreach my $table (@tables) {
        push @args, "-order", $table;
    }
    # Every table except bre gets database-assigned primary keys.
    foreach my $table (grep { $_ ne 'bre' } @tables) {
        push @args, "-autoprimary", $table;
    }
    push @args, "-output"; push @args, "$records_file"; # this is the output file basename; gets appended with ".bre.sql", ".mrd.sql", ".sql", etc
    $command .= " @args <$records_file.ingest";
    # BUG FIX: this log line previously said "direct_ingest command" even
    # though the command being run is parallel_pg_loader.
    $dispatcher->log( level => 'info',
                      message => "INFO: parallel_pg_loader command: " . $command . "\n" );
    return run_command( $command, $msg_command_name );
}
#
# Edit file in-place
#
sub edit_sql {
    # Stage 4: rewrite "$records_file.sql" in place, turning the inert
    # "-- COMMIT;" line emitted by parallel_pg_loader into a live "COMMIT;"
    # so the load actually commits.
    # (File::Inplace is a bit of overkill here, but the module is already
    # available and may be handy for trickier edits later.)
    # Always returns 0.
    my $stage_name = "Edit SQL to change --COMMIT to COMMIT";
    $dispatcher->log( level => 'info',
                      message => "INFO: Beginning $stage_name\n");
    my $editor = File::Inplace->new( file => "$records_file.sql" );
    while (my ($line) = $editor->next_line) {
        $editor->replace_line("COMMIT;") if $line =~ /^-- COMMIT;$/;
    }
    $editor->commit;
    $dispatcher->log( level => 'info',
                      message => "INFO: Completed $stage_name\n");
    return (0);
}
#
# Stage 5
#
sub loader_v2 {
    # Stage 5: bulk-load the per-table SQL files produced by stage 3 into
    # Postgres via COPY ... FROM, all inside one transaction.
    # Returns 0 on success, 1 on failure.
    my $rv = 0; # return value of 0 indicates success
    my $SQL;
    my $msg_command_name = "Loading";
    $dispatcher->log( level => 'info',
                      message => "INFO: Beginning $msg_command_name\n");
    my $cwd = getcwd();
    my $dbh = DBI->connect("dbi:Pg:dbname=" . $config->{database}->{db_name},
                           $config->{database}->{db_user},
                           $config->{database}->{db_pw},
                           {AutoCommit => 0}
    );
    if (defined $dbh) {
        $dispatcher->log( level => 'debug',
                          message => "DEBUG: connected to database\n" );
        $dbh->{AutoCommit} = 0; # enable transactions, if possible
        $dbh->{RaiseError} = 1; # make do() die on failure so eval{} catches it
        # One [table, column-list, file-suffix] triple per COPY, in load
        # order (bre first; the metabib rows reference it).
        my @copy_specs = (
            [ 'biblio.record_entry',
              'active,create_date,creator,deleted,edit_date,editor,fingerprint,id,last_xact_id,marc,quality,source,tcn_source,tcn_value',
              'bre' ],
            [ 'metabib.rec_descriptor',
              'audience,bib_level,cat_form,char_encoding,control_type,enc_level,item_form,item_lang,item_type,lit_form,pub_status,record,type_mat,vr_format',
              'mrd' ],
            [ 'metabib.keyword_field_entry', 'field,source,value', 'mkfe' ],
            [ 'metabib.author_field_entry',  'field,source,value', 'mafe' ],
            [ 'metabib.title_field_entry',   'field,source,value', 'mtfe' ],
            [ 'metabib.full_rec', 'ind1,ind2,record,subfield,tag,value', 'mfr' ],
            [ 'metabib.subject_field_entry', 'field,source,value', 'msfe' ],
            [ 'metabib.series_field_entry',  'field,source,value', 'msefe' ],
        );
        eval {
            $SQL = "SET CLIENT_ENCODING TO 'UNICODE'";
            $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
            $dbh->do( $SQL );
            foreach my $spec (@copy_specs) {
                my ($table, $columns, $suffix) = @$spec;
                # COPY ... FROM runs server-side, so the path must be
                # absolute and visible to the postgres backend.
                $SQL = "COPY $table ($columns) FROM '" . $cwd . "/" . $records_file . ".$suffix.sql'";
                $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
                $dbh->do( $SQL );
            }
            $dbh->commit; # commit the changes if we get this far
        };
        if ($@) {
            # BUG FIX: this message was mangled to "[EMAIL PROTECTED]" by an
            # email address redactor; it must interpolate $@.
            $dispatcher->log( level => 'error',
                              message => "ERR: transaction aborted because: $@\n" );
            # now rollback to undo the incomplete changes
            # but do it in an eval{} as it may also fail
            eval { $dbh->rollback };
            $rv = 1;
        }
        $dbh->disconnect();
    } else {
        $dispatcher->log( level => 'error',
                          message => "ERR: could not connect to database: " . $DBI::errstr . "\n" );
        $rv = 1;
    }
    return $rv;
}
#
# Stage 6
#
sub quick_metamap_v2 {
    # Stage 6: rebuild metabib.metarecord and metabib.metarecord_source_map
    # from scratch, grouping bib records by fingerprint (the highest-quality
    # record per fingerprint becomes the master record), then vacuum both
    # tables.  Returns 0 on success, 1 on failure.
    my $rv = 0; # return value of 0 indicates success
    my $SQL;
    my $msg_command_name = "Quick metarecord map";
    $dispatcher->log( level => 'info',
                      message => "INFO: Beginning $msg_command_name\n");
    my $dbh = DBI->connect("dbi:Pg:dbname=" . $config->{database}->{db_name},
                           $config->{database}->{db_user},
                           $config->{database}->{db_pw},
                           {AutoCommit => 0}
    );
    if (defined $dbh) {
        $dispatcher->log( level => 'debug',
                          message => "DEBUG: connected to database\n" );
        $dbh->{AutoCommit} = 0; # enable transactions, if possible
        $dbh->{RaiseError} = 1; # make do() die on failure so eval{} catches it
        eval {
            # Drop the FK so both tables can be truncated; re-added below.
            $SQL = "ALTER TABLE metabib.metarecord_source_map DROP CONSTRAINT metabib_metarecord_source_map_metarecord_fkey";
            $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
            $dbh->do( $SQL );
            $SQL = "TRUNCATE metabib.metarecord";
            $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
            $dbh->do( $SQL );
            $SQL = "TRUNCATE metabib.metarecord_source_map";
            $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
            $dbh->do( $SQL );
            $SQL = <<'EOSql';
INSERT INTO metabib.metarecord (fingerprint,master_record)
SELECT fingerprint,id
FROM (SELECT DISTINCT ON (fingerprint) fingerprint, id, quality
FROM biblio.record_entry ORDER BY fingerprint, quality desc
) AS x
WHERE fingerprint IS NOT NULL
EOSql
            $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
            $dbh->do( $SQL );
            $SQL = <<'EOSql';
INSERT INTO metabib.metarecord_source_map (metarecord,source)
SELECT m.id, b.id
FROM biblio.record_entry b
JOIN metabib.metarecord m ON (m.fingerprint = b.fingerprint)
EOSql
            $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
            $dbh->do( $SQL );
            $SQL = "ALTER TABLE metabib.metarecord_source_map ADD CONSTRAINT metabib_metarecord_source_map_metarecord_fkey FOREIGN KEY (metarecord) REFERENCES metabib.metarecord (id)";
            $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
            $dbh->do( $SQL );
            $dbh->commit; # commit the changes if we get this far
        };
        if ($@) {
            # BUG FIX: this message was mangled to "[EMAIL PROTECTED]" by an
            # email address redactor; it must interpolate $@.
            $dispatcher->log( level => 'error',
                              message => "ERR: transaction aborted because: $@\n" );
            # now rollback to undo the incomplete changes
            # but do it in an eval{} as it may also fail
            eval { $dbh->rollback };
            $rv = 1;
        }
        $dbh->{AutoCommit} = 1; # disable transactions in order to do the vacuuming
        # ROBUSTNESS FIX: RaiseError is still on, so the vacuum do() calls
        # previously ran outside any eval and a failure would kill the whole
        # script before disconnect.  Wrap them.
        eval {
            foreach my $table ('metabib.metarecord', 'metabib.metarecord_source_map') {
                $SQL = "VACUUM FULL ANALYZE VERBOSE $table";
                $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
                $dbh->do( $SQL );
            }
        };
        if ($@) {
            $dispatcher->log( level => 'error',
                              message => "ERR: vacuum failed: $@\n" );
            $rv = 1;
        }
        $dbh->disconnect();
    } else {
        $dispatcher->log( level => 'error',
                          message => "ERR: could not connect to database: " . $DBI::errstr . "\n" );
        $rv = 1;
    }
    return $rv;
}
#
# Stage 7
#
sub parse_holdings {
    # Stage 7: shell out to eg_batch_loader_parse_holdings.pl, which reads
    # the original MARC file and writes "insert_holdings.sql".
    # Returns 0 on success, non-zero on failure.
    my $stage_name = "Parse holdings";
    $dispatcher->log( level => 'info',
                      message => "INFO: Beginning $stage_name\n");
    my $tcn      = $config->{source_record}->{tcn};
    my $holdings = $config->{source_record}->{holdings};
    my @args = ( "--file",     $records_file,
                 "--bibidtag", $tcn->{tag} );
    push @args, "--bibidsfd", $tcn->{subfield}
        if defined $tcn->{subfield};
    push @args, "--holdingstag", $holdings->{tag},
                "--callno",      $holdings->{callno},
                "--copyno",      $holdings->{copyno},
                "--barcode",     $holdings->{barcode},
                "--location",    $holdings->{location},
                "--library",     $holdings->{owning_library},
                "--itemtype",    $holdings->{itemtype},
                "--created",     $holdings->{created};
    my $command = $config->{paths}->{parse_holdings}
                . "/eg_batch_loader_parse_holdings.pl"
                . " @args";
    $dispatcher->log( level => 'info',
                      message => "INFO: parse holdings command: " . $command . "\n" );
    return run_command( $command, $stage_name );
}
#
# Stage 8
#
sub clear_staging_table {
    # Stage 8: empty the staging_items table so stage 9 starts clean.
    # Returns 0 on success, 1 on failure.
    my $rv = 0; # return value of 0 indicates success
    my $SQL;
    my $msg_command_name = "Clear staging table";
    $dispatcher->log( level => 'info',
                      message => "INFO: Beginning $msg_command_name\n");
    my $dbh = DBI->connect("dbi:Pg:dbname=" . $config->{database}->{db_name},
                           $config->{database}->{db_user},
                           $config->{database}->{db_pw},
                           {AutoCommit => 0}
    );
    if (defined $dbh) {
        $dispatcher->log( level => 'debug',
                          message => "DEBUG: connected to database\n" );
        $dbh->{AutoCommit} = 0; # enable transactions, if possible
        $dbh->{RaiseError} = 1; # make do() die on failure so eval{} catches it
        eval {
            $SQL = "DELETE FROM staging_items";
            $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
            $dbh->do( $SQL );
            $dbh->commit; # commit the changes if we get this far
        };
        if ($@) {
            # BUG FIX: this message was mangled to "[EMAIL PROTECTED]" by an
            # email address redactor; it must interpolate $@.
            $dispatcher->log( level => 'error',
                              message => "ERR: transaction aborted because: $@\n" );
            # now rollback to undo the incomplete changes
            # but do it in an eval{} as it may also fail
            eval { $dbh->rollback };
            $rv = 1;
        }
        $dbh->disconnect();
    } else {
        $dispatcher->log( level => 'error',
                          message => "ERR: could not connect to database: " . $DBI::errstr . "\n" );
        $rv = 1;
    }
    return $rv;
}
#
# Stage 9
#
sub insert_holdings {
    # Stage 9: run insert_holdings.sql (written by the hold stage) through
    # psql.  Equivalent to:
    #   psql -U evergreen -f insert_holdings.sql evergreen
    # Returns 0 on success, non-zero on failure.
    my $msg_command_name = "Insert holdings";
    my $name = "insert_holdings.sql";
    $dispatcher->log( level => 'info',
                      message => "INFO: Beginning $msg_command_name\n");
    my $command = "psql";
    my @args;
    push @args, "-U"; push @args, $config->{database}->{db_user};
    push @args, "-f"; push @args, $name;
    push @args, $config->{database}->{db_name};
    $command .= " @args";
    $dispatcher->log( level => 'info',
                      message => "INFO: insert holdings command: " . $command . "\n" );
    # SECURITY FIX: the original wrote the database password in clear text
    # to the log file here.  Remind the operator without leaking the secret.
    $dispatcher->log( level => 'info',
                      message => "INFO: Reminder - psql may prompt for the password of database user '" . $config->{database}->{db_user} . "'\n" );
    return run_command( $command, $msg_command_name );
}
#
# Stage 10
#
sub generate_copies_v2 {
    # Stage 10: create asset.call_number and asset.copy rows from the
    # staging_items table populated in stage 9, joining against
    # actor.org_unit (library shortnames) and asset.copy_location.
    # Returns 0 on success, 1 on failure.
    my $rv = 0; # return value of 0 indicates success
    my $SQL;
    my $msg_command_name = "Generate copies";
    $dispatcher->log( level => 'info',
                      message => "INFO: Beginning $msg_command_name\n");
    my $dbh = DBI->connect("dbi:Pg:dbname=" . $config->{database}->{db_name},
                           $config->{database}->{db_user},
                           $config->{database}->{db_pw},
                           {AutoCommit => 0}
    );
    if (defined $dbh) {
        $dispatcher->log( level => 'debug',
                          message => "DEBUG: connected to database\n" );
        $dbh->{AutoCommit} = 0; # enable transactions, if possible
        $dbh->{RaiseError} = 1; # make do() die on failure so eval{} catches it
        eval {
            # -- First, we build shelving location
            # DAVID - ONLY DO THIS IF IT HASN'T BEEN ADDED YET!
            # $SQL = <<'EOSql';
            #INSERT INTO asset.copy_location (name, owning_lib)
            # SELECT DISTINCT l.location, ou.id
            # FROM staging_items l JOIN actor.org_unit ou
            # ON (l.owning_lib = ou.shortname)
            #EOSql
            # $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
            # $dbh->do( $SQL );
            # -- Import call numbers for bibrecord->library mappings
            $SQL = <<'EOSql';
INSERT INTO asset.call_number (creator,editor,record,label,owning_lib)
SELECT DISTINCT 1, 1, l.bibkey , l.callnum, ou.id
FROM staging_items l
JOIN biblio.record_entry b ON (l.bibkey = b.id)
JOIN actor.org_unit ou ON (l.owning_lib = ou.shortname)
EOSql
            $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
            $dbh->do( $SQL );
            # -- Import base copy data
            $SQL = <<'EOSql';
INSERT INTO asset.copy (
circ_lib, creator, editor, create_date, barcode,
status, location, loan_duration,
fine_level, circ_modifier, deposit, ref, call_number)
SELECT DISTINCT ou.id AS circ_lib,
1 AS creator,
1 AS editor,
l.createdate AS create_date,
l.barcode AS barcode,
0 AS status,
cl.id AS location,
2 AS loan_duration,
2 AS fine_level,
CASE
WHEN l.item_type IN ('REFERENCE', 'DEPOSIT_BK') THEN 'BOOK'
ELSE l.item_type
END AS circ_modifier,
CASE
WHEN l.item_type = 'DEPOSIT_BK' THEN TRUE
ELSE FALSE
END AS deposit,
CASE
WHEN l.item_type = 'REFERENCE' THEN TRUE
ELSE FALSE
END AS ref,
cn.id AS call_number
FROM staging_items l
JOIN actor.org_unit ou
ON (l.owning_lib = ou.shortname)
JOIN asset.copy_location cl
ON (ou.id = cl.owning_lib AND l.location = cl.name)
JOIN asset.call_number cn
ON (ou.id = cn.owning_lib AND l.bibkey = cn.record AND l.callnum = cn.label)
EOSql
            $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
            $dbh->do( $SQL );
            $dbh->commit; # commit the changes if we get this far
        };
        if ($@) {
            # BUG FIX: this message was mangled to "[EMAIL PROTECTED]" by an
            # email address redactor; it must interpolate $@.
            $dispatcher->log( level => 'error',
                              message => "ERR: transaction aborted because: $@\n" );
            # now rollback to undo the incomplete changes
            # but do it in an eval{} as it may also fail
            eval { $dbh->rollback };
            $rv = 1;
        }
        $dbh->disconnect();
    } else {
        $dispatcher->log( level => 'error',
                          message => "ERR: could not connect to database: " . $DBI::errstr . "\n" );
        $rv = 1;
    }
    return $rv;
}
#
# Stage 11
#
sub add_metarecords_v2 {
    # Stage 11: incremental version of the metarecord map — create
    # metarecords for newly loaded bibs that have no source-map entry yet,
    # then map every unmapped bib to its metarecord by fingerprint.
    # Returns 0 on success, 1 on failure.
    my $rv = 0; # return value of 0 indicates success
    my $SQL;
    my $msg_command_name = "Add metarecords";
    $dispatcher->log( level => 'info',
                      message => "INFO: Beginning $msg_command_name\n");
    my $dbh = DBI->connect("dbi:Pg:dbname=" . $config->{database}->{db_name},
                           $config->{database}->{db_user},
                           $config->{database}->{db_pw},
                           {AutoCommit => 0}
    );
    if (defined $dbh) {
        $dispatcher->log( level => 'debug',
                          message => "DEBUG: connected to database\n" );
        $dbh->{AutoCommit} = 0; # enable transactions, if possible
        $dbh->{RaiseError} = 1; # make do() die on failure so eval{} catches it
        eval {
            $SQL = <<'EOSql';
INSERT INTO metabib.metarecord (fingerprint, master_record)
SELECT b.fingerprint, b.id
FROM biblio.record_entry b
WHERE b.id IN (SELECT r.id FROM biblio.record_entry r
LEFT JOIN metabib.metarecord_source_map k
ON (k.source = r.id)
WHERE k.id IS NULL AND r.fingerprint IS NOT NULL)
AND NOT EXISTS ( SELECT 1 FROM metabib.metarecord
WHERE fingerprint = b.fingerprint )
EOSql
            $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
            $dbh->do( $SQL );
            $SQL = <<'EOSql';
INSERT INTO metabib.metarecord_source_map (metarecord, source)
SELECT m.id, r.id
FROM biblio.record_entry r
JOIN metabib.metarecord m USING (fingerprint)
WHERE r.id IN (SELECT b.id FROM biblio.record_entry b
LEFT JOIN metabib.metarecord_source_map k
ON (k.source = b.id) WHERE k.id IS NULL)
EOSql
            $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
            $dbh->do( $SQL );
            $dbh->commit; # commit the changes if we get this far
        };
        if ($@) {
            # BUG FIX: this message was mangled to "[EMAIL PROTECTED]" by an
            # email address redactor; it must interpolate $@.
            $dispatcher->log( level => 'error',
                              message => "ERR: transaction aborted because: $@\n" );
            # now rollback to undo the incomplete changes
            # but do it in an eval{} as it may also fail
            eval { $dbh->rollback };
            $rv = 1;
        }
        $dbh->disconnect();
    } else {
        $dispatcher->log( level => 'error',
                          message => "ERR: could not connect to database: " . $DBI::errstr . "\n" );
        $rv = 1;
    }
    return $rv;
}
sub clear_database {
    # Invoked by --clear: wipe ALL bib records and holdings, preserving the
    # two "special" rows with id = -1 (stashed in temp tables across the
    # TRUNCATE ... CASCADE and restored afterwards).
    # Returns 0 on success, 1 on failure.
    my $rv = 0; # return value of 0 indicates success
    my $SQL;
    my $msg_command_name = "CLEAR DATABASE!!! Deleting all records and holdings.";
    $dispatcher->log( level => 'info',
                      message => "INFO: Beginning $msg_command_name\n");
    my $dbh = DBI->connect("dbi:Pg:dbname=" . $config->{database}->{db_name},
                           $config->{database}->{db_user},
                           $config->{database}->{db_pw},
                           {AutoCommit => 0}
    );
    if (defined $dbh) {
        $dispatcher->log( level => 'debug',
                          message => "DEBUG: connected to database\n" );
        $dbh->{AutoCommit} = 0; # enable transactions, if possible
        $dbh->{RaiseError} = 1; # make do() die on failure so eval{} catches it
        eval {
            # Stash the id = -1 rows that Evergreen expects to always exist.
            $SQL = "CREATE TEMP TABLE special_bib AS SELECT * FROM biblio.record_entry WHERE id = -1";
            $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
            $dbh->do( $SQL );
            $SQL = "CREATE TEMP TABLE special_volume AS SELECT * FROM asset.call_number WHERE id = -1";
            $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
            $dbh->do( $SQL );
            # CASCADE also clears call numbers, copies, metabib rows, etc.
            $SQL = "TRUNCATE biblio.record_entry CASCADE";
            $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
            $dbh->do( $SQL );
            $SQL = "INSERT INTO biblio.record_entry SELECT * FROM special_bib";
            $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
            $dbh->do( $SQL );
            $SQL = "INSERT INTO asset.call_number SELECT * FROM special_volume";
            $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
            $dbh->do( $SQL );
            $dbh->commit; # commit the changes if we get this far
        };
        if ($@) {
            # BUG FIX: this message was mangled to "[EMAIL PROTECTED]" by an
            # email address redactor; it must interpolate $@.
            $dispatcher->log( level => 'error',
                              message => "ERR: transaction aborted because: $@\n" );
            # now rollback to undo the incomplete changes
            # but do it in an eval{} as it may also fail
            eval { $dbh->rollback };
            $rv = 1;
        }
        $dbh->disconnect();
    } else {
        $dispatcher->log( level => 'error',
                          message => "ERR: could not connect to database: " . $DBI::errstr . "\n" );
        $rv = 1;
    }
    return $rv;
}
#
#
#
sub get_starting_id {
    # Return MAX(id)+1 from biblio.record_entry (the first free record id),
    # or undef on connection/query failure.
    my $id = undef;
    my $SQL = 'SELECT MAX(id)+1 FROM biblio.record_entry';
    my $dbh = DBI->connect("dbi:Pg:dbname=" . $config->{database}->{db_name},
                           $config->{database}->{db_user},
                           $config->{database}->{db_pw},
                           {AutoCommit => 0}
    );
    if (defined $dbh) {
        $dispatcher->log( level => 'debug',
                          message => "DEBUG: connected to database\n" );
        my $ary_ref = $dbh->selectrow_arrayref( $SQL );
        if (defined $ary_ref) {
            $id = $ary_ref->[0];
        } else {
            $dispatcher->log( level => 'error',
                              message => "ERR: query failed! -- " . $SQL . "\n" );
        }
        # BUG FIX: disconnect on both paths — the original only disconnected
        # on success and leaked the handle when the query failed.
        $dbh->disconnect();
    } else {
        $dispatcher->log( level => 'error',
                          message => "ERR: could not connect to database: " . $DBI::errstr . "\n" );
    }
    return $id;
}
#
# Run the command and check for success/failure
#
sub run_command {
    # Execute a shell command, log success or a diagnosis of the failure,
    # and return the raw wait status ($?) — 0 means success.
    my ($command, $msg_command_name) = @_;
    my $status = system( $command );
    if ($status == 0) {
        # completed ok
        $dispatcher->log( level => 'info',
                          message => "INFO: Completed $msg_command_name\n");
        return $?;
    }
    # Non-zero: distinguish launch failure, death by signal, and a plain
    # non-zero exit code.
    if ($? == -1) {
        $dispatcher->log( level => 'error',
                          message => "ERR: failed to execute: $!\n" );
    } elsif ($? & 127) {
        my $signal   = $? & 127;
        my $coredump = ($? & 128) ? 'with' : 'without';
        $dispatcher->log( level => 'error',
                          message => sprintf("ERR: child died with signal %d, %s coredump\n", $signal, $coredump) );
    } else {
        $dispatcher->log( level => 'error',
                          message => sprintf("ERR: child exited with value %d\n", $? >> 8) );
    }
    $dispatcher->log( level => 'error',
                      message => "ERR: $msg_command_name did not complete\n");
    return $?;
}
<config>
<logging>
<file>
<logfile>ebl.log</logfile>
<!-- loglevel can be one of:
debug
info
notice
warning
error
critical
alert
emergency
(though in practice, you'll likely specify warning or notice)
-->
<loglevel>debug</loglevel>
</file>
<screen>
<loglevel>info</loglevel>
</screen>
</logging>
<database>
<db_name>evergreen</db_name>
<db_user>evergreen</db_user>
<db_pw>evergreen</db_pw>
</database>
<paths>
<!-- paths to various utilities -->
<marc2bre>.</marc2bre>
<direct_ingest>.</direct_ingest>
<parallel_pg_loader>.</parallel_pg_loader>
<parse_holdings>.</parse_holdings>
</paths>
<source_record>
<tcn>
<!-- Which field has the TCN (title control number)?
Commonly used are 001 (leave 'subfield' blank)
or 035 subfield 'a'
Note that the Evergreen loader will create record IDs, which are
a different beast (see below).
-->
<tag>001</tag>
<subfield></subfield>
</tcn>
<record_id>
<!-- Optionally specify this field if you want your
Evergreen internal record IDs to match your old
system ids (for easier linking to other data)
-->
<tag>035</tag>
<subfield>a</subfield>
</record_id>
<holdings>
<tag>852</tag>
<callno>h</callno>
<copyno>t</copyno>
<barcode>p</barcode>
<owning_library>b</owning_library>
<location>b</location>
<itemtype>a</itemtype>
<created>d</created>
</holdings>
</source_record>
</config>
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;
# vim:noet:ts=4:sw=4:
# DC - Our bibids are placed in subfield 001
# Our holdings are stored in field 852 with the following structure (roughly):
# h - call number
# t - copy number
# p - barcode
# b - library
# b - location
# a - item type
# d - create date
# Defaults match this site's MARC layout (see the comment block above):
# bib id in control field 001, holdings in field 852.
my $filename = "test.mrc";
my $bibidtag = '001';
my $bibidsfd;                    # undef => bib id lives in a control field
my $src_holdingstag = '852';
my $src_callno = 'h';
my $src_copyno = 't';
my $src_barcode = 'p';
my $src_location = 'b';
my $src_library = 'b';
my $src_itemtype = 'a';
my $src_date_created = 'd';
my $flag_help = 0;
my $result = GetOptions("file=s" => \$filename,
                        "bibidtag=s" => \$bibidtag,
                        "bibidsfd=s" => \$bibidsfd,
                        "holdingstag=s" => \$src_holdingstag,
                        "callno=s" => \$src_callno,
                        "copyno=s" => \$src_copyno,
                        "barcode=s" => \$src_barcode,
                        "location=s" => \$src_location,
                        "library=s" => \$src_library,
                        "itemtype=s" => \$src_itemtype,
                        "created=s" => \$src_date_created,
                        "help" => \$flag_help,
);
if ($flag_help) {
    # BUG FIX: the usage line said "--bibtag" although the option is
    # "--bibidtag"; the defaults shown for bibidtag/bibidsfd (035/a) did not
    # match the actual defaults (001/none); and "contianing" was misspelled.
    print "usage: $0 --file filename [--bibidtag xxx] [--callno x] [--copyno x] [--barcode x]\n";
    print " [--location x] [--library x] [--itemtype x] [--created x] [--help]\n";
    print "\n";
    print "\tParse the source marc file for holdings information\n";
    print "\n";
    print "\tOptions:\n";
    print "\t\tbibidtag: record ID tag (default 001)\n";
    print "\t\tbibidsfd: record ID subfield (default none; not needed for control fields)\n";
    print "\n";
    print "\t\tholdingstag: MARC holdings tag (852)\n";
    print "\t\tcallno : subfield containing call number (h)\n";
    print "\t\tcopyno : subfield containing copy number (t)\n";
    print "\t\tbarcode : subfield containing barcode (p)\n";
    print "\t\tlocation: subfield containing location (b)\n";
    print "\t\tlibrary : subfield containing owning library (b)\n";
    print "\t\titemtype: subfield containing item type (a)\n";
    print "\t\tcreated : subfield containing creation date (d)\n";
    print "\n";
    print "\tGenerates 'insert_holdings.sql' file.\n";
    exit 0;
}
use MARC::Batch;
use MARC::File::XML (BinaryEncoding => "utf8", RecordFormat => "USMARC");
my $sql_file = 'insert_holdings.sql';
# Parsed holdings accumulate here; one hashref of copy attributes per item.
my @holdings;
#my $batch = MARC::Batch->new('XML', $holdings_file);
my $batch = MARC::Batch->new('USMARC', $filename);
# BUG FIX: both calls below were mangled to "[EMAIL PROTECTED]" by an email
# address redactor; they must pass a reference to @holdings.
parse_holdings($batch, \@holdings);
format_holdings_for_insert(\@holdings);
exit(0);
sub parse_holdings {
    # Walk every record in the MARC batch and push one hashref of copy
    # attributes per holdings field onto @$holdings.
    # Args: $batch (MARC::Batch), $holdings (arrayref to fill).
    my $batch = shift;
    my $holdings = shift;
    my $record;
    while ($record = $batch->next()) {
        my $bibid_fld = $record->field( $bibidtag );
        if ($bibid_fld) {
            my $bibid;
            if ($bibid_fld->is_control_field()) {
                $bibid = $bibid_fld->data();
            } else {
                $bibid = $bibid_fld->subfield( $bibidsfd );
            }
            # ROBUSTNESS FIX: subfield() returns undef when the subfield is
            # absent; default to '' so s/// doesn't warn.  (Dropped the
            # useless /s and /o flags — the pattern has no '.' and no
            # interpolated variables.)
            $bibid = '' unless defined $bibid;
            $bibid =~ s/\D+//g; # keep digits only (bibkey must be numeric)
            my @copies = $record->field( $src_holdingstag );
            foreach my $copy (@copies) {
                # ROBUSTNESS FIX: missing subfields previously passed undef
                # into escape_quotes(), producing warnings; _sfd() defaults
                # them to ''.  (Note: copyno is parsed into $src_copyno but,
                # as in the original, is not captured here.)
                my %copy_attributes;
                $copy_attributes{'bibid'} = $bibid;
                $copy_attributes{'callnum'} = escape_quotes(_sfd($copy, $src_callno));
                $copy_attributes{'barcode'} = escape_quotes(_sfd($copy, $src_barcode));
                $copy_attributes{'location'} = escape_quotes(_sfd($copy, $src_location));
                $copy_attributes{'library'} = escape_quotes( uc _sfd($copy, $src_library));
                $copy_attributes{'item_type'} = escape_quotes(_sfd($copy, $src_itemtype));
                $copy_attributes{'createdate'} = escape_quotes(_sfd($copy, $src_date_created));
                push(@$holdings, \%copy_attributes);
            }
        } else {
            # need to log something about a missing id...
        }
    }
}
# Return a field's subfield value, or '' when the subfield is absent.
sub _sfd {
    my ($field, $code) = @_;
    my $value = $field->subfield($code);
    return defined $value ? $value : '';
}
sub format_holdings_for_insert {
    # Write one "INSERT INTO staging_items ..." statement per holding to
    # $sql_file (UTF-8).  Args: $holdings (arrayref of copy-attribute
    # hashrefs from parse_holdings).
    my $holdings = shift;
    # FIX: lexical filehandle instead of the bareword SQLFH, and the close
    # of this write handle is now checked (buffered write errors surface
    # at close time).
    open(my $sqlfh, '>', $sql_file) or die "open $sql_file: $!";
    binmode($sqlfh, ":utf8");
    # ORIGINAL COMMENTS FROM 'parse_holdings.pl':
    # We could do the inserts inside a transaction in case of problems
    #
    # Right now, the data contains an invalid date format (DD/M/YYYY)
    # that PostgreSQL doesn't like (it wants YYYY-MM-DD), so we'll go
    # ahead and just commit each row if possible
    #
    # print {$sqlfh} "BEGIN;\n\n";
    foreach my $holding (@$holdings) {
        printf {$sqlfh} "INSERT INTO staging_items (bibkey, callnum, createdate, location, barcode, item_type, owning_lib) VALUES ('%s', '%s', '%s', '%s', '%s', '%s', '%s');\n",
            $holding->{'bibid'},
            $holding->{'callnum'},
            $holding->{'createdate'},
            $holding->{'location'},
            $holding->{'barcode'},
            $holding->{'item_type'},
            $holding->{'library'}
        ;
    }
    # print {$sqlfh} "\nCOMMIT;";
    close $sqlfh or die "close $sql_file: $!";
}
sub escape_quotes {
    # Double any single quotes so the value is safe inside a single-quoted
    # SQL string literal.  ROBUSTNESS FIX: a missing subfield arrives as
    # undef; the original warned ("uninitialized value in substitution")
    # and returned undef — now it cleanly yields ''.
    my $string = shift;
    return '' unless defined $string;
    $string =~ s/'/''/g;
    return $string;
}