Hi Dan,

Thanks for taking a look :-)

Here are the updated files.   I *really* appreciate the distinction you made
between TCN and record_id - that was something that gave me no end of
confusion :-)

As far as Vandelay... just bad timing on my part :-/   I'm just now hearing
about it (that will teach me to stick my head in the sand!); I'm quite
excited by the prospect.  If nothing else, though, monkeying with these
batch-loading scripts helped me to understand the system more :-D

Thanks again - I appreciate the feedback.

-David Christensen

On Thu, Oct 23, 2008 at 5:14 PM, Dan Wells <[EMAIL PROTECTED]> wrote:

> Hello David,
>
> I've only given the code a once-over at this point, but I expect to try it
> out soon, so thanks so much for taking this on!  It looks very promising.  I
> am also quite curious how "Vandelay" is going to affect our need for the
> various load scripts, but that is for another day (Oct. 30, to be exact).
>
> I did notice a few small things related to a recent update to marc2bre.pl.
>  I am going to guess, in the marc2bre.pl section of eg_batch_loader.pl, that
> you meant this line:
>
>    if (defined $config->{source_record}->{record_id}->{subfield}) {
>
> to say this:
>
>    if (defined $config->{source_record}->{tcn}->{subfield}) {
>
> since record_id currently doesn't exist in your config file.  It would be
> nice if it did, though, to support both internal ids and tcns, something
> like:
> ...
>    push @args, "--tcnfield"; push @args, "'" .
> $config->{source_record}->{tcn}->{tag} . "'";
>    if (defined $config->{source_record}->{tcn}->{subfield}) {
>        push @args, " --tcnsubfield"; push @args, "'" .
> $config->{source_record}->{tcn}->{subfield} . "'";
>    }
>    if (defined $config->{source_record}->{record_id}->{tag}) {
>        push @args, " --idfield"; push @args, "'" .
> $config->{source_record}->{record_id}->{tag} . "'";
>                if (defined
> $config->{source_record}->{record_id}->{subfield}) {
>                          push @args, " --idsubfield"; push @args, "'" .
> $config->{source_record}->{record_id}->{subfield} . "'";
>                }
>    }
> ...
>
> with the config file changed to:
> ...
>  <source_record>
>    <tcn>
>      <!-- Which field has the title control number?
>           Commonly used are 001 (leave 'subfield' blank)
>           or 035 subfield 'a'
>           Note that the Evergreen loader will create record IDs, which are
>           a different beast (see below).
>        -->
>      <tag>001</tag>
>      <subfield></subfield>
>    </tcn>
>    <record_id>
>      <!-- Optionally specify this field if you want your
>              Evergreen internal record IDs to match your old
>              system ids (for easier linking to other data)
>        -->
>      <tag>035</tag>
>      <subfield>a</subfield>
>    </record_id>
> ...
>
> I may have made a typo or two, but you get the idea.  I was also
> uncomfortable with calling TCNs "system-unique ids", since they should by
> design be inherent to the record itself, not unique to any particular
> system, but I have no real intention of arguing semantics if things work :)
>
> Hope this helps, and thanks again,
> DW
>
>


-- 
WARNING: You are logged into reality as root....
#!/usr/bin/perl -W

use strict;
use Data::Dumper;
use Getopt::Long;
use XML::Simple qw(:strict);
use Log::Dispatch;
use Log::Dispatch::File;
use Log::Dispatch::Screen;
use File::Inplace;
use DBI;
use Cwd;

# Pipeline stages in execution order.  The main loop below walks this list
# from --start to --stop, dispatching each stage through %fcntable.
my @stages = qw(m2b b2i i2s edsql load qmm hold clrstg inshold gencop meta);
# Dispatch table: stage name -> handler sub.  Every handler returns 0 on
# success and non-zero on failure (a non-zero result aborts the run).
my %fcntable = ("m2b"     => \&marc_to_bre,
		"b2i"     => \&bre_to_ingest,
		"i2s"     => \&ingest_to_sql,
		"edsql"   => \&edit_sql,
		"load"    => \&loader_v2,
		"qmm"     => \&quick_metamap_v2,
		"hold"    => \&parse_holdings,
		"clrstg"  => \&clear_staging_table,
		"inshold" => \&insert_holdings,
		"gencop"  => \&generate_copies_v2,
		"meta"    => \&add_metarecords_v2
    );
 
# Command-line defaults.  The default --stop of "end" matches no stage
# name, so by default every stage from --start onward is executed.
my $flag_clear = 0;
my $flag_help = 0;
my $records_file = "test.mrc";
my $start_at_stage = "m2b";
my $stop_at_stage = "end";
my $result = GetOptions("file=s"     => \$records_file,
			"start=s"    => \$start_at_stage,
			"stop=s"     => \$stop_at_stage,
			"clear"      => \$flag_clear,          # clear database before load
			"help"       => \$flag_help,
    );

# --help: print usage plus a description of each pipeline stage, then exit
# successfully without touching the database.
if ($flag_help) {
    print "usage: $0 --file filename [--start start-stage] [--stop stop-stage] [--clear] [--help]\n";
    print "\tLoad a file of MARC records into your Evergreen system.\n";
    print "\tconfig file is eg_batch_loader_cfg.xml\n";
    print "\n";
    print "\toptions:\n";
    print "\t\t--start: specify which processing stage to start at (defaults to 'm2b')\n";
    print "\t\t--stop : specify which processing stage to stop at (defaults to 'end')\n";
    print "\t\t--clear: WARNING!  This will delete all bibs and holdings from your database!\n";
    print "\n";
    print "\tstages of processing:\n";
    print "\t\tm2b    : marc to bre conversion\n";
    print "\t\tb2i    : bre to ingest conversion\n";
    print "\t\ti2s    : ingest to sql conversion\n";
    print "\t\tedsql  : edit that sql file to allow it to commit\n";
    print "\t\tload   : import the records into postgres\n";
    print "\t\tqmm    : do the quick metarecord map\n";
    print "\t\thold   : parse holdings from your original marc file\n";
    print "\t\tclrstg : clear the holdings staging table\n";
    print "\t\tinshold: insert holdings into staging table\n";
    print "\t\tgencop : generate copies\n";
    print "\t\tmeta   : add metarecord information to db\n";
    print "\n";
    print "\ttransactions are logged to the file (and at the level) specified in your config file\n";
    print "\n";
    exit 0;
}

# Parse the XML config.  SuppressEmpty => undef makes empty elements such
# as <subfield></subfield> come back as undef rather than '' or {}, which
# is what the "defined ...->{subfield}" checks in the stage subs rely on.
my $xs = XML::Simple->new( ForceArray => 0, 
			   KeyAttr => [],
			   SuppressEmpty => undef,
    );
my $config = $xs->XMLin('eg_batch_loader_cfg.xml');

# Log to both a file and the screen; the destination and minimum level for
# each come from the <logging> section of the config.
my $dispatcher = Log::Dispatch->new;
$dispatcher->add( Log::Dispatch::File->new( name => 'file1',
					    min_level => $config->{logging}->{file}->{loglevel},
					    filename =>  $config->{logging}->{file}->{logfile} 
		  ) 
    );
$dispatcher->add( Log::Dispatch::Screen->new( name      => 'screen',
					      min_level => $config->{logging}->{screen}->{loglevel},
					      stderr    => 1
		  )
    );

$dispatcher->log( level => 'info',
		  message => "INFO: Database: name [" . $config->{database}->{db_name} . "]\n" );
$dispatcher->log( level => 'debug', 
		  message => "DEBUG: " . Dumper($config) );

# --clear: destructive wipe of bibs/holdings before loading anything.
if ($flag_clear) { clear_database() };

my $started = 0;

# Walk the pipeline from --start to --stop.  Any stage returning non-zero
# aborts the whole run with exit status 1.
# NOTE(review): the --stop stage itself is NOT executed ("last" fires
# before its handler runs), and a misspelled --start name silently skips
# every stage -- confirm both behaviors are intended.
foreach my $stage (@stages) {
    last if ($stop_at_stage eq $stage);
    if (($started) || ($start_at_stage eq $stage))    {
	$started = 1;
	if ( 0 != $fcntable{$stage}->() ) { 
	    $dispatcher->log( level => 'error',
			      message => "ERR: $stage failed.\n");
	    exit(1)
	};
    } else {
	$dispatcher->log( level => 'notice', 
			  message => "NOTICE: Skipping $stage\n" ); 
    }
}

exit(0);


#
# Stage 1
#
# Convert the raw MARC file ($records_file) to a BRE (biblio.record_entry)
# dump by shelling out to marc2bre.pl; output lands in "$records_file.bre".
# Returns 0 on success, non-zero on failure.
sub marc_to_bre {
    my $msg_command_name = "MARC to BRE conversion";
    $dispatcher->log( level => 'info',
		      message => "INFO: Beginning $msg_command_name\n");

    # New bib records get ids starting one past the current maximum.
    my $starting_id = get_starting_id();
    if (defined $starting_id) {
	$dispatcher->log( level => 'debug',
			  message => "DEBUG: starting id: " . $starting_id . "\n" );
    } else {
	$dispatcher->log( level => 'error',
			  message => "ERR: unable to get starting id\n" );
	return 1;
    }

    my $command = $config->{paths}->{marc2bre} . "/marc2bre.pl";
    my @args;
    push @args, "--marctype"; push @args, "USMARC";
    push @args, "--db_name";  push @args, $config->{database}->{db_name};
    push @args, "--db_user";  push @args, $config->{database}->{db_user};
    push @args, "--db_pw";    push @args, $config->{database}->{db_pw};
    push @args, "--startid";  push @args, $starting_id;
    # TCN (title control number) source field; the subfield is optional
    # (an 001 control field has no subfields).
    push @args, "--tcnfield"; push @args, "'" . $config->{source_record}->{tcn}->{tag} . "'";
    if (defined $config->{source_record}->{tcn}->{subfield}) {
	# BUGFIX: the option names below used to carry a stray leading
	# space (" --tcnsubfield" etc.), silently relying on shell
	# whitespace collapsing; normalized to match the other args.
	push @args, "--tcnsubfield"; push @args, "'" . $config->{source_record}->{tcn}->{subfield} . "'";
    }
    # Optional: reuse the legacy system's ids as Evergreen record ids.
    if (defined $config->{source_record}->{record_id}->{tag}) {
	push @args, "--idfield"; push @args, "'" . $config->{source_record}->{record_id}->{tag} . "'";
	if (defined $config->{source_record}->{record_id}->{subfield}) {
	    push @args, "--idsubfield"; push @args, "'" . $config->{source_record}->{record_id}->{subfield} . "'";
	}
    }
    push @args, "--trash";    push @args, $config->{source_record}->{holdings}->{tag};  # holdings aren't generated from the BRE file.
    push @args, $records_file;
    $command .= " @args >$records_file.bre";
    
    $dispatcher->log( level => 'info',
		      message => "INFO: marc2bre command: " . $command . "\n" );
    return run_command( $command, $msg_command_name );
}

#
# Stage 2
#
# Feed the BRE file through direct_ingest.pl, writing the result to
# "$records_file.ingest".  Returns run_command()'s status (0 on success).
sub bre_to_ingest {
#perl direct_ingest.pl sample_marc.bre > sample_marc.ingest
    my $stage_label = "BRE to INGEST conversion";
    $dispatcher->log( level => 'info',
		      message => "INFO: Beginning $stage_label\n");

    # Assemble "path/direct_ingest.pl FILE.bre >FILE.ingest" in one go.
    my $cmd = sprintf( "%s/direct_ingest.pl %s.bre >%s.ingest",
		       $config->{paths}->{direct_ingest},
		       $records_file,
		       $records_file );

    $dispatcher->log( level => 'info',
		      message => "INFO: direct_ingest command: " . $cmd . "\n" );

    return run_command( $cmd, $stage_label );
}


#
# Stage 3
#
# Turn the ingest stream into COPY-ready SQL files via parallel_pg_loader.pl.
# One output file per record class ("$records_file.bre.sql", ".mrd.sql",
# ..., plus a wrapper "$records_file.sql").  Returns 0 on success.
sub ingest_to_sql {
    my $msg_command_name = "INGEST to SQL conversion";
    $dispatcher->log( level => 'info',
		      message => "INFO: Beginning $msg_command_name\n");
    my $command = $config->{paths}->{parallel_pg_loader} . "/parallel_pg_loader.pl";
    my @args;

    # Record classes in load order; bre keeps its source-supplied ids,
    # every other class gets auto-generated primary keys.
    my @classes = qw(bre mrd mfr mtfe mafe msfe mkfe msefe);
    foreach my $class (@classes) {
	push @args, "-order"; push @args, $class;
    }
    foreach my $class (grep { $_ ne 'bre' } @classes) {
	push @args, "-autoprimary"; push @args, $class;
    }
    push @args, "-output"; push @args, "$records_file"; # this is the output file basename; gets appended with ".bre.sql", ".mrd.sql", ".sql", etc

    $command .= " @args <$records_file.ingest";
    
    # BUGFIX: this used to log "direct_ingest command" -- a copy/paste
    # leftover from stage 2; the command here is parallel_pg_loader.
    $dispatcher->log( level => 'info',
		      message => "INFO: parallel_pg_loader command: " . $command . "\n" );
    
    return run_command( $command, $msg_command_name );
}


#
# Stage 4
#
# Edit the generated SQL in place: parallel_pg_loader leaves the final
# COMMIT commented out ("-- COMMIT;") so a human can review the file
# before it takes effect; we run unattended, so enable it.
# Returns 0 on success, 1 on failure.
sub edit_sql {
    # Ok, File::Inplace is a bit of overkill here... but the module's available, and
    # we may want to use it for some more tricky stuff later :-)
    my $msg_command_name = "Edit SQL to change --COMMIT to COMMIT";
    $dispatcher->log( level => 'info',
		      message => "INFO: Beginning $msg_command_name\n");
    my $name = "$records_file.sql";

    # File::Inplace->new and ->commit die on I/O errors (e.g. the SQL file
    # was never generated); trap that and fail the stage like every other
    # handler instead of crashing the whole loader.  Also switched from
    # the indirect-object "new File::Inplace(...)" syntax to an explicit
    # method call.
    eval {
	my $editor = File::Inplace->new(file => $name);
	while (my ($line) = $editor->next_line) {
	    if ($line =~ /^-- COMMIT;$/) {
		$editor->replace_line("COMMIT;");
	    }
	}
	$editor->commit;
    };
    if ($@) {
	$dispatcher->log( level => 'error',
			  message => "ERR: could not edit $name -- $@" );
	return 1;
    }

    $dispatcher->log( level => 'info',
		      message => "INFO: Completed $msg_command_name\n");
    return (0);
}

#
# Stage 5
#
# Bulk-load the SQL data files produced by stage 3 into Postgres with
# COPY, all inside a single transaction: either every table loads or
# none do.  Returns 0 on success, 1 on failure.
sub loader_v2 {
    my $rv = 0; # return value of 0 indicates success
    my $SQL;
    my $msg_command_name = "Loading";
    $dispatcher->log( level => 'info',
		      message => "INFO: Beginning $msg_command_name\n");

    # COPY ... FROM runs server-side and needs an absolute path.
    my $cwd = getcwd();

    # One (table, column list, file suffix) triple per COPY, in load
    # order; bre loads first so the metabib rows can reference it.
    # These eight statements used to be written out longhand.
    my @copy_specs = (
	[ 'biblio.record_entry',
	  'active,create_date,creator,deleted,edit_date,editor,fingerprint,id,last_xact_id,marc,quality,source,tcn_source,tcn_value',
	  'bre' ],
	[ 'metabib.rec_descriptor',
	  'audience,bib_level,cat_form,char_encoding,control_type,enc_level,item_form,item_lang,item_type,lit_form,pub_status,record,type_mat,vr_format',
	  'mrd' ],
	[ 'metabib.keyword_field_entry', 'field,source,value', 'mkfe' ],
	[ 'metabib.author_field_entry',  'field,source,value', 'mafe' ],
	[ 'metabib.title_field_entry',   'field,source,value', 'mtfe' ],
	[ 'metabib.full_rec', 'ind1,ind2,record,subfield,tag,value', 'mfr' ],
	[ 'metabib.subject_field_entry', 'field,source,value', 'msfe' ],
	[ 'metabib.series_field_entry',  'field,source,value', 'msefe' ],
    );

    my $dbh = DBI->connect("dbi:Pg:dbname=" . $config->{database}->{db_name},
			   $config->{database}->{db_user},
			   $config->{database}->{db_pw},
			   {AutoCommit => 0}
	);
    if (defined $dbh) {
	$dispatcher->log( level => 'debug',
			  message => "DEBUG: connected to database\n" );

	$dbh->{AutoCommit} = 0;  # enable transactions, if possible
	$dbh->{RaiseError} = 1;  # failed statements throw into the eval below
	eval {
	    $SQL = "SET CLIENT_ENCODING TO 'UNICODE'";
	    $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
	    $dbh->do( $SQL );

	    foreach my $spec (@copy_specs) {
		my ($table, $columns, $suffix) = @$spec;
		$SQL = "COPY $table ($columns) FROM '" . $cwd . "/" . $records_file . ".$suffix.sql'";
		$dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
		$dbh->do( $SQL );
	    }

	    $dbh->commit;   # commit the changes if we get this far
	};
	if ($@) {
	    # BUGFIX: restore the actual error text ($@); the message had
	    # been mangled to "[EMAIL PROTECTED]" by a mail archiver.
	    $dispatcher->log( level => 'error',
			      message => "ERR: transaction aborted because $@\n" );
	    # now rollback to undo the incomplete changes
	    # but do it in an eval{} as it may also fail
	    eval { $dbh->rollback };
	    # add other application on-error-clean-up code here
	    $rv = 1;
	}
	$dbh->disconnect();

    } else {
	$dispatcher->log( level => 'error',
			  message => "ERR: could not connect to database: " . $DBI::errstr . "\n" );
	$rv = 1;
    }
    return $rv;

}


#
# Stage 6
#
# Rebuild metabib.metarecord and metabib.metarecord_source_map from the
# bib records: records sharing a fingerprint are grouped under one
# metarecord, with the highest-quality record as master.
# Returns 0 on success, 1 on failure.
sub quick_metamap_v2 {
    my $rv = 0; # return value of 0 indicates success
    my $SQL;
    my $msg_command_name = "Quick metarecord map";
    $dispatcher->log( level => 'info',
		      message => "INFO: Beginning $msg_command_name\n");
    my $dbh = DBI->connect("dbi:Pg:dbname=" . $config->{database}->{db_name},
			   $config->{database}->{db_user},
			   $config->{database}->{db_pw},
			   {AutoCommit => 0}
	);
    if (defined $dbh) {
	$dispatcher->log( level => 'debug',
			  message => "DEBUG: connected to database\n" );

	$dbh->{AutoCommit} = 0;  # enable transactions, if possible
	$dbh->{RaiseError} = 1;
	eval {
	    # Drop the FK constraint so both tables can be truncated in
	    # either order; it is re-added at the end of the transaction.
	    $SQL = "ALTER TABLE metabib.metarecord_source_map DROP CONSTRAINT metabib_metarecord_source_map_metarecord_fkey";
	    $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
	    $dbh->do( $SQL );

	    $SQL = "TRUNCATE metabib.metarecord";
	    $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
	    $dbh->do( $SQL );

	    $SQL = "TRUNCATE metabib.metarecord_source_map";
	    $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
	    $dbh->do( $SQL );

	    # One metarecord per distinct fingerprint; DISTINCT ON plus the
	    # quality DESC ordering picks the best record as master.
	    $SQL = <<'EOSql';
INSERT INTO metabib.metarecord (fingerprint,master_record)
 SELECT fingerprint,id
 FROM (SELECT DISTINCT ON (fingerprint) fingerprint, id, quality
       FROM biblio.record_entry ORDER BY fingerprint, quality desc
      ) AS x
 WHERE fingerprint IS NOT NULL
EOSql
	    $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
	    $dbh->do( $SQL );

	    # Map every bib record to its metarecord via the fingerprint.
	    $SQL = <<'EOSql';
INSERT INTO metabib.metarecord_source_map (metarecord,source)
 SELECT m.id, b.id
 FROM biblio.record_entry b
 JOIN metabib.metarecord m ON (m.fingerprint = b.fingerprint)
EOSql
	    $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
	    $dbh->do( $SQL );

	    $SQL = "ALTER TABLE metabib.metarecord_source_map ADD CONSTRAINT metabib_metarecord_source_map_metarecord_fkey FOREIGN KEY (metarecord) REFERENCES metabib.metarecord (id)";
	    $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
	    $dbh->do( $SQL );

	    $dbh->commit;   # commit the changes if we get this far
	};
	if ($@) {
	    # BUGFIX: restore the actual error text ($@); the message had
	    # been mangled to "[EMAIL PROTECTED]" by a mail archiver.
	    $dispatcher->log( level => 'error',
			      message => "ERR: transaction aborted because $@\n" );
	    # now rollback to undo the incomplete changes
	    # but do it in an eval{} as it may also fail
	    eval { $dbh->rollback };
	    # add other application on-error-clean-up code here
	    $rv = 1;
	}
	# NOTE(review): the VACUUMs below run even when the transaction
	# above failed, and RaiseError is still set so a VACUUM error
	# would die uncaught -- confirm both are intended.
	$dbh->{AutoCommit} = 1;  # disable transactions in order to do the vacuuming
	$SQL = "VACUUM FULL ANALYZE VERBOSE metabib.metarecord";
	$dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
	$dbh->do( $SQL );

	$SQL = "VACUUM FULL ANALYZE VERBOSE metabib.metarecord_source_map";
	$dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
	$dbh->do( $SQL );

	$dbh->disconnect();

    } else {
	$dispatcher->log( level => 'error',
			  message => "ERR: could not connect to database: " . $DBI::errstr . "\n" );
	$rv = 1;
    }
    return $rv;
}



#
# Stage 7
#
# Shell out to eg_batch_loader_parse_holdings.pl to extract holdings from
# the original MARC file; that script writes "insert_holdings.sql".
# Returns run_command()'s status (0 on success).
sub parse_holdings {
    # NOTE: generates file "insert_holdings.sql"

    my $msg_command_name = "Parse holdings";
    $dispatcher->log( level => 'info',
		      message => "INFO: Beginning $msg_command_name\n");
    my $command = $config->{paths}->{parse_holdings} . "/eg_batch_loader_parse_holdings.pl";

    my $tcn      = $config->{source_record}->{tcn};
    my $holdings = $config->{source_record}->{holdings};

    # Options are (name, value) pairs assembled in bulk; the bib-id
    # subfield is the only optional one.
    my @args = ( "--file",     $records_file,
		 "--bibidtag", $tcn->{tag} );
    if (defined $tcn->{subfield}) {
	push @args, "--bibidsfd", $tcn->{subfield};
    }
    push @args, "--holdingstag", $holdings->{tag},
		"--callno",      $holdings->{callno},
		"--copyno",      $holdings->{copyno},
		"--barcode",     $holdings->{barcode},
		"--location",    $holdings->{location},
		"--library",     $holdings->{owning_library},
		"--itemtype",    $holdings->{itemtype},
		"--created",     $holdings->{created};
    $command .= " @args";

    $dispatcher->log( level => 'info',
		      message => "INFO: parse holdings command: " . $command . "\n" );

    return run_command( $command, $msg_command_name );
}

#
# Stage 8
#
# Empty the staging_items table so stage 9 (inshold) starts from a clean
# slate.  Returns 0 on success, 1 on failure.
sub clear_staging_table {
    my $rv = 0; # return value of 0 indicates success
    my $SQL;
    my $msg_command_name = "Clear staging table";
    $dispatcher->log( level => 'info',
		      message => "INFO: Beginning $msg_command_name\n");
    my $dbh = DBI->connect("dbi:Pg:dbname=" . $config->{database}->{db_name},
			   $config->{database}->{db_user},
			   $config->{database}->{db_pw},
			   {AutoCommit => 0}
	);
    if (defined $dbh) {
	$dispatcher->log( level => 'debug',
			  message => "DEBUG: connected to database\n" );

	$dbh->{AutoCommit} = 0;  # enable transactions, if possible
	$dbh->{RaiseError} = 1;
	eval {
	    $SQL = "DELETE FROM staging_items";
	    $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
	    $dbh->do( $SQL );
	    
	    $dbh->commit;   # commit the changes if we get this far
	};
	if ($@) {
	    # BUGFIX: restore the actual error text ($@); the message had
	    # been mangled to "[EMAIL PROTECTED]" by a mail archiver.
	    $dispatcher->log( level => 'error',
			      message => "ERR: transaction aborted because $@\n" );
	    # now rollback to undo the incomplete changes
	    # but do it in an eval{} as it may also fail
	    eval { $dbh->rollback };
	    # add other application on-error-clean-up code here
	    $rv = 1;
	}
	$dbh->disconnect();

    } else {
	$dispatcher->log( level => 'error',
			  message => "ERR: could not connect to database: " . $DBI::errstr . "\n" );
	$rv = 1;
    }
    return $rv;

}


#
# Stage 9
#
# Run "insert_holdings.sql" (written by stage 7) through psql to populate
# the staging_items table.  psql prompts interactively for the password.
# Returns run_command()'s status (0 on success).
sub insert_holdings {
    # psql -U evergreen -f insert_holdings.sql evergreen

    my $msg_command_name = "Insert holdings";
    my $name = "insert_holdings.sql";

    $dispatcher->log( level => 'info',
		      message => "INFO: Beginning $msg_command_name\n");
    my $command = "psql";
    my @args;
    push @args, "-U"; push @args, $config->{database}->{db_user};
    push @args, "-f"; push @args, $name;
    push @args, $config->{database}->{db_name};
    $command .= " @args";
    
    $dispatcher->log( level => 'info',
		      message => "INFO: insert holdings command: " . $command . "\n" );
    
    # SECURITY FIX: the cleartext database password used to be written to
    # the log file here; point the operator at the config file instead.
    $dispatcher->log( level => 'info',
		      message => "INFO: Reminder - psql will prompt for the password; it is the db_pw value in your config file\n" );
    
    return run_command( $command, $msg_command_name );
}



#
# Stage 10
#
# Turn the rows in staging_items into real Evergreen assets: first the
# call numbers (asset.call_number), then the copies (asset.copy), all in
# one transaction.  Returns 0 on success, 1 on failure.
sub generate_copies_v2 {
    my $rv = 0; # return value of 0 indicates success
    my $SQL;
    my $msg_command_name = "Generate copies";
    $dispatcher->log( level => 'info',
		      message => "INFO: Beginning $msg_command_name\n");
    my $dbh = DBI->connect("dbi:Pg:dbname=" . $config->{database}->{db_name},
			   $config->{database}->{db_user},
			   $config->{database}->{db_pw},
			   {AutoCommit => 0}
	);
    if (defined $dbh) {
	$dispatcher->log( level => 'debug',
			  message => "DEBUG: connected to database\n" );

	$dbh->{AutoCommit} = 0;  # enable transactions, if possible
	$dbh->{RaiseError} = 1;
	eval {
	    # -- First, we build shelving location
	    # DAVID - ONLY DO THIS IF IT HASN'T BEEN ADDED YET!
#	    $SQL = <<'EOSql';
#INSERT INTO asset.copy_location (name, owning_lib)
#        SELECT  DISTINCT l.location, ou.id
#          FROM  staging_items l JOIN actor.org_unit ou
#                        ON (l.owning_lib = ou.shortname)
#EOSql
#	    $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
#	    $dbh->do( $SQL );
	    
	    # -- Import call numbers for bibrecord->library mappings
	    $SQL = <<'EOSql';
INSERT INTO asset.call_number (creator,editor,record,label,owning_lib)
        SELECT  DISTINCT 1, 1, l.bibkey , l.callnum, ou.id
          FROM  staging_items l
                JOIN biblio.record_entry b ON (l.bibkey = b.id)
                JOIN actor.org_unit ou ON (l.owning_lib = ou.shortname)
EOSql
	    $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
	    $dbh->do( $SQL );

	    # -- Import base copy data.  The CASE expressions map the
	    # legacy item types onto circ_modifier/deposit/ref flags.
	    $SQL = <<'EOSql';
INSERT INTO asset.copy (
        circ_lib, creator, editor, create_date, barcode,
        status, location, loan_duration,
        fine_level, circ_modifier, deposit, ref, call_number)
        SELECT  DISTINCT ou.id AS circ_lib,
                1 AS creator,
                1 AS editor,
                l.createdate AS create_date,
                l.barcode AS barcode,
                0 AS status,
                cl.id AS location,
                2 AS loan_duration,
                2 AS fine_level,
                CASE
                        WHEN l.item_type IN ('REFERENCE', 'DEPOSIT_BK') THEN 'BOOK'
                        ELSE l.item_type
                END AS circ_modifier,
                CASE
                        WHEN l.item_type = 'DEPOSIT_BK' THEN TRUE
                        ELSE FALSE
                END AS deposit,
                CASE
                        WHEN l.item_type = 'REFERENCE' THEN TRUE
                        ELSE FALSE
                END AS ref,
                cn.id AS call_number
          FROM  staging_items l
                JOIN actor.org_unit ou
                        ON (l.owning_lib = ou.shortname)
                JOIN asset.copy_location cl
                        ON (ou.id = cl.owning_lib AND l.location = cl.name)
                JOIN asset.call_number cn
                        ON (ou.id = cn.owning_lib AND l.bibkey = cn.record AND l.callnum = cn.label)
EOSql
	    $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
	    $dbh->do( $SQL );

	    $dbh->commit;   # commit the changes if we get this far
	};
	if ($@) {
	    # BUGFIX: restore the actual error text ($@); the message had
	    # been mangled to "[EMAIL PROTECTED]" by a mail archiver.
	    $dispatcher->log( level => 'error',
			      message => "ERR: transaction aborted because $@\n" );
	    # now rollback to undo the incomplete changes
	    # but do it in an eval{} as it may also fail
	    eval { $dbh->rollback };
	    # add other application on-error-clean-up code here
	    $rv = 1;
	}
	$dbh->disconnect();

    } else {
	$dispatcher->log( level => 'error',
			  message => "ERR: could not connect to database: " . $DBI::errstr . "\n" );
	$rv = 1;
    }
    return $rv;
}

#
# Stage 11
#
# Incremental counterpart to quick_metamap_v2: create metarecords and
# source-map rows only for bib records not yet mapped, leaving existing
# mappings untouched.  Returns 0 on success, 1 on failure.
sub add_metarecords_v2 {
    my $rv = 0; # return value of 0 indicates success
    my $SQL;
    my $msg_command_name = "Add metarecords";
    $dispatcher->log( level => 'info',
		      message => "INFO: Beginning $msg_command_name\n");
    my $dbh = DBI->connect("dbi:Pg:dbname=" . $config->{database}->{db_name},
			   $config->{database}->{db_user},
			   $config->{database}->{db_pw},
			   {AutoCommit => 0}
	);
    if (defined $dbh) {
	$dispatcher->log( level => 'debug',
			  message => "DEBUG: connected to database\n" );

	$dbh->{AutoCommit} = 0;  # enable transactions, if possible
	$dbh->{RaiseError} = 1;
	eval {
	    # New metarecords: one per unmapped record whose fingerprint
	    # doesn't already have a metarecord.
	    $SQL = <<'EOSql';
INSERT INTO metabib.metarecord (fingerprint, master_record)
 SELECT  b.fingerprint, b.id
 FROM  biblio.record_entry b
 WHERE b.id IN (SELECT r.id FROM biblio.record_entry r
                       LEFT JOIN metabib.metarecord_source_map k
                       ON (k.source = r.id)
                       WHERE k.id IS NULL AND r.fingerprint IS NOT NULL)
 AND NOT EXISTS ( SELECT 1 FROM metabib.metarecord
                           WHERE fingerprint = b.fingerprint )
EOSql
	    $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
	    $dbh->do( $SQL );

	    # Map every still-unmapped record to the metarecord sharing
	    # its fingerprint.
	    $SQL = <<'EOSql';
INSERT INTO metabib.metarecord_source_map (metarecord, source)
 SELECT  m.id, r.id
 FROM  biblio.record_entry r
 JOIN metabib.metarecord m USING (fingerprint)
 WHERE r.id IN (SELECT b.id FROM biblio.record_entry b
                LEFT JOIN metabib.metarecord_source_map k
                ON (k.source = b.id) WHERE k.id IS NULL)
EOSql
	    $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
	    $dbh->do( $SQL );

	    $dbh->commit;   # commit the changes if we get this far
	};
	if ($@) {
	    # BUGFIX: restore the actual error text ($@); the message had
	    # been mangled to "[EMAIL PROTECTED]" by a mail archiver.
	    $dispatcher->log( level => 'error',
			      message => "ERR: transaction aborted because $@\n" );
	    # now rollback to undo the incomplete changes
	    # but do it in an eval{} as it may also fail
	    eval { $dbh->rollback };
	    # add other application on-error-clean-up code here
	    $rv = 1;
	}
	$dbh->disconnect();

    } else {
	$dispatcher->log( level => 'error',
			  message => "ERR: could not connect to database: " . $DBI::errstr . "\n" );
	$rv = 1;
    }
    return $rv;
}


#
# --clear support: DESTRUCTIVE.  Truncates biblio.record_entry (cascading
# to all holdings) while preserving the special id = -1 rows Evergreen
# requires in biblio.record_entry and asset.call_number, by stashing them
# in temp tables and re-inserting.  Returns 0 on success, 1 on failure.
sub clear_database {
    my $rv = 0; # return value of 0 indicates success
    my $SQL;
    my $msg_command_name = "CLEAR DATABASE!!!  Deleting all records and holdings.";
    $dispatcher->log( level => 'info',
		      message => "INFO: Beginning $msg_command_name\n");
    my $dbh = DBI->connect("dbi:Pg:dbname=" . $config->{database}->{db_name},
			   $config->{database}->{db_user},
			   $config->{database}->{db_pw},
			   {AutoCommit => 0}
	);
    if (defined $dbh) {
	$dispatcher->log( level => 'debug',
			  message => "DEBUG: connected to database\n" );

	$dbh->{AutoCommit} = 0;  # enable transactions, if possible
	$dbh->{RaiseError} = 1;
	eval {
	    # Stash the id = -1 rows before the cascade truncate...
	    $SQL = "CREATE TEMP TABLE special_bib AS SELECT * FROM biblio.record_entry WHERE id = -1";
	    $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
	    $dbh->do( $SQL );
	    
	    $SQL = "CREATE TEMP TABLE special_volume AS SELECT * FROM asset.call_number WHERE id = -1";
	    $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
	    $dbh->do( $SQL );
	    
	    $SQL = "TRUNCATE biblio.record_entry CASCADE";
	    $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
	    $dbh->do( $SQL );
	    
	    # ...and restore them afterwards.
	    $SQL = "INSERT INTO biblio.record_entry SELECT * FROM special_bib";
	    $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
	    $dbh->do( $SQL );
	    
	    $SQL = "INSERT INTO asset.call_number SELECT * FROM special_volume";
	    $dispatcher->log( level => 'debug', message => "DEBUG: $SQL\n" );
	    $dbh->do( $SQL );
	    
	    $dbh->commit;   # commit the changes if we get this far
	};
	if ($@) {
	    # BUGFIX: restore the actual error text ($@); the message had
	    # been mangled to "[EMAIL PROTECTED]" by a mail archiver.
	    $dispatcher->log( level => 'error',
			      message => "ERR: transaction aborted because $@\n" );
	    # now rollback to undo the incomplete changes
	    # but do it in an eval{} as it may also fail
	    eval { $dbh->rollback };
	    # add other application on-error-clean-up code here
	    $rv = 1;
	}
	$dbh->disconnect();

    } else {
	$dispatcher->log( level => 'error',
			  message => "ERR: could not connect to database: " . $DBI::errstr . "\n" );
	$rv = 1;
    }
    return $rv;
}



#
# Return the next free bib record id (one past the current maximum in
# biblio.record_entry), or undef if the database cannot be queried.
#
sub get_starting_id {
    my $id = undef;
    # BUGFIX: MAX(id) is NULL on an empty table, which made marc_to_bre
    # abort on a fresh database; COALESCE so an empty table yields 1.
    my $SQL = 'SELECT COALESCE(MAX(id), 0) + 1 FROM biblio.record_entry';
    my $dbh = DBI->connect("dbi:Pg:dbname=" . $config->{database}->{db_name},
			   $config->{database}->{db_user},
			   $config->{database}->{db_pw},
			   {AutoCommit => 0}
	);
    if (defined $dbh) {
	$dispatcher->log( level => 'debug',
			  message => "DEBUG: connected to database\n" );
	my $ary_ref = $dbh->selectrow_arrayref( $SQL );
	if (defined $ary_ref) {
	    $id = $ary_ref->[0];
	} else {
	    $dispatcher->log( level => 'error',
			      message => "ERR: query failed! -- " . $SQL . "\n" );
	}
	# BUGFIX: disconnect on the failure path too; the handle used to
	# leak whenever the query returned no row.
	$dbh->disconnect();

    } else {
	$dispatcher->log( level => 'error',
			  message => "ERR: could not connect to database: " . $DBI::errstr . "\n" );
    }
    return $id;
}




#
# Run the command and check for success/failure
#
# Executes $command through the shell via system() and logs the outcome.
# Returns the raw $? status, so 0 means success and anything else failure.
sub run_command {
    my ($command, $msg_command_name) = @_;

    my $status = system( $command );

    if ( $status == 0 ) {
	# completed ok
	$dispatcher->log( level => 'info',
			  message => "INFO: Completed $msg_command_name\n");
	return $?;
    }

    # Non-zero: distinguish launch failure, death by signal, and a plain
    # non-zero exit, per the usual $? decoding.
    if ($? == -1) {
	$dispatcher->log( level => 'error', 
			  message => "ERR: failed to execute: $!\n" );
    } elsif ($? & 127) {
	my $signal   = $? & 127;
	my $had_core = ($? & 128) ? 'with' : 'without';
	$dispatcher->log( level => 'error', 
			  message => sprintf("ERR: child died with signal %d, %s coredump\n", $signal, $had_core) );
	
    } else {
	$dispatcher->log( level => 'error', 
			  message => sprintf("ERR: child exited with value %d\n", $? >> 8) );
    }

    $dispatcher->log( level => 'error',
		      message => "ERR: $msg_command_name did not complete\n");
    #exit(1);

    return $?;
}



<config>
  <logging>
    <file>
      <logfile>ebl.log</logfile>
      <!-- loglevel can be one of:
	   debug
	   info
	   notice
	   warning
	   error
	   critical
	   alert
	   emergency
	   
	   (though in practice, you'll likely specify warning or notice)
	-->
      <loglevel>debug</loglevel>
    </file>

    <screen>
      <loglevel>info</loglevel>
    </screen>
  </logging>

  <database>
    <db_name>evergreen</db_name>
    <db_user>evergreen</db_user>
    <db_pw>evergreen</db_pw>
  </database>

  <paths>
    <!-- paths to various utilities -->
    <marc2bre>.</marc2bre>
    <direct_ingest>.</direct_ingest>
    <parallel_pg_loader>.</parallel_pg_loader>
    <parse_holdings>.</parse_holdings>
  </paths>

  <source_record>
    <tcn>
      <!-- Which field has the TCN (title control number)?
	   Commonly used are 001 (leave 'subfield' blank)
	   or 035 subfield 'a'
	   Note that the Evergreen loader will create record IDs, which are
	   a different beast (see below).
	-->
      <tag>001</tag>
      <subfield></subfield>
    </tcn>

    <record_id>
      <!-- Optionally specify this field if you want your
           Evergreen internal record IDs to match your old
           system ids (for easier linking to other data)
	-->
      <tag>035</tag>
      <subfield>a</subfield>
    </record_id>

    <holdings>
      <tag>852</tag>
      <callno>h</callno>
      <copyno>t</copyno>
      <barcode>p</barcode>
      <owning_library>b</owning_library>
      <location>b</location>
      <itemtype>a</itemtype>
      <created>d</created>
    </holdings>
  </source_record>
    
</config>
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;

# vim:noet:ts=4:sw=4:

# DC - Our bibids are placed in subfield 001
# Our holdings are stored in field 852 with the following structure (roughly):
# h - call number
# t - copy number
# p - barcode
# b - library
# b - location
# a - item type
# d - create date

# Command-line options and their defaults; each maps to one GetOptions entry
# below, and mirrors the <source_record> section of the loader config.
my $filename         = "test.mrc"; # source MARC file
my $bibidtag         = '001';      # tag carrying the bib record ID
my $bibidsfd;                      # subfield of the ID tag (undef => control field)
my $src_holdingstag  = '852';      # tag carrying per-copy holdings data
my $src_callno       = 'h';        # call number subfield
my $src_copyno       = 't';        # copy number subfield
my $src_barcode      = 'p';        # barcode subfield
my $src_location     = 'b';        # shelving location subfield
my $src_library      = 'b';        # owning library subfield
my $src_itemtype     = 'a';        # item type subfield
my $src_date_created = 'd';        # creation date subfield
my $flag_help        = 0;          # print usage and exit

my $result = GetOptions(
    "file=s"        => \$filename,
    "bibidtag=s"    => \$bibidtag,
    "bibidsfd=s"    => \$bibidsfd,
    "holdingstag=s" => \$src_holdingstag,
    "callno=s"      => \$src_callno,
    "copyno=s"      => \$src_copyno,
    "barcode=s"     => \$src_barcode,
    "location=s"    => \$src_location,
    "library=s"     => \$src_library,
    "itemtype=s"    => \$src_itemtype,
    "created=s"     => \$src_date_created,
    "help"          => \$flag_help,
);

# GetOptions has already warned about any unknown switch; stop here rather
# than run with a half-parsed configuration (previously the result was
# silently ignored).
die "Invalid options; run '$0 --help' for usage.\n" unless $result;

if ($flag_help) {
    # Usage text: the switch names here must match the GetOptions spec
    # above (the old text advertised '--bibtag', which never existed, and
    # omitted --bibidsfd/--holdingstag).
    print "usage: $0 --file filename [--bibidtag xxx] [--bibidsfd x] [--holdingstag xxx]\n";
    print "       [--callno x] [--copyno x] [--barcode x] [--location x] [--library x]\n";
    print "       [--itemtype x] [--created x] [--help]\n";
    print "\n";
    print "\tParse the source marc file for holdings information\n";
    print "\n";
    print "\tOptions:\n";
    print "\t\tbibidtag: record ID tag (035)\n";
    print "\t\tbibidsfd: record ID subfield (a)\n";
    print "\n";
    print "\t\tholdingstag: MARC holdings tag (852)\n";
    print "\t\tcallno  : subfield containing call number (h)\n";
    print "\t\tcopyno  : subfield containing copy number (t)\n";
    print "\t\tbarcode : subfield containing barcode (p)\n";
    print "\t\tlocation: subfield containing location (b)\n";
    print "\t\tlibrary : subfield containing owning library (b)\n";
    print "\t\titemtype: subfield containing item type (a)\n";
    print "\t\tcreated : subfield containing creation date (d)\n";
    print "\n";
    print "\tGenerates 'insert_holdings.sql' file.\n";
    exit 0;
}


use MARC::Batch;
use MARC::File::XML (BinaryEncoding => "utf8", RecordFormat => "USMARC");

# Output file for the generated INSERT statements.
my $sql_file = 'insert_holdings.sql';

# Parsed holdings accumulate here, one hashref per physical copy.
my @holdings;

# The input is binary USMARC; change the first argument to 'XML' (and use
# the commented line) if the source file is MARCXML instead.
#my $batch = MARC::Batch->new('XML', $holdings_file);
my $batch = MARC::Batch->new('USMARC', $filename);

# Pass the holdings accumulator by reference so both subs share it.
# (The mailing-list archive had mangled "\@holdings" here into
# "[EMAIL PROTECTED]", which would not even compile.)
parse_holdings($batch, \@holdings);
format_holdings_for_insert(\@holdings);

exit(0);

# Walk every record in the MARC batch and push one hashref per copy
# (852-style holdings field) onto the array referenced by $holdings.
#
# Parameters:
#   $batch    - a MARC::Batch positioned at the start of the input
#   $holdings - arrayref that receives { bibid, callnum, barcode,
#               location, library, item_type, createdate } hashrefs
#
# Records lacking a usable bib ID field/subfield are skipped with a
# warning rather than producing rows with an empty bibkey.
sub parse_holdings {
    my ($batch, $holdings) = @_;

    while (my $record = $batch->next()) {
        my $bibid_fld = $record->field($bibidtag);
        unless ($bibid_fld) {
            warn "WARN: record has no '$bibidtag' field; skipping\n";
            next;
        }

        # Control fields (00x) have no subfields; everything else is
        # looked up via the configured subfield code.
        my $bibid = $bibid_fld->is_control_field()
                  ? $bibid_fld->data()
                  : $bibid_fld->subfield($bibidsfd);
        unless (defined $bibid) {
            warn "WARN: record has no bib ID in $bibidtag\$$bibidsfd; skipping\n";
            next;
        }

        # Evergreen bib keys are numeric; strip any prefix/punctuation.
        $bibid =~ s/\D+//gs;

        foreach my $copy ($record->field($src_holdingstag)) {
            my %copy_attributes = (
                bibid      => $bibid,
                callnum    => escape_quotes(_subfield_or_empty($copy, $src_callno)),
                barcode    => escape_quotes(_subfield_or_empty($copy, $src_barcode)),
                location   => escape_quotes(_subfield_or_empty($copy, $src_location)),
                library    => escape_quotes(uc _subfield_or_empty($copy, $src_library)),
                item_type  => escape_quotes(_subfield_or_empty($copy, $src_itemtype)),
                createdate => escape_quotes(_subfield_or_empty($copy, $src_date_created)),
            );
            push @$holdings, \%copy_attributes;
        }
    }
    return;
}

# Return the named subfield's value, or '' when the subfield is absent,
# so downstream string operations never warn about undef.
sub _subfield_or_empty {
    my ($field, $code) = @_;
    my $value = $field->subfield($code);
    return defined $value ? $value : '';
}

# Write one INSERT INTO staging_items statement per holding to $sql_file.
#
# Parameters:
#   $holdings - arrayref of hashrefs as built by parse_holdings(); all
#               values are assumed to be already quote-escaped.
#
# Dies if the output file cannot be opened or flushed on close.
sub format_holdings_for_insert {
    my $holdings = shift;

    # Lexical filehandle + 3-arg open (the old bareword SQLFH was a
    # package global); UTF-8 layer matches the MARC::File::XML encoding.
    open(my $sql_fh, '>', $sql_file) or die "Cannot open $sql_file: $!";
    binmode($sql_fh, ':utf8');

# ORIGINAL COMMENTS FROM 'parse_holdings.pl':
# We could do the inserts inside a transaction in case of problems
#
# Right now, the data contains an invalid date format (DD/M/YYYY)
# that PostgreSQL doesn't like (it wants YYYY-MM-DD), so we'll go
# ahead and just commit each row if possible
#
#    print {$sql_fh} "BEGIN;\n\n";

    foreach my $holding (@$holdings) {
        # Hash slice keeps the values in the same order as the column list.
        printf {$sql_fh}
            "INSERT INTO staging_items (bibkey, callnum, createdate, location, barcode, item_type, owning_lib) VALUES ('%s', '%s', '%s', '%s', '%s', '%s', '%s');\n",
            @{$holding}{qw(bibid callnum createdate location barcode item_type library)};
    }

#    print {$sql_fh} "\nCOMMIT;";

    # Buffered write errors only surface at close, so check it.
    close $sql_fh or die "Cannot close $sql_file: $!";
    return;
}

# Double any embedded single quotes so the value can be safely embedded
# in a single-quoted PostgreSQL string literal.
#
# Missing MARC subfields arrive here as undef; treat them as empty
# strings instead of warning and propagating undef into the SQL output.
sub escape_quotes {
    my $string = shift;
    $string = '' unless defined $string;
    $string =~ s/'/''/g;
    return $string;
}

Reply via email to