Modified: trunk/opsview-core/bin/ndoutils_configdumpend
===================================================================
--- trunk/opsview-core/bin/ndoutils_configdumpend 2013-08-14 11:08:21 UTC (rev 13232)
+++ trunk/opsview-core/bin/ndoutils_configdumpend 2013-08-14 16:25:59 UTC (rev 13233)
@@ -42,8 +42,9 @@
$0 = "ndoutils_configdumpend";
my $logger = Log::Log4perl->get_logger($0);
+my $MAX_RETRIES = 5;
+
my $dbh = Runtime->db_Main;
-$dbh->{HandleError} = sub { $logger->fatal(shift) };
$logger->info( "Start" );
my $opsview_db = Opsview::Config->db;
@@ -61,8 +62,9 @@
$new_table = $org_table . '_tmp';
}
$logger->debug( 'Copying ' . $org_table . ' to ' . $new_table );
- $dbh->do( 'DROP TABLE IF EXISTS ' . $new_table );
- $dbh->do( $create . ' TABLE ' . $new_table . ' LIKE ' . $org_table );
+ deadlock_protected( 'DROP TABLE IF EXISTS ' . $new_table );
+ deadlock_protected(
+ $create . ' TABLE ' . $new_table . ' LIKE ' . $org_table );
return $new_table;
}
@@ -71,37 +73,71 @@
( my $new_table = $org_table ) =~ s/_tmp$//;
my $old_table = $new_table . '_old';
$logger->debug( 'Renaming ' . $org_table . ' to ' . $new_table );
- $dbh->do( 'RENAME TABLE '
+ deadlock_protected( 'RENAME TABLE '
. $new_table . ' TO '
. $old_table . ', '
. $org_table . ' TO '
. $new_table );
$logger->debug( 'Dropping ' . $old_table );
- $dbh->do( 'DROP TABLE ' . $old_table );
+ deadlock_protected( 'DROP TABLE ' . $old_table );
}
+sub deadlock_protected {
+ my @args = @_;
+ my $try = 0;
+ SQL_DO: eval {
+ $try++;
+ $logger->warn("Restarting query due to deadlock, try: $try.\n")
+ if $try > 1;
+ if ( @args == 1 && ref $args[0] eq 'CODE' ) {
+ $args[0]->();
+ }
+ else {
+ $dbh->do(@args);
+ }
+ };
+ if ( my $e = $@ ) {
+ chomp $e;
+ if ( $e =~ /Deadlock.*transaction/ ) {
+ my $wait_for = $MAX_RETRIES - $try + 1;
+ $logger->warn( "Deadlock detected, retry in ${wait_for}s.\n$e\n" );
+ sleep $wait_for;
+ goto SQL_DO unless $try >= $MAX_RETRIES;
+ }
+ else {
+ $logger->fatal($e);
+ }
+ }
+}
+
my $table;
my $temp_table;
# Matches contacts between runtime and opsview
-$table = copy_table( 'opsview_contacts' );
-$dbh->do( "
-INSERT INTO $table
-SELECT c.id, o.object_id, c.name
-FROM
- $opsview_db.contacts c,
- nagios_objects o
-WHERE
- o.objecttype_id = 10
- AND binary c.name = o.name1
-" );
-rename_table($table);
+deadlock_protected(
+ sub {
+ $table = copy_table('opsview_contacts');
+ $dbh->do( "
+ INSERT INTO $table
+ SELECT c.id, o.object_id, c.name
+ FROM
+ $opsview_db.contacts c,
+ nagios_objects o
+ WHERE
+ o.objecttype_id = 10
+ AND binary c.name = o.name1
+ " );
+ rename_table($table);
+ }
+);
# Matches contacts with services based on contactgroups
# Note: if a contact is marked against a service manually, this will not get picked up
# Note: duplicate rows are ignored because of the intermediate table
-$table = copy_table( 'opsview_contact_services' );
-$dbh->do( "
+deadlock_protected(
+ sub {
+ $table = copy_table('opsview_contact_services');
+ $dbh->do( "
INSERT INTO $table
SELECT DISTINCT c.id, s.service_object_id
FROM
@@ -117,24 +153,29 @@
AND scg.service_id = s.service_id
AND s.config_type = 1
" );
-rename_table($table);
+ rename_table($table);
+ }
+);
# Copies hostgroup table from opsview
-$table = copy_table( 'opsview_hostgroups' );
-$dbh->do(
- "INSERT INTO $table SELECT id, parentid, name, lft, rgt, matpath, matpathid FROM $opsview_db.hostgroups"
+deadlock_protected(
+ sub {
+ $table = copy_table('opsview_hostgroups');
+ $dbh->do(
+ "INSERT INTO $table SELECT id, parentid, name, lft, rgt, matpath, matpathid FROM $opsview_db.hostgroups"
+ );
+ rename_table($table);
+ }
);
-rename_table($table);
-$dbh->do(
- "CREATE TEMPORARY TABLE temp_hosts (host_object_id int, hostname varchar(64))"
+deadlock_protected(
+ "CREATE TEMPORARY TABLE temp_hosts SELECT object_id AS host_object_id, name1 AS hostname FROM nagios_objects WHERE objecttype_id=1 AND is_active=1"
);
-$dbh->do(
- "INSERT INTO temp_hosts SELECT object_id, name1 FROM nagios_objects WHERE objecttype_id=1 AND is_active=1"
-);
-$table = copy_table( 'opsview_hostgroup_hosts' );
-$dbh->do( "
+deadlock_protected(
+ sub {
+ $table = copy_table('opsview_hostgroup_hosts');
+ $dbh->do( "
INSERT INTO $table
SELECT parent.id, o.host_object_id
FROM $opsview_db.hosts oh, opsview_hostgroups node, opsview_hostgroups parent, temp_hosts o
@@ -143,10 +184,14 @@
AND node.lft BETWEEN parent.lft AND parent.rgt
AND o.hostname = oh.name
" );
-rename_table($table);
+ rename_table($table);
+ }
+);
-$table = copy_table( 'opsview_monitoringservers' );
-$dbh->do( "
+deadlock_protected(
+ sub {
+ $table = copy_table( 'opsview_monitoringservers' );
+ $dbh->do( "
INSERT INTO $table
SELECT monitoringservers.id, monitoringservers.name, monitoringservers.activated, monitoringservers.passive,
GROUP_CONCAT(hosts.name)
@@ -162,10 +207,14 @@
monitoringservers.host = hosts.id
GROUP BY monitoringservers.id
" );
-rename_table($table);
+ rename_table($table);
+ }
+);
-$table = copy_table( 'opsview_monitoringclusternodes' );
-$dbh->do( "
+deadlock_protected(
+ sub {
+ $table = copy_table('opsview_monitoringclusternodes');
+ $dbh->do( "
INSERT INTO $table
SELECT monitoringclusternodes.id, oh.name, oh.ip
FROM
@@ -174,10 +223,15 @@
WHERE
monitoringclusternodes.host = oh.id
" );
-rename_table($table);
+ rename_table($table);
+ }
+);
my $opsview_hosts_new = copy_table( 'opsview_hosts' );
-$dbh->do( "
+
+deadlock_protected(
+ sub {
+ $dbh->do( "
INSERT INTO $opsview_hosts_new
SELECT temp_hosts.host_object_id, oh.id, temp_hosts.hostname, oh.ip, oh.alias, icons.filename, oh.hostgroup, oh.monitored_by, h2mcn.primary_node, h2mcn.secondary_node, 0, 0, 0
FROM $opsview_db.hosts oh
@@ -187,21 +241,32 @@
WHERE temp_hosts.hostname = oh.name
AND oh.icon = icons.name
" );
-$dbh->do( "DROP TABLE temp_hosts" );
+ }
+);
+deadlock_protected( "DROP TABLE temp_hosts" );
+
# Update num_interfaces with number of active interfaces set for this host
$logger->debug( 'Updating num_interfaces' );
-$dbh->do( "
+
+deadlock_protected(
+ sub {
+ $dbh->do( "
UPDATE
$opsview_hosts_new hosts,
(SELECT hostid, COUNT(*) AS c FROM $opsview_db.hostsnmpinterfaces WHERE active=1 GROUP BY hostid) tt
SET hosts.num_interfaces=tt.c
WHERE tt.hostid=hosts.opsview_host_id
" );
+ }
+);
# Matches hosts with services, based on ndoutils' object ids
-$table = copy_table( 'opsview_host_services' );
-$dbh->do( "
+
+deadlock_protected(
+ sub {
+ $table = copy_table( 'opsview_host_services' );
+ $dbh->do( "
INSERT INTO $table
SELECT s.host_object_id, o.name1, o.object_id, o.name2, (s.notes_url != ''), sc.markdown_filter, sc.id, sc.servicegroup, hosts.icon_filename
FROM nagios_objects o
@@ -214,9 +279,11 @@
WHERE
o.objecttype_id=2 AND s.config_type = 1
" );
+ }
+);
# Above misses out multi-servicechecks. Have to do in two steps otherwise the LIKE brings in other servicechecks
-$dbh->do( "
+deadlock_protected( "
UPDATE $table
LEFT JOIN $opsview_db.servicechecks sc
ON servicename LIKE CONCAT(sc.name,': %')
@@ -227,7 +294,7 @@
# Update opsview_hosts' num_services. We don't rename this until after we populated opsview_host_services
$logger->debug( 'Updating num_services' );
-$dbh->do( "
+deadlock_protected( "
UPDATE
$opsview_hosts_new hosts,
(SELECT host_object_id AS id, COUNT(*) AS c FROM $table GROUP BY id) tt
@@ -239,8 +306,10 @@
rename_table($opsview_hosts_new);
rename_table($table);
-$table = copy_table( "opsview_servicechecks" );
-$dbh->do( "
+deadlock_protected(
+ sub {
+ $table = copy_table( "opsview_servicechecks" );
+ $dbh->do( "
INSERT INTO $table
SELECT DISTINCT(sc.id),sc.name,sc.description,(sc.attribute IS NOT NULL),(sc.checktype=1 || sc.checktype=5),sc.markdown_filter,sc.cascaded_from,sc.servicegroup
FROM
@@ -249,10 +318,14 @@
WHERE
opsview_host_services.servicecheck_id = sc.id
" );
-rename_table($table);
+ rename_table($table);
+ }
+);
-$table = copy_table( 'opsview_servicegroups' );
-$dbh->do( "
+deadlock_protected(
+ sub {
+ $table = copy_table( 'opsview_servicegroups' );
+ $dbh->do( "
INSERT INTO $table
SELECT DISTINCT(sg.id),sg.name
FROM
@@ -261,11 +334,15 @@
WHERE
opsview_servicechecks.servicegroup_id = sg.id
" );
-rename_table($table);
+ rename_table($table);
+ }
+);
# Create lookup table so that can link opsview_hostgroup_hosts to nagios_statehistory
-$table = copy_table( 'opsview_host_objects' );
-$dbh->do( "
+deadlock_protected(
+ sub {
+ $table = copy_table( 'opsview_host_objects' );
+ $dbh->do( "
INSERT INTO $table
(
SELECT host_object_id, hostname, service_object_id, servicename, perfdata_available, markdown_filter, servicecheck_id, servicegroup_id FROM opsview_host_services
@@ -275,11 +352,15 @@
SELECT id, name, id, NULL, 0, 0, 0, 0 FROM opsview_hosts
)
" );
-rename_table($table);
+ rename_table($table);
+ }
+);
# Make a hosts only table
-$table = copy_table( 'opsview_contact_hosts' );
-$dbh->do( "
+deadlock_protected(
+ sub {
+ $table = copy_table('opsview_contact_hosts');
+ $dbh->do( "
INSERT INTO $table
SELECT DISTINCT opsview_contact_services.contactid, opsview_host_services.host_object_id
FROM
@@ -288,26 +369,34 @@
WHERE
opsview_host_services.service_object_id = opsview_contact_services.service_object_id
" );
-rename_table($table);
+ rename_table($table);
+ }
+);
# Make a generic objects table (primarily for joining to nagios_statechanges)
-$table = copy_table( 'opsview_contact_objects' );
-$logger->debug( 'Running first insert' );
-$dbh->do( "
+deadlock_protected(
+ sub {
+ $table = copy_table('opsview_contact_objects');
+ $logger->debug('Running first insert');
+ $dbh->do( "
INSERT INTO $table
SELECT contactid, service_object_id
FROM opsview_contact_services
" );
-$logger->debug( 'Running second insert' );
-$dbh->do( "
+ $logger->debug('Running second insert');
+ $dbh->do( "
INSERT INTO $table
SELECT contactid, host_object_id
FROM opsview_contact_hosts
" );
-rename_table($table);
+ rename_table($table);
+ }
+);
-$temp_table = copy_table( 'opsview_viewports', 'temporary' );
-$dbh->do( "
+deadlock_protected(
+ sub {
+ $temp_table = copy_table( 'opsview_viewports', 'temporary' );
+ $dbh->do( "
INSERT INTO $temp_table
SELECT
kh.keywordid,
@@ -333,7 +422,7 @@
AND k.all_hosts = 0
AND k.all_servicechecks = 0
" );
-$dbh->do( "
+ $dbh->do( "
INSERT INTO $temp_table
SELECT
k.id,
@@ -353,7 +442,7 @@
AND ohs.hostname = h.name
AND k.all_servicechecks = 1
" );
-$dbh->do( "
+ $dbh->do( "
INSERT INTO $temp_table
SELECT
k.id,
@@ -373,89 +462,102 @@
AND (ohs.servicename = sc.name || ohs.servicename like concat(sc.name,': %') )
AND k.all_hosts = 1
" );
-$table = copy_table( 'opsview_viewports' );
+ }
+);
-# Inserts the services
-$dbh->do( "
+deadlock_protected(
+ sub {
+ $table = copy_table('opsview_viewports');
+
+ # Inserts the services
+ $dbh->do( "
INSERT INTO $table
SELECT DISTINCT viewportid, keyword, hostname, servicename, host_object_id, object_id
FROM $temp_table
" );
-# Adds the hosts
-$dbh->do( "
+ # Adds the hosts
+ $dbh->do( "
INSERT INTO $table
SELECT DISTINCT viewportid, keyword, hostname, NULL, host_object_id, host_object_id
FROM $temp_table
" );
-rename_table($table);
+ rename_table($table);
+ }
+);
# Make a table with all known hostnames, servicenames and metricnames
-$table = copy_table( 'opsview_performance_metrics' );
-my @perfmetric_values;
-my $do_large_perfmetric_insert = sub {
- my $force = shift;
+deadlock_protected(
+ sub {
+ $table = copy_table( 'opsview_performance_metrics' );
+ my @perfmetric_values;
+ my $do_large_perfmetric_insert = sub {
+ my $force = shift;
- # Choose 1000 as a general optimisation
- if ( ( ( my $vals = scalar @perfmetric_values / 4 ) >= 1000 )
- || ( $force && @perfmetric_values ) )
- {
- $dbh->do(
- "INSERT INTO $table (hostname, servicename, metricname, uom) VALUES "
- . ( join ',', ("(?,?,?,?)") x $vals ),
- {}, @perfmetric_values
- );
- @perfmetric_values = ();
- }
-};
-
-# Read all hostnames, servicenames, metricnames from var/rrd directory
-my $rootdir = $ENV{OPSVIEW_TEST_ROOTDIR} || Opsview::Config->root_dir;
-my $rrddir = "$rootdir/var/rrd";
-opendir( RRDDIR, "$rrddir" );
-foreach my $hostname ( sort grep !/^\./, readdir RRDDIR ) {
- opendir( HOST, "$rrddir/$hostname" );
- foreach my $servicename ( sort grep !/^\./, readdir HOST ) {
- opendir( SERVICE, "$rrddir/$hostname/$servicename" );
- foreach my $metric ( sort grep !/^\./, readdir SERVICE ) {
- my $uom = "";
- if ( -e "$rrddir/$hostname/$servicename/$metric/uom"
- && -s "$rrddir/$hostname/$servicename/$metric/uom" )
+ # Choose 1000 as a general optimisation
+ if ( ( ( my $vals = scalar @perfmetric_values / 4 ) >= 1000 )
+ || ( $force && @perfmetric_values ) )
{
- open UOM, "$rrddir/$hostname/$servicename/$metric/uom";
- $uom = <UOM>;
- chomp $uom;
- close UOM;
+ $dbh->do(
+ "INSERT INTO $table (hostname, servicename, metricname, uom) VALUES "
+ . ( join ',', ("(?,?,?,?)") x $vals ),
+ {}, @perfmetric_values
+ );
+ @perfmetric_values = ();
}
- push @perfmetric_values, urldecode($hostname),
- urldecode($servicename), urldecode($metric), $uom;
+ };
+
+ # Read all hostnames, servicenames, metricnames from var/rrd directory
+ my $rootdir = $ENV{OPSVIEW_TEST_ROOTDIR} || Opsview::Config->root_dir;
+ my $rrddir = "$rootdir/var/rrd";
+ opendir( RRDDIR, "$rrddir" );
+ foreach my $hostname ( sort grep !/^\./, readdir RRDDIR ) {
+ opendir( HOST, "$rrddir/$hostname" );
+ foreach my $servicename ( sort grep !/^\./, readdir HOST ) {
+ opendir( SERVICE, "$rrddir/$hostname/$servicename" );
+ foreach my $metric ( sort grep !/^\./, readdir SERVICE ) {
+ my $uom = "";
+ if ( -e "$rrddir/$hostname/$servicename/$metric/uom"
+ && -s "$rrddir/$hostname/$servicename/$metric/uom" )
+ {
+ open UOM, "$rrddir/$hostname/$servicename/$metric/uom";
+ $uom = <UOM>;
+ chomp $uom;
+ close UOM;
+ }
+ push @perfmetric_values, urldecode($hostname),
+ urldecode($servicename), urldecode($metric), $uom;
+ }
+ }
+ $do_large_perfmetric_insert->();
}
- }
- $do_large_perfmetric_insert->();
-}
-$do_large_perfmetric_insert->(1);
+ $do_large_perfmetric_insert->(1);
-# Set the service_object_id
-$dbh->do(
- "UPDATE $table, opsview_host_services
+ # Set the service_object_id
+ $dbh->do(
+ "UPDATE $table, opsview_host_services
SET $table.service_object_id=opsview_host_services.service_object_id
WHERE
opsview_host_services.hostname = $table.hostname
AND opsview_host_services.servicename = $table.servicename
"
-);
+ );
-# These are rrds that exist, but the services are not defined in Opsview/Nagios
-$dbh->do( "DELETE FROM $table WHERE service_object_id=0" );
+ # These are rrds that exist, but the services are not defined in Opsview/Nagios
+ $dbh->do("DELETE FROM $table WHERE service_object_id=0");
-rename_table($table);
+ rename_table($table);
+ }
+);
# Network topology map
-$table = copy_table( 'opsview_topology_map' );
+deadlock_protected(
+ sub {
+ $table = copy_table( 'opsview_topology_map' );
-$dbh->do(
- qq{
+ $dbh->do(
+ qq{
INSERT INTO $table ( monitored_by, object_id, host_id, opsview_host_id,
name, parent_id, parent_object_id, parent_name, child_id, child_object_id, child_name )
SELECT * FROM (
@@ -483,11 +585,11 @@
) order by opsview_host_id, IF(ISNULL(parent_id),1,0), parent_id, IF(ISNULL(child_id),1,0), child_id
) _
}
-);
+ );
-# Update num_interfaces with number of active interfaces set for this host
-$logger->debug( 'Updating num_children' );
-$dbh->do( "
+ # Update num_interfaces with number of active interfaces set for this host
+ $logger->debug( 'Updating num_children' );
+ $dbh->do( "
UPDATE
opsview_hosts hosts,
(SELECT object_id, COUNT(*) AS c FROM $table WHERE child_id IS NOT NULL GROUP BY object_id) tt
@@ -495,19 +597,22 @@
WHERE tt.object_id=hosts.id
" );
-rename_table($table);
+ rename_table($table);
+ }
+);
-$table = copy_table( 'opsview_hosts_matpaths' );
-{
+deadlock_protected(
+ sub {
+ $table = copy_table( 'opsview_hosts_matpaths' );
- my $all_host_ids = $dbh->selectcol_arrayref(
- q{
+ my $all_host_ids = $dbh->selectcol_arrayref(
+ q{
SELECT id FROM opsview_hosts
}
- );
+ );
- my $find_parent_sth = $dbh->prepare_cached(
- q{
+ my $sth_find_parent = $dbh->prepare_cached(
+ q{
SELECT parent_object_id
FROM opsview_topology_map
WHERE object_id = ?
@@ -515,59 +620,60 @@
AND parent_object_ID IS NOT NULL
GROUP BY parent_object_id
}
- );
+ );
- my $set_host_matpath = $dbh->prepare_cached(
- qq{
+ my $sth_host_matpath = $dbh->prepare_cached(
+ qq{
INSERT INTO $table (object_id, matpath, depth) VALUES(?, ?, ?)
}
- );
+ );
- my $find_parent;
- $find_parent = sub {
- my $id = shift;
+ my $find_parent;
+ $find_parent = sub {
+ my $id = shift;
- return unless $id;
+ return unless $id;
- my $parents = $dbh->selectcol_arrayref( $find_parent_sth, {}, $id );
+ my $parents = $dbh->selectcol_arrayref( $sth_find_parent, {}, $id );
- return @$parents;
- };
+ return @$parents;
+ };
- my $set_host_matpath = sub {
- my $object_id = shift;
+ my $set_host_matpath = sub {
+ my $object_id = shift;
- my $make_tree;
- $make_tree = sub {
- my ( $id, $depth, @tree ) = @_;
+ my $make_tree;
+ $make_tree = sub {
+ my ( $id, $depth, @tree ) = @_;
- return unless $id;
+ return unless $id;
- my @parents = $find_parent->($id);
+ my @parents = $find_parent->($id);
- if (@parents) {
- for my $parent (@parents) {
- $make_tree->( $parent, $depth + 1, @tree, $id );
+ if (@parents) {
+ for my $parent (@parents) {
+ $make_tree->( $parent, $depth + 1, @tree, $id );
+ }
}
- }
- else { # reached root
- push @tree, $id;
- my $matpath = join( ',', reverse @tree ) . ",";
- $set_host_matpath->execute( $object_id, $matpath, $depth );
- }
+ else { # reached root
+ push @tree, $id;
+ my $matpath = join( ',', reverse @tree ) . ",";
+ $sth_host_matpath->execute( $object_id, $matpath, $depth );
+ }
+ };
+ $make_tree->( $object_id, 1 );
};
- $make_tree->( $object_id, 1 );
- };
- for my $host_id (@$all_host_ids) {
- $set_host_matpath->($host_id);
+ for my $host_id (@$all_host_ids) {
+ $set_host_matpath->($host_id);
+ }
+ rename_table($table);
}
-}
-rename_table($table);
+);
# Clear out downtime that should have finished. Nagios should pass an event
# to the db to say the downtime is complete but some circumstances such as
# db being unavailable can prevent this
-$dbh->do(
+deadlock_protected(
'delete from nagios_scheduleddowntime where scheduled_end_time < NOW()'
);