Modified: trunk/opsview-core/bin/import_ndologsd
===================================================================
--- trunk/opsview-core/bin/import_ndologsd 2013-02-06 16:43:57 UTC (rev 11370)
+++ trunk/opsview-core/bin/import_ndologsd 2013-02-06 16:49:35 UTC (rev 11371)
@@ -114,7 +114,7 @@
my @files = sort ( grep !/^\.\.?\z/, readdir IMPORT_DIR );
closedir IMPORT_DIR;
foreach my $file (@files) {
- last MAINLOOP if ($break);
+ last MAINLOOP if $break;
my $start = gettimeofday();
my $file_size = -s $file;
unless ($file_size) {
@@ -124,8 +124,9 @@
}
# Send the log to NDO. Blocks if connection refuses until either it can, or $break is set
- if ( $importer->send_log( $file, $file_size ) )
- { # file processed successfully
+
+ # file processed successfully
+ if ( $importer->send_log( $file, $file_size ) ) {
if ( $logger->is_debug() ) {
my $debug_dir = "/usr/local/nagios/var/ndologs.archive";
unless ( -d $debug_dir ) {
@@ -136,17 +137,20 @@
else {
unlink $file;
}
+ my $duration = gettimeofday() - $start;
+ $logger->debug( "Finished $file. Duration=$duration" );
+ if ( $duration > 5 ) {
+ my $nice_duration = sprintf( "%0.2f", $duration );
+ $logger->warn(
+ "Import of $file, size=$file_size, took $nice_duration seconds > 5 seconds"
+ );
+ }
}
- last MAINLOOP if ($break);
- my $duration = gettimeofday() - $start;
- $logger->debug( "Finished $file. Duration=$duration" );
- if ( $duration > 5 ) {
- my $nice_duration = sprintf( "%0.2f", $duration );
- $logger->warn(
- "Import of $file, size=$file_size, took $nice_duration seconds > 5 seconds"
- );
+ else {
+ $logger->warn( "Failed to import $file" );
}
$found = 1;
+ last MAINLOOP if $break;
}
$last_update = $dir_update;
}
@@ -161,8 +165,6 @@
sub cleanup {
return if ( $^S == 1 );
- # This appears to be called twice if a forced death occurs. Not sure why
- #close FILE2SOCK;
$logger->info("Error found: $@") if $@;
$logger->info( "Stopping $0" );
unlink $pidfile;
Modified: trunk/opsview-core/lib/Opsview/Utils/NDOLogsImporter.pm
===================================================================
--- trunk/opsview-core/lib/Opsview/Utils/NDOLogsImporter.pm 2013-02-06 16:43:57 UTC (rev 11370)
+++ trunk/opsview-core/lib/Opsview/Utils/NDOLogsImporter.pm 2013-02-06 16:49:35 UTC (rev 11371)
@@ -727,6 +727,9 @@
our @INPUT_DATA_TYPE = map {0} 0 .. NDO_MAX_DATA_TYPES() + 1;
our @HANDLERS;
+my $DBCONNERR =
+ qr/(?:server has gone away)|(?:Lost connection to MySQL server during query)|(?:Can't connect to .*? MySQL server)/;
+my $LOGGER;
my $DB;
my $LATEST_REALTIME_DATA_TIME = 0;
my $LOADING_RETENTION_DATA_FLAG = 0;
@@ -886,8 +889,10 @@
}
sub new {
- my $class = shift;
+ my ( $class, %args ) = @_;
+ $LOGGER = delete $args{logger};
+
my $self = bless {
last_table_trim_time => time(),
@@ -900,15 +905,29 @@
event_handlers => \@HANDLERS,
- @_
+ %args
+
}, $class;
- $self->db_connect();
+ eval {
+ $self->db_connect();
- $self->create_default_instance();
+ $self->create_default_instance();
- $self->get_cached_object_ids();
+ $self->get_cached_object_ids();
+ };
+ if ( my $e = $@ ) {
+ if ( !$DB || $e =~ /$DBCONNERR/ ) {
+ # that will block until DB is back
+ $self->db_reconnect;
+ }
+ else {
+ $LOGGER->fatal( "Failed to start $0" );
+ ${ $self->{break} }++;
+ }
+ }
+
return $self;
}
@@ -960,7 +979,7 @@
. Opsview::Config->runtime_db
. ";host=localhost";
- $DB = DBI->connect_cached(
+ $DB = DBI->connect(
$dsn,
Opsview::Config->runtime_dbuser,
Opsview::Config->runtime_dbpasswd,
@@ -972,14 +991,15 @@
mysql_server_prepare => 1,
Callbacks => {
connected => sub {
- $_[0]->do("SET time_zone='+00:00'");
+ $_[0]->do( "SET time_zone='+00:00'" );
# needs to return undef
return;
}
}
}
- );
+ ) or die $DBI::errstr;
+
return;
}
@@ -990,12 +1010,9 @@
}
sub send_log {
- my $result = 0;
-
- # $self->{break} is the global break
- $_[ ARG_SELF() ]->{logger}->debug(
+ $LOGGER->debug(
"Importing " . $_[ ARG_FILE() ] . ". Size=" . $_[ ARG_FILESIZE() ] )
- if $_[ ARG_SELF() ]->{logger}->is_debug;
+ if $LOGGER->is_debug;
$LOADING_RETENTION_DATA_FLAG = 0;
@@ -1003,13 +1020,28 @@
my $data =
$_[ ARG_SELF() ]->parse_c( $_[ ARG_FILE() ], $_[ ARG_FILESIZE() ] );
- set_latest_data_times();
+ eval { set_latest_data_times(); };
+ if ( my $e = $@ ) {
+ if ( !$DB || $e =~ /$DBCONNERR/ ) {
+ # that will block until DB is back
+ $_[ ARG_SELF() ]->db_reconnect;
+ }
+ else {
+ $LOGGER->fatal( "Error for " . $_[ ARG_FILE() ] . ": $e" );
+ return 0;
+ }
+ }
+
#$LAST_NOTIFICATION_ID = 0;
#$LAST_CONTACT_NOTIFICATION_ID = 0;
- for ( my $i = 0; $i < scalar @$data; $i += 2 ) {
+ my $events = @$data;
+ EVENTS: for ( my $i = 0; $i < $events; $i += 2 ) {
+ # caught signal - $self->{break} is the global break
+ return 0 if ${ $_[ ARG_SELF() ]->{break} };
+
# if ( $event_type eq NDO_API_STARTCONFIGDUMP() ) {
# there was a reload
# fork;
@@ -1017,23 +1049,31 @@
if ( my $m = $HANDLERS[ $data->[$i] ] ) {
- eval {
- #$DB->txn( ping => sub {
- #DBI->trace($ENV{DBI_TRACE2} || 0) unless $ENV{DBI_TRACE};
- $result = $_[ ARG_SELF() ]->$m( $data->[ $i + 1 ] );
+ eval { $_[ ARG_SELF() ]->$m( $data->[ $i + 1 ] ); };
+ if ( my $e = $@ ) {
+ if ( !$DB || $e =~ /$DBCONNERR/ ) {
- #DBI->trace(0) unless $ENV{DBI_TRACE};
- #});
- };
- if ( my $e = $@ ) {
- $_[ ARG_SELF() ]->{logger}->fatal( "Error in $m: $e" );
+ # that will block until DB is back
+ $_[ ARG_SELF() ]->db_reconnect;
+
+ # restart with failed event
+ redo EVENTS;
+ }
+ else {
+ $LOGGER->fatal(
+ "Error for " . $_[ ARG_FILE() ] . " in $m: $e"
+ );
+
+ return 0;
+ }
}
+
}
}
# $_[ ARG_SELF() ]->db_perform_maintenance();
- return $result;
+ return 1;
}
sub db_clear_table {
@@ -1043,7 +1083,7 @@
$sth->execute();
}
-my $sth_SET_INACTIVE;
+our $sth_SET_INACTIVE;
sub set_all_objects_as_inactive {
@@ -1060,7 +1100,7 @@
$sth_SET_INACTIVE->execute();
}
-my $sth_SET_ACTIVE;
+our $sth_SET_ACTIVE;
sub set_object_as_active {
$sth_SET_ACTIVE = $DB->prepare_cached(
@@ -1089,7 +1129,7 @@
ORDER BY } . $_[ ARG_COLUMN() ] . q{ DESC
LIMIT 1
}
- ) or die $DB->errstr;
+ );
$sth->execute();
my $latest_time;
@@ -1199,8 +1239,8 @@
}
}
-my $sth_update_handle_NOTIFICATIONDATA;
-my $sth_insert_handle_NOTIFICATIONDATA;
+our $sth_update_handle_NOTIFICATIONDATA;
+our $sth_insert_handle_NOTIFICATIONDATA;
sub handle_NOTIFICATIONDATA { # 205
@@ -1309,7 +1349,7 @@
return 1;
}
-my $sth_insert_handle_SERVICECHECKDATA;
+our $sth_insert_handle_SERVICECHECKDATA;
sub handle_SERVICECHECKDATA {
@@ -1403,7 +1443,7 @@
return 1;
}
-my $sth_insert_handle_HOSTCHECKDATA;
+our $sth_insert_handle_HOSTCHECKDATA;
sub handle_HOSTCHECKDATA {
@@ -1502,12 +1542,12 @@
return 1;
}
-my $sth_update_handle_COMMENTDATA;
-my $sth_insert_handle_COMMENTDATA;
-my $sth_delete_handle_COMMENTDATA;
-my $sth_update_comments_handle_COMMENTDATA;
-my $sth_insert_comments_handle_COMMENTDATA;
-my $sth_delete_comments_handle_COMMENTDATA;
+our $sth_update_handle_COMMENTDATA;
+our $sth_insert_handle_COMMENTDATA;
+our $sth_delete_handle_COMMENTDATA;
+our $sth_update_comments_handle_COMMENTDATA;
+our $sth_insert_comments_handle_COMMENTDATA;
+our $sth_delete_comments_handle_COMMENTDATA;
sub handle_COMMENTDATA {
@@ -1720,14 +1760,14 @@
return 1;
}
-my $sth_update_handle_DOWNTIMEDATA;
-my $sth_insert_handle_DOWNTIMEDATA;
-my $sth_update_start_handle_DOWNTIMEDATA;
-my $sth_update_stop_handle_DOWNTIMEDATA;
-my $sth_update_schedule_handle_DOWNTIMEDATA;
-my $sth_insert_schedule_handle_DOWNTIMEDATA;
-my $sth_update_start_schedule_handle_DOWNTIMEDATA;
-my $sth_update_stop_schedule_handle_DOWNTIMEDATA;
+our $sth_update_handle_DOWNTIMEDATA;
+our $sth_insert_handle_DOWNTIMEDATA;
+our $sth_update_start_handle_DOWNTIMEDATA;
+our $sth_update_stop_handle_DOWNTIMEDATA;
+our $sth_update_schedule_handle_DOWNTIMEDATA;
+our $sth_insert_schedule_handle_DOWNTIMEDATA;
+our $sth_update_start_schedule_handle_DOWNTIMEDATA;
+our $sth_update_stop_schedule_handle_DOWNTIMEDATA;
sub handle_DOWNTIMEDATA {
@@ -2037,8 +2077,8 @@
return 1;
}
-my $sth_update_handle_PROGRAMSTATUSDATA;
-my $sth_insert_handle_PROGRAMSTATUSDATA;
+our $sth_update_handle_PROGRAMSTATUSDATA;
+our $sth_insert_handle_PROGRAMSTATUSDATA;
sub handle_PROGRAMSTATUSDATA {
@@ -2160,10 +2200,10 @@
return 1;
}
-my $sth_insert_handle_HOSTSTATUSDATA;
-my $sth_update_handle_HOSTSTATUSDATA;
-my $sth_insert_downtime_handle_HOSTSTATUSDATA;
-my $sth_update_downtime_handle_HOSTSTATUSDATA;
+our $sth_insert_handle_HOSTSTATUSDATA;
+our $sth_update_handle_HOSTSTATUSDATA;
+our $sth_insert_downtime_handle_HOSTSTATUSDATA;
+our $sth_update_downtime_handle_HOSTSTATUSDATA;
sub handle_HOSTSTATUSDATA {
@@ -2494,10 +2534,10 @@
return 1;
}
-my $sth_insert_handle_SERVICESTATUSDATA;
-my $sth_update_handle_SERVICESTATUSDATA;
-my $sth_insert_downtime_handle_SERVICESTATUSDATA;
-my $sth_update_downtime_handle_SERVICESTATUSDATA;
+our $sth_insert_handle_SERVICESTATUSDATA;
+our $sth_update_handle_SERVICESTATUSDATA;
+our $sth_insert_downtime_handle_SERVICESTATUSDATA;
+our $sth_update_downtime_handle_SERVICESTATUSDATA;
sub handle_SERVICESTATUSDATA {
@@ -2903,7 +2943,7 @@
}
}
-my $sth_insert_handle_MULTI_PARENTHOST;
+our $sth_insert_handle_MULTI_PARENTHOST;
sub handle_MULTI_PARENTHOST {
@@ -2928,7 +2968,7 @@
}
}
-my $sth_insert_handle_MULTI_TIMERANGE;
+our $sth_insert_handle_MULTI_TIMERANGE;
sub handle_MULTI_TIMERANGE {
@@ -2956,8 +2996,8 @@
}
}
-my $sth_insert_handle_MULTI_CUSTOMVARIABLE;
-my $sth_update_handle_MULTI_CUSTOMVARIABLE;
+our $sth_insert_handle_MULTI_CUSTOMVARIABLE;
+our $sth_update_handle_MULTI_CUSTOMVARIABLE;
sub handle_MULTI_CUSTOMVARIABLE {
@@ -3004,8 +3044,8 @@
}
}
-my $sth_insert_handle_MULTI_CUSTOMVARIABLESTATUS;
-my $sth_update_handle_MULTI_CUSTOMVARIABLESTATUS;
+our $sth_insert_handle_MULTI_CUSTOMVARIABLESTATUS;
+our $sth_update_handle_MULTI_CUSTOMVARIABLESTATUS;
sub handle_MULTI_CUSTOMVARIABLESTATUS {
@@ -3050,12 +3090,12 @@
}
}
-my $sth_update_handle_CONTACTSTATUSDATA;
-my $sth_insert_handle_CONTACTSTATUSDATA;
+our $sth_update_handle_CONTACTSTATUSDATA;
+our $sth_insert_handle_CONTACTSTATUSDATA;
sub handle_CONTACTSTATUSDATA {
- my $sth_update_handle_CONTACTSTATUSDATA = $DB->prepare_cached(
+ $sth_update_handle_CONTACTSTATUSDATA = $DB->prepare_cached(
q{
UPDATE nagios_contactstatus SET
@@ -3074,7 +3114,7 @@
}
) unless defined $sth_update_handle_CONTACTSTATUSDATA;
- my $sth_insert_handle_CONTACTSTATUSDATA = $DB->prepare_cached(
+ $sth_insert_handle_CONTACTSTATUSDATA = $DB->prepare_cached(
q{
INSERT INTO nagios_contactstatus SET
@@ -3162,7 +3202,7 @@
return 1;
}
-my $sth_insert_handle_ACKNOWLEDGEMENTDATA;
+our $sth_insert_handle_ACKNOWLEDGEMENTDATA;
sub handle_ACKNOWLEDGEMENTDATA {
@@ -3230,9 +3270,9 @@
return 1;
}
-my $sth_insert_handle_STATECHANGEDATA;
-my $sth_select_downtimehist_handle_STATECHANGEDATA;
-my $sth_update_downtimehist_handle_STATECHANGEDATA;
+our $sth_insert_handle_STATECHANGEDATA;
+our $sth_select_downtimehist_handle_STATECHANGEDATA;
+our $sth_update_downtimehist_handle_STATECHANGEDATA;
sub handle_STATECHANGEDATA {
@@ -3395,8 +3435,8 @@
return 1;
}
-my $sth_insert_handle_PROCESSDATA;
-my $sth_update_endtime_handle_PROCESSDATA;
+our $sth_insert_handle_PROCESSDATA;
+our $sth_update_endtime_handle_PROCESSDATA;
sub handle_PROCESSDATA {
@@ -3513,9 +3553,9 @@
return 1;
}
-my $sth_update_handle_HOSTDEFINITION;
-my $sth_insert_handle_HOSTDEFINITION;
-my $sth_fetch_ID_handle_HOSTDEFINITION;
+our $sth_update_handle_HOSTDEFINITION;
+our $sth_insert_handle_HOSTDEFINITION;
+our $sth_fetch_ID_handle_HOSTDEFINITION;
sub handle_HOSTDEFINITION {
@@ -3811,9 +3851,9 @@
return 1;
}
-my $sth_update_handle_SERVICEDEFINITION;
-my $sth_insert_handle_SERVICEDEFINITION;
-my $sth_fetch_ID_handle_SERVICEDEFINITION;
+our $sth_update_handle_SERVICEDEFINITION;
+our $sth_insert_handle_SERVICEDEFINITION;
+our $sth_fetch_ID_handle_SERVICEDEFINITION;
sub handle_SERVICEDEFINITION {
@@ -4093,8 +4133,8 @@
return 1;
}
-my $sth_update_handle_SERVICEDEPENDENCYDEFINITION;
-my $sth_insert_handle_SERVICEDEPENDENCYDEFINITION;
+our $sth_update_handle_SERVICEDEPENDENCYDEFINITION;
+our $sth_insert_handle_SERVICEDEPENDENCYDEFINITION;
sub handle_SERVICEDEPENDENCYDEFINITION {
@@ -4190,8 +4230,8 @@
return 1;
}
-my $sth_update_handle_COMMANDDEFINITION;
-my $sth_insert_handle_COMMANDDEFINITION;
+our $sth_update_handle_COMMANDDEFINITION;
+our $sth_insert_handle_COMMANDDEFINITION;
sub handle_COMMANDDEFINITION {
@@ -4249,9 +4289,9 @@
return 1;
}
-my $sth_update_handle_TIMEPERIODDEFINITION;
-my $sth_insert_handle_TIMEPERIODDEFINITION;
-my $sth_fetch_ID_handle_TIMEPERIODDEFINITION;
+our $sth_update_handle_TIMEPERIODDEFINITION;
+our $sth_insert_handle_TIMEPERIODDEFINITION;
+our $sth_fetch_ID_handle_TIMEPERIODDEFINITION;
sub handle_TIMEPERIODDEFINITION {
@@ -4345,8 +4385,8 @@
return 1;
}
-my $sth_update_handle_CONTACTDEFINITION;
-my $sth_insert_handle_CONTACTDEFINITION;
+our $sth_update_handle_CONTACTDEFINITION;
+our $sth_insert_handle_CONTACTDEFINITION;
#my $sth_fetch_ID_handle_CONTACTDEFINITION;
@@ -4519,7 +4559,7 @@
return 1;
}
-my $sth_insert_handle_HOSTGROUPDEFINITION;
+our $sth_insert_handle_HOSTGROUPDEFINITION;
sub handle_HOSTGROUPDEFINITION {
@@ -4569,7 +4609,7 @@
return 1;
}
-my $sth_insert_handle_CONTACTGROUPDEFINITION;
+our $sth_insert_handle_CONTACTGROUPDEFINITION;
sub handle_CONTACTGROUPDEFINITION {
@@ -4623,6 +4663,59 @@
return 1;
}
+# Drop the current database handle (if there is one) and keep trying to
+# open a new connection, pausing 3 seconds before every attempt.
+#
+# Returns 1 once db_connect() has succeeded and the cached statement
+# handles have been reset. Returns 0, leaving $DB undefined, if the break
+# flag stored in $self->{break} is raised while waiting.
+#
+# This sub is invoked from error handlers that are not themselves wrapped
+# in eval, so nothing in here is allowed to throw.
+sub db_reconnect {
+    my $self = $_[ ARG_SELF() ];
+
+    # $self->{break} is dereferenced as a scalar ref elsewhere in this
+    # package; tolerate it being absent so the check can never die.
+    my $break        = $self->{break};
+    my $stop_request = sub { return ref($break) && ${$break} };
+
+    if ( defined $DB ) {
+        for my $sth ( @{ $DB->{ChildHandles} || [] } ) {
+            next unless defined $sth;
+
+            # Best effort: should releasing one handle throw, carry on
+            # with the remaining ones.
+            eval {
+                $sth->finish;
+
+                # $sth is a tied hash
+                $sth->DESTROY;
+            };
+        }
+
+        # Best effort as well; the old handle is discarded either way.
+        eval {
+            $DB->STORE( CachedKids => {} );
+
+            $DB->disconnect;
+        };
+
+        undef $DB;
+    }
+
+    DB_RECONNECT: while (1) {
+        return 0 if $stop_request->();
+
+        $LOGGER->warn( "Reconnecting to database" );
+        sleep 3;
+
+        # sleep can return early when a signal arrives, so look at the
+        # flag again before attempting another connection
+        return 0 if $stop_request->();
+
+        eval { $self->db_connect(); };
+        if ( my $e = $@ ) {
+            $LOGGER->fatal( "..reconnecting failed: $e" );
+            next DB_RECONNECT;
+        }
+
+        reset_cached_statements();
+
+        return 1;
+    }
+}
+
+# Forget every cached statement handle held in a package variable whose
+# name starts with "sth_": each defined handle is finished, destroyed and
+# then set back to undef.
+sub reset_cached_statements {
+
+    # the handle variables are looked up by name, which needs symbolic refs
+    no strict 'refs';
+
+    # names of all "sth_*" entries in this package's symbol table
+    my @handle_names =
+      grep {/^sth_/} keys %Opsview::Utils::NDOLogsImporter::;
+
+    foreach my $name (@handle_names) {
+        my $slot = \${ __PACKAGE__ . "::$name" };
+
+        if ( defined ${$slot} ) {
+            ${$slot}->finish;
+            ${$slot}->DESTROY;
+            undef ${$slot};
+        }
+    }
+}
+
1;
__END__