Title: [opsview] [11371] safely reconnect to database
Revision
11371
Author
aburzynski
Date
2013-02-06 16:49:35 +0000 (Wed, 06 Feb 2013)

Log Message

safely reconnect to database

Modified Paths


Modified: trunk/opsview-core/bin/import_ndologsd
===================================================================
--- trunk/opsview-core/bin/import_ndologsd	2013-02-06 16:43:57 UTC (rev 11370)
+++ trunk/opsview-core/bin/import_ndologsd	2013-02-06 16:49:35 UTC (rev 11371)
@@ -114,7 +114,7 @@
         my @files = sort ( grep !/^\.\.?\z/, readdir IMPORT_DIR );
         closedir IMPORT_DIR;
         foreach my $file (@files) {
-            last MAINLOOP if ($break);
+            last MAINLOOP if $break;
             my $start     = gettimeofday();
             my $file_size = -s $file;
             unless ($file_size) {
@@ -124,8 +124,9 @@
             }
 
             # Send the log to NDO. Blocks if connection refuses until either it can, or $break is set
-            if ( $importer->send_log( $file, $file_size ) )
-            { # file processed successfully
+
+            # file processed successfully
+            if ( $importer->send_log( $file, $file_size ) ) {
                 if ( $logger->is_debug() ) {
                     my $debug_dir = "/usr/local/nagios/var/ndologs.archive";
                     unless ( -d $debug_dir ) {
@@ -136,17 +137,20 @@
                 else {
                     unlink $file;
                 }
+                my $duration = gettimeofday() - $start;
+                $logger->debug( "Finished $file. Duration=$duration" );
+                if ( $duration > 5 ) {
+                    my $nice_duration = sprintf( "%0.2f", $duration );
+                    $logger->warn(
+                        "Import of $file, size=$file_size, took $nice_duration seconds > 5 seconds"
+                    );
+                }
             }
-            last MAINLOOP if ($break);
-            my $duration = gettimeofday() - $start;
-            $logger->debug( "Finished $file. Duration=$duration" );
-            if ( $duration > 5 ) {
-                my $nice_duration = sprintf( "%0.2f", $duration );
-                $logger->warn(
-                    "Import of $file, size=$file_size, took $nice_duration seconds > 5 seconds"
-                );
+            else {
+                $logger->warn( "Failed to import $file" );
             }
             $found = 1;
+            last MAINLOOP if $break;
         }
         $last_update = $dir_update;
     }
@@ -161,8 +165,6 @@
 sub cleanup {
     return if ( $^S == 1 );
 
-    # This appears to be called twice if a forced death occurs. Not sure why
-    #close FILE2SOCK;
     $logger->info("Error found: $@") if $@;
     $logger->info( "Stopping $0" );
     unlink $pidfile;

Modified: trunk/opsview-core/lib/Opsview/Utils/NDOLogsImporter.pm
===================================================================
--- trunk/opsview-core/lib/Opsview/Utils/NDOLogsImporter.pm	2013-02-06 16:43:57 UTC (rev 11370)
+++ trunk/opsview-core/lib/Opsview/Utils/NDOLogsImporter.pm	2013-02-06 16:49:35 UTC (rev 11371)
@@ -727,6 +727,9 @@
 
 our @INPUT_DATA_TYPE = map {0} 0 .. NDO_MAX_DATA_TYPES() + 1;
 our @HANDLERS;
+my $DBCONNERR =
+  qr/(?:server has gone away)|(?:Lost connection to MySQL server during query)|(?:Can't connect to .*? MySQL server)/;
+my $LOGGER;
 my $DB;
 my $LATEST_REALTIME_DATA_TIME   = 0;
 my $LOADING_RETENTION_DATA_FLAG = 0;
@@ -886,8 +889,10 @@
 }
 
 sub new {
-    my $class = shift;
+    my ( $class, %args ) = @_;
 
+    $LOGGER = delete $args{logger};
+
     my $self = bless {
         last_table_trim_time => time(),
 
@@ -900,15 +905,29 @@
 
         event_handlers => \@HANDLERS,
 
-        @_
+        %args
+
     }, $class;
 
-    $self->db_connect();
+    eval {
+        $self->db_connect();
 
-    $self->create_default_instance();
+        $self->create_default_instance();
 
-    $self->get_cached_object_ids();
+        $self->get_cached_object_ids();
+    };
+    if ( my $e = $@ ) {
+        if ( !$DB || $e =~ /$DBCONNERR/ ) {
 
+            # that will block until DB is back
+            $self->db_reconnect;
+        }
+        else {
+            $LOGGER->fatal( "Failed to start $0" );
+            ${ $self->{break} }++;
+        }
+    }
+
     return $self;
 }
 
@@ -960,7 +979,7 @@
       . Opsview::Config->runtime_db
       . ";host=localhost";
 
-    $DB = DBI->connect_cached(
+    $DB = DBI->connect(
         $dsn,
         Opsview::Config->runtime_dbuser,
         Opsview::Config->runtime_dbpasswd,
@@ -972,14 +991,15 @@
             mysql_server_prepare => 1,
             Callbacks            => {
                 connected => sub {
-                    $_[0]->do("SET time_zone='+00:00'");
+                    $_[0]->do( "SET time_zone='+00:00'" );
 
                     # needs to return undef
                     return;
                   }
             }
         }
-    );
+    ) or die $DBI::errstr;
+
     return;
 }
 
@@ -990,12 +1010,9 @@
 }
 
 sub send_log {
-    my $result = 0;
-
-    # $self->{break} is the global break
-    $_[ ARG_SELF() ]->{logger}->debug(
+    $LOGGER->debug(
         "Importing " . $_[ ARG_FILE() ] . ". Size=" . $_[ ARG_FILESIZE() ] )
-      if $_[ ARG_SELF() ]->{logger}->is_debug;
+      if $LOGGER->is_debug;
 
     $LOADING_RETENTION_DATA_FLAG = 0;
 
@@ -1003,13 +1020,28 @@
     my $data =
       $_[ ARG_SELF() ]->parse_c( $_[ ARG_FILE() ], $_[ ARG_FILESIZE() ] );
 
-    set_latest_data_times();
+    eval { set_latest_data_times(); };
+    if ( my $e = $@ ) {
+        if ( !$DB || $e =~ /$DBCONNERR/ ) {
 
+            # that will block until DB is back
+            $_[ ARG_SELF() ]->db_reconnect;
+        }
+        else {
+            $LOGGER->fatal( "Error for " . $_[ ARG_FILE() ] . ": $e" );
+            return 0;
+        }
+    }
+
     #$LAST_NOTIFICATION_ID = 0;
     #$LAST_CONTACT_NOTIFICATION_ID = 0;
 
-    for ( my $i = 0; $i < scalar @$data; $i += 2 ) {
+    my $events = @$data;
+    EVENTS: for ( my $i = 0; $i < $events; $i += 2 ) {
 
+        # caught signal - $self->{break} is the global break
+        return 0 if ${ $_[ ARG_SELF() ]->{break} };
+
         # if ( $event_type eq NDO_API_STARTCONFIGDUMP() ) {
         #   there was a reload
         #   fork;
@@ -1017,23 +1049,31 @@
 
         if ( my $m = $HANDLERS[ $data->[$i] ] ) {
 
-            eval {
-                #$DB->txn( ping => sub {
-                #DBI->trace($ENV{DBI_TRACE2} || 0) unless $ENV{DBI_TRACE};
-                $result = $_[ ARG_SELF() ]->$m( $data->[ $i + 1 ] );
+            eval { $_[ ARG_SELF() ]->$m( $data->[ $i + 1 ] ); };
+            if ( my $e = $@ ) {
+                if ( !$DB || $e =~ /$DBCONNERR/ ) {
 
-                #DBI->trace(0) unless $ENV{DBI_TRACE};
-                #});
-            };
-            if ( my $e = $@ ) {
-                $_[ ARG_SELF() ]->{logger}->fatal( "Error in $m: $e" );
+                    # that will block until DB is back
+                    $_[ ARG_SELF() ]->db_reconnect;
+
+                    # restart with failed event
+                    redo EVENTS;
+                }
+                else {
+                    $LOGGER->fatal(
+                        "Error for " . $_[ ARG_FILE() ] . " in $m: $e"
+                    );
+
+                    return 0;
+                }
             }
+
         }
     }
 
     # $_[ ARG_SELF() ]->db_perform_maintenance();
 
-    return $result;
+    return 1;
 }
 
 sub db_clear_table {
@@ -1043,7 +1083,7 @@
     $sth->execute();
 }
 
-my $sth_SET_INACTIVE;
+our $sth_SET_INACTIVE;
 
 sub set_all_objects_as_inactive {
 
@@ -1060,7 +1100,7 @@
     $sth_SET_INACTIVE->execute();
 }
 
-my $sth_SET_ACTIVE;
+our $sth_SET_ACTIVE;
 
 sub set_object_as_active {
     $sth_SET_ACTIVE = $DB->prepare_cached(
@@ -1089,7 +1129,7 @@
             ORDER BY } . $_[ ARG_COLUMN() ] . q{ DESC
             LIMIT 1
         }
-    ) or die $DB->errstr;
+    );
 
     $sth->execute();
     my $latest_time;
@@ -1199,8 +1239,8 @@
     }
 }
 
-my $sth_update_handle_NOTIFICATIONDATA;
-my $sth_insert_handle_NOTIFICATIONDATA;
+our $sth_update_handle_NOTIFICATIONDATA;
+our $sth_insert_handle_NOTIFICATIONDATA;
 
 sub handle_NOTIFICATIONDATA { # 205
 
@@ -1309,7 +1349,7 @@
     return 1;
 }
 
-my $sth_insert_handle_SERVICECHECKDATA;
+our $sth_insert_handle_SERVICECHECKDATA;
 
 sub handle_SERVICECHECKDATA {
 
@@ -1403,7 +1443,7 @@
     return 1;
 }
 
-my $sth_insert_handle_HOSTCHECKDATA;
+our $sth_insert_handle_HOSTCHECKDATA;
 
 sub handle_HOSTCHECKDATA {
 
@@ -1502,12 +1542,12 @@
     return 1;
 }
 
-my $sth_update_handle_COMMENTDATA;
-my $sth_insert_handle_COMMENTDATA;
-my $sth_delete_handle_COMMENTDATA;
-my $sth_update_comments_handle_COMMENTDATA;
-my $sth_insert_comments_handle_COMMENTDATA;
-my $sth_delete_comments_handle_COMMENTDATA;
+our $sth_update_handle_COMMENTDATA;
+our $sth_insert_handle_COMMENTDATA;
+our $sth_delete_handle_COMMENTDATA;
+our $sth_update_comments_handle_COMMENTDATA;
+our $sth_insert_comments_handle_COMMENTDATA;
+our $sth_delete_comments_handle_COMMENTDATA;
 
 sub handle_COMMENTDATA {
 
@@ -1720,14 +1760,14 @@
     return 1;
 }
 
-my $sth_update_handle_DOWNTIMEDATA;
-my $sth_insert_handle_DOWNTIMEDATA;
-my $sth_update_start_handle_DOWNTIMEDATA;
-my $sth_update_stop_handle_DOWNTIMEDATA;
-my $sth_update_schedule_handle_DOWNTIMEDATA;
-my $sth_insert_schedule_handle_DOWNTIMEDATA;
-my $sth_update_start_schedule_handle_DOWNTIMEDATA;
-my $sth_update_stop_schedule_handle_DOWNTIMEDATA;
+our $sth_update_handle_DOWNTIMEDATA;
+our $sth_insert_handle_DOWNTIMEDATA;
+our $sth_update_start_handle_DOWNTIMEDATA;
+our $sth_update_stop_handle_DOWNTIMEDATA;
+our $sth_update_schedule_handle_DOWNTIMEDATA;
+our $sth_insert_schedule_handle_DOWNTIMEDATA;
+our $sth_update_start_schedule_handle_DOWNTIMEDATA;
+our $sth_update_stop_schedule_handle_DOWNTIMEDATA;
 
 sub handle_DOWNTIMEDATA {
 
@@ -2037,8 +2077,8 @@
     return 1;
 }
 
-my $sth_update_handle_PROGRAMSTATUSDATA;
-my $sth_insert_handle_PROGRAMSTATUSDATA;
+our $sth_update_handle_PROGRAMSTATUSDATA;
+our $sth_insert_handle_PROGRAMSTATUSDATA;
 
 sub handle_PROGRAMSTATUSDATA {
 
@@ -2160,10 +2200,10 @@
     return 1;
 }
 
-my $sth_insert_handle_HOSTSTATUSDATA;
-my $sth_update_handle_HOSTSTATUSDATA;
-my $sth_insert_downtime_handle_HOSTSTATUSDATA;
-my $sth_update_downtime_handle_HOSTSTATUSDATA;
+our $sth_insert_handle_HOSTSTATUSDATA;
+our $sth_update_handle_HOSTSTATUSDATA;
+our $sth_insert_downtime_handle_HOSTSTATUSDATA;
+our $sth_update_downtime_handle_HOSTSTATUSDATA;
 
 sub handle_HOSTSTATUSDATA {
 
@@ -2494,10 +2534,10 @@
     return 1;
 }
 
-my $sth_insert_handle_SERVICESTATUSDATA;
-my $sth_update_handle_SERVICESTATUSDATA;
-my $sth_insert_downtime_handle_SERVICESTATUSDATA;
-my $sth_update_downtime_handle_SERVICESTATUSDATA;
+our $sth_insert_handle_SERVICESTATUSDATA;
+our $sth_update_handle_SERVICESTATUSDATA;
+our $sth_insert_downtime_handle_SERVICESTATUSDATA;
+our $sth_update_downtime_handle_SERVICESTATUSDATA;
 
 sub handle_SERVICESTATUSDATA {
 
@@ -2903,7 +2943,7 @@
     }
 }
 
-my $sth_insert_handle_MULTI_PARENTHOST;
+our $sth_insert_handle_MULTI_PARENTHOST;
 
 sub handle_MULTI_PARENTHOST {
 
@@ -2928,7 +2968,7 @@
     }
 }
 
-my $sth_insert_handle_MULTI_TIMERANGE;
+our $sth_insert_handle_MULTI_TIMERANGE;
 
 sub handle_MULTI_TIMERANGE {
 
@@ -2956,8 +2996,8 @@
     }
 }
 
-my $sth_insert_handle_MULTI_CUSTOMVARIABLE;
-my $sth_update_handle_MULTI_CUSTOMVARIABLE;
+our $sth_insert_handle_MULTI_CUSTOMVARIABLE;
+our $sth_update_handle_MULTI_CUSTOMVARIABLE;
 
 sub handle_MULTI_CUSTOMVARIABLE {
 
@@ -3004,8 +3044,8 @@
     }
 }
 
-my $sth_insert_handle_MULTI_CUSTOMVARIABLESTATUS;
-my $sth_update_handle_MULTI_CUSTOMVARIABLESTATUS;
+our $sth_insert_handle_MULTI_CUSTOMVARIABLESTATUS;
+our $sth_update_handle_MULTI_CUSTOMVARIABLESTATUS;
 
 sub handle_MULTI_CUSTOMVARIABLESTATUS {
 
@@ -3050,12 +3090,12 @@
     }
 }
 
-my $sth_update_handle_CONTACTSTATUSDATA;
-my $sth_insert_handle_CONTACTSTATUSDATA;
+our $sth_update_handle_CONTACTSTATUSDATA;
+our $sth_insert_handle_CONTACTSTATUSDATA;
 
 sub handle_CONTACTSTATUSDATA {
 
-    my $sth_update_handle_CONTACTSTATUSDATA = $DB->prepare_cached(
+    $sth_update_handle_CONTACTSTATUSDATA = $DB->prepare_cached(
         q{
             UPDATE nagios_contactstatus SET
 
@@ -3074,7 +3114,7 @@
         }
     ) unless defined $sth_update_handle_CONTACTSTATUSDATA;
 
-    my $sth_insert_handle_CONTACTSTATUSDATA = $DB->prepare_cached(
+    $sth_insert_handle_CONTACTSTATUSDATA = $DB->prepare_cached(
         q{
             INSERT INTO nagios_contactstatus SET
 
@@ -3162,7 +3202,7 @@
     return 1;
 }
 
-my $sth_insert_handle_ACKNOWLEDGEMENTDATA;
+our $sth_insert_handle_ACKNOWLEDGEMENTDATA;
 
 sub handle_ACKNOWLEDGEMENTDATA {
 
@@ -3230,9 +3270,9 @@
     return 1;
 }
 
-my $sth_insert_handle_STATECHANGEDATA;
-my $sth_select_downtimehist_handle_STATECHANGEDATA;
-my $sth_update_downtimehist_handle_STATECHANGEDATA;
+our $sth_insert_handle_STATECHANGEDATA;
+our $sth_select_downtimehist_handle_STATECHANGEDATA;
+our $sth_update_downtimehist_handle_STATECHANGEDATA;
 
 sub handle_STATECHANGEDATA {
 
@@ -3395,8 +3435,8 @@
     return 1;
 }
 
-my $sth_insert_handle_PROCESSDATA;
-my $sth_update_endtime_handle_PROCESSDATA;
+our $sth_insert_handle_PROCESSDATA;
+our $sth_update_endtime_handle_PROCESSDATA;
 
 sub handle_PROCESSDATA {
 
@@ -3513,9 +3553,9 @@
     return 1;
 }
 
-my $sth_update_handle_HOSTDEFINITION;
-my $sth_insert_handle_HOSTDEFINITION;
-my $sth_fetch_ID_handle_HOSTDEFINITION;
+our $sth_update_handle_HOSTDEFINITION;
+our $sth_insert_handle_HOSTDEFINITION;
+our $sth_fetch_ID_handle_HOSTDEFINITION;
 
 sub handle_HOSTDEFINITION {
 
@@ -3811,9 +3851,9 @@
     return 1;
 }
 
-my $sth_update_handle_SERVICEDEFINITION;
-my $sth_insert_handle_SERVICEDEFINITION;
-my $sth_fetch_ID_handle_SERVICEDEFINITION;
+our $sth_update_handle_SERVICEDEFINITION;
+our $sth_insert_handle_SERVICEDEFINITION;
+our $sth_fetch_ID_handle_SERVICEDEFINITION;
 
 sub handle_SERVICEDEFINITION {
 
@@ -4093,8 +4133,8 @@
     return 1;
 }
 
-my $sth_update_handle_SERVICEDEPENDENCYDEFINITION;
-my $sth_insert_handle_SERVICEDEPENDENCYDEFINITION;
+our $sth_update_handle_SERVICEDEPENDENCYDEFINITION;
+our $sth_insert_handle_SERVICEDEPENDENCYDEFINITION;
 
 sub handle_SERVICEDEPENDENCYDEFINITION {
 
@@ -4190,8 +4230,8 @@
     return 1;
 }
 
-my $sth_update_handle_COMMANDDEFINITION;
-my $sth_insert_handle_COMMANDDEFINITION;
+our $sth_update_handle_COMMANDDEFINITION;
+our $sth_insert_handle_COMMANDDEFINITION;
 
 sub handle_COMMANDDEFINITION {
 
@@ -4249,9 +4289,9 @@
     return 1;
 }
 
-my $sth_update_handle_TIMEPERIODDEFINITION;
-my $sth_insert_handle_TIMEPERIODDEFINITION;
-my $sth_fetch_ID_handle_TIMEPERIODDEFINITION;
+our $sth_update_handle_TIMEPERIODDEFINITION;
+our $sth_insert_handle_TIMEPERIODDEFINITION;
+our $sth_fetch_ID_handle_TIMEPERIODDEFINITION;
 
 sub handle_TIMEPERIODDEFINITION {
 
@@ -4345,8 +4385,8 @@
     return 1;
 }
 
-my $sth_update_handle_CONTACTDEFINITION;
-my $sth_insert_handle_CONTACTDEFINITION;
+our $sth_update_handle_CONTACTDEFINITION;
+our $sth_insert_handle_CONTACTDEFINITION;
 
 #my $sth_fetch_ID_handle_CONTACTDEFINITION;
 
@@ -4519,7 +4559,7 @@
     return 1;
 }
 
-my $sth_insert_handle_HOSTGROUPDEFINITION;
+our $sth_insert_handle_HOSTGROUPDEFINITION;
 
 sub handle_HOSTGROUPDEFINITION {
 
@@ -4569,7 +4609,7 @@
     return 1;
 }
 
-my $sth_insert_handle_CONTACTGROUPDEFINITION;
+our $sth_insert_handle_CONTACTGROUPDEFINITION;
 
 sub handle_CONTACTGROUPDEFINITION {
 
@@ -4623,6 +4663,59 @@
     return 1;
 }
 
+sub db_reconnect {
+
+    if ( defined $DB ) {
+        for my $sth ( @{ $DB->{ChildHandles} } ) {
+            next unless defined $sth;
+
+            $sth->finish;
+
+            # $sth is a tied hash
+            $sth->DESTROY;
+        }
+
+        $DB->STORE( CachedKids => {} );
+
+        $DB->disconnect;
+
+        undef $DB;
+    }
+
+    DB_RECONNECT: while () {
+
+        $LOGGER->warn( "Reconnecting to database" );
+        sleep 3;
+
+        eval { $_[ ARG_SELF() ]->db_connect(); };
+        if ($@) {
+            $LOGGER->fatal( "..reconnecting failed" );
+            next DB_RECONNECT;
+        }
+
+        reset_cached_statements();
+
+        return 1;
+    }
+
+    return 0;
+}
+
+sub reset_cached_statements {
+
+    # get package variables
+    my %pv = %Opsview::Utils::NDOLogsImporter::;
+
+    no strict 'refs';
+    for my $sth ( grep {/^sth_/} keys %pv ) {
+        my $v = \${ __PACKAGE__ . "::$sth" };
+        next unless defined $$v;
+        $$v->finish;
+        $$v->DESTROY;
+        undef $$v;
+    }
+}
+
 1;
 
 __END__

_______________________________________________
Opsview-checkins mailing list
Opsview-checkins@lists.opsview.org
http://lists.opsview.org/lists/listinfo/opsview-checkins

Reply via email to