On Fri, Nov 12, 2010 at 05:04:35PM +0100, bs_li...@aakef.fastmail.fm wrote:
> [Sorry for sending it twice, but first time I used a sender address, 
> which is not subscribed to the mailing list.]
> 
> Hi all,
> 
> the following patches are to better handle bug 2470 and have some generic 
> improvements. 
> 
> 
> Changes to v1 sent in September: 
> Split the first patch into 3 parts
> 
> Sorry that that took so long, but I didn't find a minute before

[PATCH 01 of 10] cl_log: Make functions static
[PATCH 02 of 10] cl_logd: Remove childlog_dump_ipcmsg_stats
[PATCH 03 of 10] cl_log: Remove CircularBuffer
[PATCH 09 of 10] cl_log: Fix white space

        Applied without changes.
        oh, I should have fixed the cl_logd to ha_logd in the commit message.
        too late ;-)

[PATCH 05 of 10] ha_logd: Use C99 initializers, also correct max entity string 
length

        Applied with minimal changes.

================================
[PATCH 10 of 10] ha_logd: Add a SIGHUP signal handler to close/open log files
[PATCH 08 of 10] cl_log: Restore old logfile open/seek/write/close behaviour

        Conceptually changed.
        Proposed patches attached.
        Note: not even compile tested :(

        Please try/comment.
================================

[PATCH 07 of 10] ha_logd: New option to disable syslog logging

        Dropped.
        facility none is already available for that.

[PATCH 04 of 10] cl_log: Simplify a function

        Dropped for now, may be revised with the "common log entity",
        if we actually get to it.

[PATCH 06 of 10] cl_log: Always print the common log entity to syslog messages

        Postponed. I may propose a patch later today/tomorrow.

        The desired functionality should in most cases be achieved by
        using the log facility.  There are LOCAL0 to LOCAL7 reserved
        facilities, I dare say that in most deployments one of them can
        be used exclusively for cluster logging.

-- 
: Lars Ellenberg
: LINBIT | Your Way to High Availability
: DRBD/HA support and consulting http://www.linbit.com

DRBD® and LINBIT® are registered trademarks of LINBIT, Austria.
# HG changeset patch
# User Lars Ellenberg <l...@linbit.com>
# Date 1289982555 -3600
# Node ID 0d37ebc650ac67e86df8e5058a6d75a6ad3e8311
# Parent  4a748dfb780e81913f05a4e241ce82b38130cdd6
cl_log: reopen logfiles on inode change (logrotate), default to unbuffered io

This patch improves on ada347da564d, by adding support for logrotate even
without signal handlers, comparing the current stat inode info with the stored
one, and reopen files if that has changed (or the file has been moved).

Since we now can keep the log file streams open for a potentially very long time,
and the implicit flush we used to get from the fclose is missing,
default to use unbuffered IO.

Close logfile handles, if we start to log to the daemon,
or on file name or stdio buffer settings change.

Signed-off-by: Lars Ellenberg <l...@linbit.com>

diff --git a/lib/clplumbing/cl_log.c b/lib/clplumbing/cl_log.c
--- a/lib/clplumbing/cl_log.c
+++ b/lib/clplumbing/cl_log.c
@@ -80,6 +80,7 @@ static int		conn_logd_time = 0;
 static char		cl_log_entity[MAXENTITY]= DFLT_ENTITY;
 static char		common_log_entity[MAXENTITY]= DFLT_ENTITY;
 static int		cl_log_facility = LOG_USER;
+static int		use_buffered_io = 0;
 
 static void		cl_opensyslog(void);
 static int		syslog_enabled = 0;
@@ -103,6 +104,10 @@ IPC_Channel* get_log_chan(void){
 }
 /*************************/
 
+/* We keep the file handles open for a potentially very long time.
+ * Sometimes we need to close them explicitly. */
+static void close_log_files(void);
+
 /**************************
  * check if the fd is in use for logging
  **************************/
@@ -138,6 +143,12 @@ cl_log_enable_syslog_filefmt(int truefal
 {
 	syslogformatfile = (gboolean)truefalse;
 }
+void
+cl_log_use_buffered_io(int truefalse)
+{
+	use_buffered_io = truefalse;
+	close_log_files();
+}
 
 gboolean
 cl_log_get_uselogd(void)
@@ -403,6 +414,7 @@ cl_log_set_logfile(const char *	path)
 		path = NULL;
 	}
 	logfile_name = path;
+	close_log_files();
 }
 void
 cl_log_set_debugfile(const char * path)
@@ -411,6 +423,7 @@ cl_log_set_debugfile(const char * path)
 		path = NULL;
 	}
 	debugfile_name = path;
+	close_log_files();
 }
 
 
@@ -502,18 +515,89 @@ append_log(FILE * fp, const char * entit
 	,	msg);
 }
 
-/*
- * Just open the given file name
+/* As performance optimization we try to keep the file descriptor
+ * open all the time, but as logrotation needs to work, the calling
+ * program actually needs a signal handler.
+ *
+ * To be able to keep files open even without signal handler,
+ * we remember the stat info, and close/reopen if the inode changed.
+ * We keep the number of stat() calls to one per file per minute.
+ * logrotate should be configured for delayed compression, if any.
  */
-static FILE * 
-open_log_file(const char * fname)
+
+struct log_file_context {
+	FILE *fp;
+	struct stat stat_buf;
+};
+
+static void close_log_file(struct log_file_context *lfc)
 {
-	FILE * fp = fopen(fname ,"a");
-	if (!fp) {
+	/* ignore errors, we cannot do anything about them anyways */
+	fflush(lfc->fp);
+	fsync(fileno(lfc->fp));
+	fclose(lfc->fp);
+	lfc->fp = NULL;
+}
+
+static void close_log_files(void)
+{
+	if (log_file.fp)
+		close_log_file(&log_file);
+	if (debug_file.fp)
+		close_debug_file(&log_file);
+}
+
+static void maybe_close_log_file(const char *fname, struct log_file_context *lfc)
+{
+	struct stat buf;
+	if (!lcf->fp)
+		return;
+	if (stat(fname, &log_sb) || log_sb.st_ino != lfc->stat_buf.st_ino)
+		close_log_file(lfc);
+}
+
+/* Default to unbuffered IO.  logd or others can use cl_log_use_buffered_io(1)
+ * to enable fully buffered mode, and then use fflush appropriately.
+ */
+static void open_log_file(const char *fname, struct log_file_context *lfc)
+{
+	lfc->fp = fopen(fname ,"a");
+	if (!lfc->fp) {
 		syslog(LOG_ERR, "Failed to open log file %s: %s\n" , 
 		       fname, strerror(errno)); 
+	} else {
+		setvbuf(lfc->fp, NULL,
+				use_buffered_io ? _IOFBF : _IONBF,
+				BUFSIZ);
+		fstat(fileno(lfc->fp), &lfc->stat_buf);
 	}
-	return fp;
+	return lfc->fp;
+}
+
+static struct log_file_context log_file, debug_file;
+
+static void maybe_reopen_log_files(const char *log_fname, const char *debug_fname)
+{
+	static TIME_T last_stat_time;
+	struct stat log_sb, dbg_sb;
+	TIME_T now = time(NULL);
+	
+	if (re_open_logfiles) {
+		close_log_files();
+		re_open_logfiles = 0;
+	} else if (now - last_stat_time < 61) {
+		/* Don't use an exact minute,
+		 * have it jitter around a bit against cron or others. */
+		maybe_close_log_file(log_fname, &log_file);
+		maybe_close_log_file(debug_fname, &debug_file);
+		last_stat_time = now;
+	}
+
+	if (log_fname && !log_file.fp)
+		open_log_file(log_fname, &log_file);
+
+	if (debug_fname && !debug_file.fp)
+		open_log_file(debug_fname, &debug_file);
 }
 
 /*
@@ -557,29 +641,13 @@ cl_direct_log(int priority, const char* 
 		}
 	}
 
-	if (debugfile_name != NULL) {
-		static FILE * debug_fp = NULL;
-		if (!debug_fp) {
-			/* As performance optimization we keep the file-handle
-			 * open all the time */
-			debug_fp = open_log_file(debugfile_name);
-		}
-		if (debug_fp)
-			append_log(debug_fp ,entity, entity_pid, ts, pristr, 
-				   buf);
-	}
+	maybe_reopen_log_files(logfile_name, debugfile_name);
 
-	if (priority != LOG_DEBUG && logfile_name != NULL) {
-		static FILE * log_fp = NULL;
-		if (!log_fp) {
-			/* As performance optimization we keep the file-handle
-			 * open all the time */
-			log_fp = open_log_file(logfile_name);
-		}
-		if (log_fp)
-			append_log(log_fp ,entity, entity_pid, ts, pristr, 
-				   buf);
-	}
+	if (debug_file.fp)
+		append_log(debug_file.fp, entity, entity_pid, ts, pristr, buf);
+
+	if (priority != LOG_DEBUG && log_file.fp)
+		append_log(log_file.fp, entity, entity_pid, ts, pristr, buf);
 
 	if (needprivs) {
 		return_to_dropped_privs();
@@ -805,6 +873,10 @@ LogToLoggingDaemon(int priority, const c
 	int			sendrc = IPC_FAIL;
 	int			intval = conn_logd_time;
 	
+	/* make sure we don't hold file descriptors open
+	 * we don't intend to use again */
+	close_log_files();
+
 	if (chan == NULL) {
 		longclock_t	lnow = time_longclock();
 		

# HG changeset patch
# User Lars Ellenberg <l...@linbit.com>
# Date 1289982558 -3600
# Node ID 86554a4bbd50f23c2c7c2183740d85369510ee5d
# Parent  0d37ebc650ac67e86df8e5058a6d75a6ad3e8311
Medium: logd use bufferd io with fflush and fsync

For performance, use buffered IO with logd, and batch the current message
backlog together with one fflush. This should keep throughput high
while still flushing it to the log files in time.

It will add an fsync if priority ERROR or worse have been processed,
which again may hurt performance, but may be our last chance to get
messages on stable storage before this node is fenced.

diff --git a/include/clplumbing/cl_log.h b/include/clplumbing/cl_log.h
--- a/include/clplumbing/cl_log.h
+++ b/include/clplumbing/cl_log.h
@@ -34,6 +34,15 @@ extern int		debug_level;
 #define	DEBUGPKT	(debug_level >= 4)
 #define	DEBUGPKTCONT	(debug_level >= 5)
 
+/* cl_log_use_buffered_io and cl_log_do_fflush as optimization for logd,
+ * so it may buffer a few message lines, then fflush them out in one write.
+ * Set do_fsync != 0, if you even want it to fsync. */
+void		cl_log_do_fflush(int do_fsync);
+void		cl_log_use_buffered_io(int truefalse);
+/* We now keep the file handles open for a potentially very long time.
+ * Sometimes we may need to close them explicitly. */
+void		cl_log_close_log_files(void);
+
 void		cl_direct_log(int priority, const char* buf, gboolean, const char*, int, TIME_T);
 void            cl_log(int priority, const char * fmt, ...) G_GNUC_PRINTF(2,3);
 void            cl_perror(const char * fmt, ...) G_GNUC_PRINTF(1,2);
diff --git a/lib/clplumbing/cl_log.c b/lib/clplumbing/cl_log.c
--- a/lib/clplumbing/cl_log.c
+++ b/lib/clplumbing/cl_log.c
@@ -104,10 +104,6 @@ IPC_Channel* get_log_chan(void){
 }
 /*************************/
 
-/* We keep the file handles open for a potentially very long time.
- * Sometimes we need to close them explicitly. */
-static void close_log_files(void);
-
 /**************************
  * check if the fd is in use for logging
  **************************/
@@ -147,7 +143,13 @@ void
 cl_log_use_buffered_io(int truefalse)
 {
 	use_buffered_io = truefalse;
-	close_log_files();
+	cl_log_close_log_files();
+}
+void
+cl_log_use_buffered_io(int truefalse)
+{
+	use_buffered_io = truefalse;
+	re_open_logfiles = 1;
 }
 
 gboolean
@@ -414,7 +416,7 @@ cl_log_set_logfile(const char *	path)
 		path = NULL;
 	}
 	logfile_name = path;
-	close_log_files();
+	cl_log_close_log_files();
 }
 void
 cl_log_set_debugfile(const char * path)
@@ -423,7 +425,7 @@ cl_log_set_debugfile(const char * path)
 		path = NULL;
 	}
 	debugfile_name = path;
-	close_log_files();
+	cl_log_close_log_files();
 }
 
 
@@ -539,7 +541,7 @@ static void close_log_file(struct log_fi
 	lfc->fp = NULL;
 }
 
-static void close_log_files(void)
+void cl_log_close_log_files(void)
 {
 	if (log_file.fp)
 		close_log_file(&log_file);
@@ -583,7 +585,7 @@ static void maybe_reopen_log_files(const
 	TIME_T now = time(NULL);
 	
 	if (re_open_logfiles) {
-		close_log_files();
+		cl_log_close_log_files();
 		re_open_logfiles = 0;
 	} else if (now - last_stat_time < 61) {
 		/* Don't use an exact minute,
@@ -656,7 +658,19 @@ cl_direct_log(int priority, const char* 
 	return;
 }
 
-
+void cl_log_do_fflush(int do_fsync)
+{
+	if (log_file.fp) {
+		fflush(log_file.fp);
+		if (do_fsync)
+			fsync(fileno(log_file.fp));
+	}
+	if (debug_file.fp) {
+		fflush(debug_file.fp);
+		if (do_fsync)
+			fsync(fileno(debug_file.fp));
+	}
+}
 
 /*
  * This function can cost us realtime unless use_logging_daemon
@@ -875,7 +889,7 @@ LogToLoggingDaemon(int priority, const c
 	
 	/* make sure we don't hold file descriptors open
 	 * we don't intend to use again */
-	close_log_files();
+	cl_log_close_log_files();
 
 	if (chan == NULL) {
 		longclock_t	lnow = time_longclock();
diff --git a/logd/ha_logd.c b/logd/ha_logd.c
--- a/logd/ha_logd.c
+++ b/logd/ha_logd.c
@@ -755,10 +755,9 @@ read_msg_process(IPC_Channel* chan)
 static gboolean
 direct_log(IPC_Channel* ch, gpointer user_data)
 {
-	
 	IPC_Message*		ipcmsg;
 	GMainLoop*		loop;
-	
+	int			pri = LOG_DEBUG + 1;
 
 	loop =(GMainLoop*)user_data;
 	
@@ -796,7 +795,9 @@ direct_log(IPC_Channel* ch, gpointer use
 			,	copy.use_pri_str
 			,	copy.entity, copy.entity_pid
 			,	copy.timestamp);
-		
+
+			if (copy.priority < pri)
+				pri = copy.priority;
 
 			(void)logd_log;
 /*
@@ -815,6 +816,11 @@ direct_log(IPC_Channel* ch, gpointer use
 		}
 		
 	}
+	/* current message backlog processed,
+	 * about to return to mainloop,
+	 * fflush and potentially fsync stuff */
+	cl_log_do_fflush(pri <= LOG_ERR);
+
 	if(needs_shutdown) {
 		cl_log(LOG_INFO, "Exiting write process");
 		g_main_quit(loop);
@@ -1013,6 +1019,7 @@ main(int argc, char** argv, char** envp)
 		return -1;
 	case 0:
 		/*child*/
+		cl_log_use_buffered_io(1);
 		set_proc_title("ha_logd: write process");
 		write_msg_process(chanspair[WRITE_PROC_CHAN]);		
 		break;

_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/

Reply via email to