I discovered UnreadChar, so mode 2 (indented follow-up lines) now has a
chance of working. Again, this is compile-tested (and visually code-reviewed by
someone else), but not executed.
David Lang
On Mon, 13 Dec 2010, [email protected] wrote:
This is a first cut of a modification to imfile to let it read multi-line
files.
As-is, this should have no effect on a system, as it hard-codes the mode to
reading single lines. (I really don't understand how to set a config variable,
but for someone who does, it should be simple to replace the '0' in imfile.c
with the value from the config file.)
With this config option change, it should be possible to read logfiles that
have blank lines between multi-line log entries and have each of those log
entries treated as a single line.
I also have code in place (but disabled) to try and deal with the more
complicated layout where all lines after the first one are indented if they
are part of the same log entry. The problem I have is that when I discover
that I have finished reading a log entry, I have already read the first
character of the next log entry. This extra character needs to be put back
into the input buffer, but I don't know if that is possible or not. If it
isn't possible, I need a function that will let me peek at the next character
in the input buffer and make my decision based on that.
This compiles, but I have not tested it anywhere yet. With the hardcoded mode
0 (LF termination), there should be no change other than an extra test
against a constant for each character read from a file.
David Lang
diff --git a/plugins/imfile/imfile.c b/plugins/imfile/imfile.c
index 8a10e26..72d6e69 100644
--- a/plugins/imfile/imfile.c
+++ b/plugins/imfile/imfile.c
@@ -206,7 +206,7 @@ static rsRetVal pollFile(fileInfo_t *pThis, int *pbHadFileData)
/* loop below will be exited when strmReadLine() returns EOF */
while(1) {
- CHKiRet(strm.ReadLine(pThis->pStrm, &pCStr));
+ CHKiRet(strm.ReadMultiLine(pThis->pStrm, &pCStr, 0));
*pbHadFileData = 1; /* this is just a flag, so set it and forget it */
CHKiRet(enqLine(pThis, pCStr)); /* process line */
rsCStrDestruct(&pCStr); /* discard string (must be done by us!) */
diff --git a/runtime/stream.c b/runtime/stream.c
index b429576..00ee984 100644
--- a/runtime/stream.c
+++ b/runtime/stream.c
@@ -554,6 +554,81 @@ static rsRetVal strmUnreadChar(strm_t *pThis, uchar c)
return RS_RET_OK;
}
+/* Read one log record (a 'paragraph') from a strm file. This was modified from ReadLine.
+ * The mode argument selects how the end of a record is detected:
+ *   mode = 0  single line mode (equivalent to ReadLine), a record ends at the first LF
+ *   mode = 1  LFLF mode (paragraph, blank line between entries); a single LF is kept
+ *             inside the record, the terminating LFLF is removed
+ *   mode = 2  LF <not whitespace> mode, a log line starts at the beginning of a line,
+ *             but following lines that begin with a space or tab are part of the same
+ *             log entry; the first character of the next entry is put back into the
+ *             input buffer via strmUnreadChar()
+ * Any other mode value is handled like mode 0. In modes 1 and 2, a LF that is read
+ * before any record data yields an empty record.
+ * This modal interface is not nearly as flexible as being able to define a regex for
+ * when a new record starts, but it's also not nearly as hard (or as slow) to implement.
+ * The terminating LF characters are read, but are not returned in the buffer (they are
+ * discarded). On success the caller is responsible for destruction of the returned
+ * CStr object! If any call fails (including a failing strmReadChar() in the middle of
+ * a record), the partially built string is destructed here and the error is returned.
+ * -- dlang 2010-12-13
+ */
+static rsRetVal
+strmReadMultiLine(strm_t *pThis, cstr_t **ppCStr, uchar mode)
+{
+	DEFiRet;
+	uchar c;
+	size_t len; /* number of characters collected so far */
+	int bPrevIsLF; /* is the last character collected so far a LF? */
+
+	ASSERT(pThis != NULL);
+	ASSERT(ppCStr != NULL);
+
+	CHKiRet(cstrConstruct(ppCStr));
+
+	/* now read the record, one character at a time */
+	CHKiRet(strmReadChar(pThis, &c));
+	while(1) {
+		len = (*ppCStr)->iStrLen;
+		bPrevIsLF = (len > 0 && (*ppCStr)->pBuf[len - 1] == '\n');
+
+		if(mode == 1) {
+			if(c == '\n' && (len == 0 || bPrevIsLF)) {
+				if(bPrevIsLF) {
+					/* remove the first LF of the LFLF terminator */
+					CHKiRet(rsCStrTruncate(*ppCStr, 1));
+				}
+				break;
+			}
+		} else if(mode == 2) {
+			if(len == 0) {
+				if(c == '\n') {
+					break; /* blank line, nothing collected since the last record */
+				}
+			} else if(bPrevIsLF && c != ' ' && c != '\t') {
+				/* c starts the next record: put it back into the input
+				 * buffer and remove the LF character that is currently at
+				 * the end of the output string */
+				CHKiRet(strmUnreadChar(pThis, c));
+				CHKiRet(rsCStrTruncate(*ppCStr, 1));
+				break;
+			}
+		} else if(c == '\n') {
+			break; /* mode 0 and unknown modes: plain LF termination */
+		}
+
+		CHKiRet(cstrAppendChar(*ppCStr, c));
+		CHKiRet(strmReadChar(pThis, &c));
+	}
+	CHKiRet(cstrFinalize(*ppCStr));
+
+finalize_it:
+	if(iRet != RS_RET_OK && *ppCStr != NULL)
+		cstrDestruct(ppCStr);
+
+	RETiRet;
+}
+
/* read a line from a strm file. A line is terminated by LF. The LF is read, but it
* is not returned in the buffer (it is discared). The caller is responsible for
diff --git a/runtime/stream.h b/runtime/stream.h
index 37e9d57..989e9b6 100644
--- a/runtime/stream.h
+++ b/runtime/stream.h
@@ -156,6 +156,7 @@ BEGINinterface(strm) /* name must also be changed in ENDinterface macro! */
rsRetVal (*SetFileName)(strm_t *pThis, uchar *pszName, size_t iLenName);
rsRetVal (*ReadChar)(strm_t *pThis, uchar *pC);
rsRetVal (*UnreadChar)(strm_t *pThis, uchar c);
+ rsRetVal (*ReadMultiLine)(strm_t *pThis, cstr_t **ppCStr, uchar mode);
rsRetVal (*ReadLine)(strm_t *pThis, cstr_t **ppCStr);
rsRetVal (*SeekCurrOffs)(strm_t *pThis);
rsRetVal (*Write)(strm_t *pThis, uchar *pBuf, size_t lenBuf);
_______________________________________________
rsyslog mailing list
http://lists.adiscon.net/mailman/listinfo/rsyslog
http://www.rsyslog.com