(cloudberry) 09/24: Fix gplogfilter csv generation

yjhjstz Wed, 25 Dec 2024 15:39:14 -0800

This is an automated email from the ASF dual-hosted git repository.

yjhjstz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git


commit 4fa189964265c550568bfbd315a23f872fad7eb6
Author: t1mursadykov <[email protected]>
AuthorDate: Tue Aug 16 16:12:15 2022 -0700

    Fix gplogfilter csv generation
    
    The output of gplogfilter could be parsed ambiguously, because
    fields were joined with '|' without any quoting. To fix this,
    the standard csv.writer class is now used to generate the CSV.
    
    Reviewed-by: Jamie McAtamney <[email protected]>
    Reviewed-by: SmartKeyerror <[email protected]>
---
 gpMgmt/bin/gppylib/logfilter.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/gpMgmt/bin/gppylib/logfilter.py b/gpMgmt/bin/gppylib/logfilter.py
index 2027383795..c427ac1a6c 100644
--- a/gpMgmt/bin/gppylib/logfilter.py
+++ b/gpMgmt/bin/gppylib/logfilter.py
@@ -42,11 +42,15 @@ Module contents:
     spiffInterval() - get begin/end datetime given any subset of begin/end/duration
 """
 
+import io
+import csv
 from datetime import date, datetime
 import re
 import sys
 import time
 
+csvDelimeter = '|'
+
 timestampPattern = re.compile(r'\d\d\d\d-\d\d-\d\d \d\d:\d\d:\d\d(\.\d*)?')
 # This pattern matches the date and time stamp at the beginning of a line
 # in a GPDB log file.  The timestamp format is: YYYY-MM-DD HH:MM:SS[.frac]
@@ -264,6 +268,8 @@ class CsvFlatten(object):
 
     def __init__(self,iterable):
         self.source = iter(iterable)
+        self.buffer = io.StringIO()
+        self.writer = csv.writer(self.buffer, delimiter=csvDelimeter, 
quotechar='"', quoting=csv.QUOTE_MINIMAL)
 
     def __iter__(self):
         return self
@@ -273,8 +279,11 @@ class CsvFlatten(object):
         #we need to make a minor format change to the log level field so that
         # our single regex will match both.
         item[16] = item[16] + ": "
-        return '|'.join(item) + "\n"
 
+        self.buffer.truncate(0)
+        self.writer.writerow(item)
+
+        return self.buffer.getvalue()
 
 #------------------------------- Spying --------------------------------
 
@@ -697,13 +706,13 @@ def MatchColumns(iterable, cols):
                 n = 1
                 out = []
 
-                for c in s.split('|'):
+                for c in csv.reader(s, delimiter=csvDelimeter, quotechar='"'):
                     if n in cols:
                         out.append(c)
                     n += 1
                 if len(out):
                     #print out
-                    ret.append('|'.join(out) + "\n")
+                    ret.append(csvDelimeter.join(out) + "\n")
             yield ret
 
 #-------------------------------- Slicing --------------------------------


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(cloudberry) 09/24: Fix gplogfilter csv generation

Reply via email to