This is an automated email from the ASF dual-hosted git repository.

maxyang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit eba05de737eea2e62926749bdb729acc55dc6bfb
Author: Huansong Fu <[email protected]>
AuthorDate: Thu Jun 8 13:17:11 2023 -0700

Resolve FIXMEs in datetime.c

The old GPDB version of the calculation (added in 059ebb1da10b1f15487ba74dd1bada185093bbf2) and the new upstream version (added in aa2387e2fd53) are essentially the same: both try to optimize away sprintf and do a byte-by-byte conversion instead.

Similar to what upstream did for aa2387e2fd53, I compared the old GPDB version against the upstream version:

```
-- prerequisite:
create table t(a int, b timestamp);
insert into t select 0, '2018-01-01 10:10:00.1'::timestamp from generate_series(1,10000000);
\timing on

-- and measure the copy performance:
copy t to '/tmp/copyto.csv';
```

This only exercises AppendSeconds(), but the other two functions should show similar results.

1. Old AppendSeconds() (note: the missing function TrimTrailingZeros() was added back for this test)

```
postgres=# copy t to '/tmp/copyto.csv';
COPY 10000000
Time: 7232.761 ms (00:07.233)
postgres=# copy t to '/tmp/copyto.csv';
COPY 10000000
Time: 7137.877 ms (00:07.138)
postgres=# copy t to '/tmp/copyto.csv';
COPY 10000000
Time: 7189.453 ms (00:07.189)
```

average: 7186 ms

2. New AppendSeconds()

```
postgres=# copy t to '/tmp/copyto.csv';
COPY 10000000
Time: 7281.391 ms (00:07.281)
postgres=# copy t to '/tmp/copyto.csv';
COPY 10000000
Time: 7231.939 ms (00:07.232)
postgres=# copy t to '/tmp/copyto.csv';
COPY 10000000
Time: 7211.126 ms (00:07.211)
```

average: 7241 ms

The difference is almost negligible. In addition, the old AppendSeconds() has some limitations:

* Surprisingly, it does not handle precision at all. It used to handle it in the non-HAVE_INT64_TIMESTAMP case, but since that case was removed in b9d092c962ea3262930e3c31a8c3d79b66ce9d43 the precision parameter is completely ignored.
* It assumes the seconds value is always two digits.
That may not be a big deal in most cases, but given the many call sites of AppendSeconds() it is still better to make it more robust. Using the upstream version also means we pick up further upstream optimizations in the future.
---
 src/backend/utils/adt/datetime.c | 68 +---------------------------------------
 1 file changed, 1 insertion(+), 67 deletions(-)

diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index 841013b5d3..dc06cbfd7d 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -446,31 +446,6 @@ AppendSeconds(char *cp, int sec, fsec_t fsec, int precision, bool fillzeros)
 {
 	Assert(precision >= 0);
 
-	/* GPDB_96_MERGE_FIXME: We had this faster version in GPDB. PostgreSQL
-	 * also added faster versions in commit aa2387e2fd. Performance test is
-	 * the old GPDB variants are even faster, or if we could drop the diff
-	 * and just use upstream code. For now, the GPDB version is disabled
-	 * and we use the upstream code.
-	 */
-#if 0
-	int j = 0;
-
-	if (fillzeros || abs(sec) > 9)
-		cp[j++] = abs(sec) / 10 + '0';
-	cp[j++] = abs(sec) % 10 + '0';
-	cp[j++] = '.';
-	cp[j++] = ((int) Abs(fsec)) / 100000 + '0';
-	cp[j++] = ((int) Abs(fsec)) / 10000 % 10 + '0';
-	cp[j++] = ((int) Abs(fsec)) / 1000 % 10 + '0';
-	cp[j++] = ((int) Abs(fsec)) / 100 % 10 + '0';
-	cp[j++] = ((int) Abs(fsec)) / 10 % 10 + '0';
-	cp[j++] = ((int) Abs(fsec)) % 10 + '0';
-	cp[j] = '\0';
-
-#endif
-
-	/* fsec_t is just an int32 */
-
 	if (fillzeros)
 		cp = pg_ultostr_zeropad(cp, Abs(sec), 2);
 	else
@@ -3978,31 +3953,13 @@ EncodeDateOnly(struct pg_tm *tm, int style, char *str)
 		case USE_ISO_DATES:
 		case USE_XSD_DATES:
 			/* compatible with ISO date formats */
-			/* GPDB_96_MERGE_FIXME: We had this faster version in GPDB. PostgreSQL
-			 * also added faster versions in commit aa2387e2fd. Performance test is
-			 * the old GPDB variants are even faster, or if we could drop the diff
-			 * and just use upstream code. For now, the GPDB version is disabled
-			 * and we use the upstream code.
-			 */
-#if 0
-			if (tm->tm_year > 0)
-			{
-				// sprintf(str, "%04d-%02d-%02d",
-				//		tm->tm_year, tm->tm_mon, tm->tm_mday);
-				int j = 0;
-				fast_encode_date(tm, str, &j);
-			}
-			else
-				sprintf(str, "%04d-%02d-%02d %s",
-						-(tm->tm_year - 1), tm->tm_mon, tm->tm_mday, "BC");
-#else
 			str = pg_ultostr_zeropad(str,
 									 (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1), 4);
 			*str++ = '-';
 			str = pg_ultostr_zeropad(str, tm->tm_mon, 2);
 			*str++ = '-';
 			str = pg_ultostr_zeropad(str, tm->tm_mday, 2);
-#endif
+
 			break;
 
 		case USE_SQL_DATES:
@@ -4075,28 +4032,6 @@ EncodeDateOnly(struct pg_tm *tm, int style, char *str)
 void
 EncodeTimeOnly(struct pg_tm *tm, fsec_t fsec, bool print_tz, int tz, int style, char *str)
 {
-	/* GPDB_96_MERGE_FIXME: We had this faster version in GPDB. PostgreSQL
-	 * also added faster versions in commit aa2387e2fd. Performance test is
-	 * the old GPDB variants are even faster, or if we could drop the diff
-	 * and just use upstream code. For now, the GPDB version is disabled
-	 * and we use the upstream code.
-	 *
-	 * If we still need the old GPDB version, make sure it was actually correct.
-	 * It seems to ignore the 'print_tz' argument...
-	 */
-#if 0
-	str[0] = tm->tm_hour / 10 + '0';
-	str[1] = tm->tm_hour % 10 + '0';
-	str[2] = ':';
-	str[3] = tm->tm_min / 10 + '0';
-	str[4] = tm->tm_min % 10 + '0';
-	str[5] = ':';
-	str[6] = '\0';
-	str += strlen(str);
-
-	AppendSeconds(str, tm->tm_sec, fsec, MAX_TIME_PRECISION, true);
-#else
-
 	str = pg_ultostr_zeropad(str, tm->tm_hour, 2);
 	*str++ = ':';
 	str = pg_ultostr_zeropad(str, tm->tm_min, 2);
@@ -4105,7 +4040,6 @@ EncodeTimeOnly(struct pg_tm *tm, fsec_t fsec, bool print_tz, int tz, int style,
 	if (print_tz)
 		str = EncodeTimezone(str, tz, style);
 	*str = '\0';
-#endif
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
