When keeping the time zone designation table small,
also handle the case where an existing abbreviation
is a suffix of the newly arrived abbreviation,
so that only the latter abbreviation needs to be stored.
The only TZif file in the current data affected by this change
is Asia/Ho_Chi_Minh, which shrinks by 4 bytes because
the earlier abbreviation LMT is a suffix of a later one PLMT.
As a nice side effect, this patch also fixes an unlikely stack
buffer overflow reported privately by Arthur Chan.
* NEWS: Mention this.
* zic.c (writezone, addtype):
Handle an abbreviation being added when an existing abbreviation is
its suffix.  Without this patch both abbreviations would appear in
this table, but only the longer abbreviation needs to be present.
(checkabbr): New function, containing the checking part
of the old newabbr function.
(newabbr): Remove.  Change uses to checkabbr + addabbr.
(addabbr): New function, which is like the old newabbr’s
adding code, except it removes any existing abbreviation
that is a suffix of the new one.
---
 NEWS  |   6 ++++
 zic.c | 110 +++++++++++++++++++++++++++++++++++++++-------------------
 2 files changed, 81 insertions(+), 35 deletions(-)

diff --git a/NEWS b/NEWS
index 1a97cbfe..09a15519 100644
--- a/NEWS
+++ b/NEWS
@@ -23,6 +23,12 @@ Unreleased, experimental changes
     "PST-167:59:58PDT-167:59:59,M11.5.6/-167:59:59,M12.5.6/-167:59:59",
     which can occur with adversarial input.  (Thanks to Naveed Khan.)
 
+    zic no longer generates a longer TZif file than necessary when
+    an earlier time zone abbreviation is a suffix of a later one.
+    As a nice side effect, zic no longer overflows a buffer when given
+    a long series of abbreviations, each a suffix of the next.
+    (Buffer overflow reported by Arthur Chan.)
+
     zic no longer overflows an int when processing input like ‘Zone
     Ouch 2147483648:00:00 - LMT’.  The int overflow can lead to buffer
     overflow in adversarial cases.  (Thanks to Naveed Khan.)
diff --git a/zic.c b/zic.c
index 6e859d09..7ecf3b91 100644
--- a/zic.c
+++ b/zic.c
@@ -253,12 +253,13 @@ symlink(char const *target, char const *linkname)
    (errno = ENOTSUP, -1)
 #endif
 
-static void    check_for_signal(void);
+static int     addabbr(char[TZ_MAX_CHARS], int *, char const *);
 static void    addtt(zic_t starttime, int type);
 static int     addtype(zic_t, char const *, bool, bool, bool);
-static void    leapadd(zic_t, int, int);
 static void    adjleap(void);
 static void    associate(void);
+static void    checkabbr(char const *);
+static void    check_for_signal(void);
 static void    dolink(const char *, const char *, bool);
 static int     getfields(char *, char **, int);
 static zic_t   gethms(const char * string, const char * errstring);
@@ -271,11 +272,11 @@ static void       inrule(char ** fields, int nfields);
 static bool    inzcont(char ** fields, int nfields);
 static bool    inzone(char ** fields, int nfields);
 static bool    inzsub(char **, int, bool);
-static int     itssymlink(char const *, int *);
 static bool    is_alpha(char a);
+static int     itssymlink(char const *, int *);
+static void    leapadd(zic_t, int, int);
 static char    lowerit(char);
 static void    mkdirs(char const *, bool);
-static void    newabbr(const char * abbr);
 static zic_t   oadd(zic_t t1, zic_t t2);
 static zic_t   omul(zic_t, zic_t);
 static void    outzone(const struct zone * zp, ptrdiff_t ntzones);
@@ -2936,30 +2937,27 @@ writezone(const char *const name, const char *const 
string, char version,
                            : i == thisdefaulttype ? old0 : i]
                      = thistypecnt++;
 
-               for (i = 0; i < sizeof indmap / sizeof indmap[0]; ++i)
-                       indmap[i] = -1;
                thischarcnt = stdcnt = utcnt = 0;
                for (i = old0; i < typecnt; i++) {
-                       register char * thisabbr;
-
                        if (omittype[i])
                                continue;
                        if (ttisstds[i])
                          stdcnt = thistypecnt;
                        if (ttisuts[i])
                          utcnt = thistypecnt;
-                       if (indmap[desigidx[i]] >= 0)
-                               continue;
-                       thisabbr = &chars[desigidx[i]];
-                       for (j = 0; j < thischarcnt; ++j)
-                               if (strcmp(&thischars[j], thisabbr) == 0)
-                                       break;
-                       if (j == thischarcnt) {
-                               strcpy(&thischars[thischarcnt], thisabbr);
-                               thischarcnt += strlen(thisabbr) + 1;
-                       }
-                       indmap[desigidx[i]] = j;
+                       addabbr(thischars, &thischarcnt, &chars[desigidx[i]]);
                }
+
+               /* Now that all abbrevs have been added to THISCHARS,
+                  it is safe to set INDMAP without worrying about
+                  whether the abbrevs might move later.  */
+               for (i = 0; i < TZ_MAX_CHARS; i++)
+                 indmap[i] = -1;
+               for (i = old0; i < typecnt; i++)
+                 if (!omittype[i] && indmap[desigidx[i]] < 0)
+                   indmap[desigidx[i]] = addabbr(thischars, &thischarcnt,
+                                                 &chars[desigidx[i]]);
+
                if (pass == 1 && !want_bloat()) {
                  hicut = thisleapexpiry = false;
                  pretranstype = -1;
@@ -3782,6 +3780,7 @@ static int
 addtype(zic_t utoff, char const *abbr, bool isdst, bool ttisstd, bool ttisut)
 {
        register int    i, j;
+       int charcnt0;
 
        /* RFC 9636 section 3.2 specifies this range for utoff.  */
        if (! (-TWO_31_MINUS_1 <= utoff && utoff <= TWO_31_MINUS_1)) {
@@ -3791,12 +3790,18 @@ addtype(zic_t utoff, char const *abbr, bool isdst, bool 
ttisstd, bool ttisut)
        if (!want_bloat())
          ttisstd = ttisut = false;
 
-       for (j = 0; j < charcnt; ++j)
-               if (strcmp(&chars[j], abbr) == 0)
-                       break;
-       if (j == charcnt)
-               newabbr(abbr);
-       else {
+       checkabbr(abbr);
+
+       charcnt0 = charcnt;
+       j = addabbr(chars, &charcnt, abbr);
+       if (charcnt0 < charcnt) {
+         /* If an abbreviation was inserted, increment indexes no
+            earlier than the insert by the size of the insertion,
+            so that they continue to point to the same contents.  */
+         for (i = 0; i < typecnt; i++)
+           if (j <= desigidx[i])
+             desigidx[i] += charcnt - charcnt0;
+       } else {
          /* If there's already an entry, return its index.  */
          for (i = 0; i < typecnt; i++)
            if (utoff == utoffs[i] && isdst == isdsts[i] && j == desigidx[i]
@@ -4181,10 +4186,8 @@ will not work with pre-2004 versions of zic"));
 }
 
 static void
-newabbr(const char *string)
+checkabbr(char const *string)
 {
-       register int    i;
-
        if (strcmp(string, GRANDPARENTED) != 0) {
                register const char *   cp;
                const char *            mp;
@@ -4203,13 +4206,50 @@ mp = _("time zone abbreviation differs from POSIX 
standard");
                if (mp != NULL)
                        warning("%s (%s)", mp, string);
        }
-       i = strnlen(string, TZ_MAX_CHARS - charcnt) + 1;
-       if (charcnt + i > TZ_MAX_CHARS) {
-               error(_("too many, or too long, time zone abbreviations"));
-               exit(EXIT_FAILURE);
-       }
-       strcpy(&chars[charcnt], string);
-       charcnt += i;
+}
+
+/* Put into CHS, which currently contains *PNCHS bytes containing
+   NUL-terminated abbreviations none of which are suffixes of another,
+   the abbreviation ABBR including its trailing NUL.
+   If ABBR does not already appear in CHS,
+   possibly as a suffix of an existing abbreviation,
+   add ABBR to CHS, remove from CHS any abbreviation
+   that is a suffix of ABBR, and increment *PNCHS accordingly.
+   Return the index of ABBR after any modifications to CHS are made.
+
+   If all abbreviations have already been added, this function
+   lets the caller look up the index of an existing abbreviation.  */
+static int
+addabbr(char chs[TZ_MAX_CHARS], int *pnchs, char const *abbr)
+{
+  int nchs = *pnchs;
+  int alen = strlen(abbr), nchs_incr = alen + 1;
+  int i;
+  for (i = 0; i < nchs; ) {
+    int clen = strlen(&chs[i]);
+    if (alen <= clen) {
+      /* If ABBR is a suffix of an abbreviation in CHS,
+        return the index of ABBR in CHS.  */
+      int isuff = i + (clen - alen);
+      if (memcmp(&chs[isuff], abbr, alen) == 0)
+       return isuff;
+    } else if (memcmp(&chs[i], &abbr[alen - clen], clen) == 0) {
+      /* An abbreviation in CHS is a substring of ABBR.
+        Replace it with ABBR, instead of the more-common
+        actions of appending ABBR or doing nothing.  */
+      nchs_incr = alen - clen;
+      break;
+    }
+    i += clen + 1;
+  }
+  if (TZ_MAX_CHARS < nchs + nchs_incr) {
+    error(_("too many, or too long, time zone abbreviations"));
+    exit(EXIT_FAILURE);
+  }
+  memmove(&chs[i + nchs_incr], &chs[i], nchs - i);
+  memcpy(&chs[i], abbr, nchs_incr);
+  *pnchs = nchs + nchs_incr;
+  return i;
 }
 
 /* Ensure that the directories of ARGNAME exist, by making any missing
-- 
2.53.0

Reply via email to