Bug#928111: [pre-approval] unblock: icu/63.2-1

GCS Sun, 16 Jun 2019 13:39:18 -0700

Hi Paul,

On Sun, Jun 16, 2019 at 9:50 PM Paul Gevers <[email protected]> wrote:
> On 16-06-2019 11:20, László Böszörményi (GCS) wrote:
> > The debdiff is larger for the following changes. The backported
> > security fixes are no longer under debian/patches but inline. The ABI
> > break, called the 'ICU-20250' issue upstream is reversed with a patch.
> > Then the s/63.1/63.2/ changes, etc.
>
> Can you please provide a diff between the patches-applied tree of the
> current buster version and a patches-applied tree of the current sid
> version?
 Of course, attached. The diff size went down from 165 kB to 39 kB as
you see, even if the documentation and s/63.1/63.2/ changes are still
in as well.


Regards,
Laszlo/GCS

diff -Nur icu-63.1/readme.html icu-63.2/readme.html
--- icu-63.1/readme.html	2018-10-15 18:02:37.000000000 +0000
+++ icu-63.2/readme.html	2019-04-11 22:38:30.000000000 +0000
@@ -3,7 +3,7 @@
 
 <html lang="en-US" xmlns="http://www.w3.org/1999/xhtml"; xml:lang="en-US">
   <head>
-    <title>ReadMe for ICU 63.1</title>
+    <title>ReadMe for ICU 63.2</title>
     <meta name="COPYRIGHT" content=
     "Copyright (C) 2016 and later: Unicode, Inc. and others. License &amp; terms of use: http://www.unicode.org/copyright.html"/>
     <!-- meta name="COPYRIGHT" content=
@@ -32,7 +32,7 @@
       International Components for Unicode<br/>
       <span class="only-rc">Release Candidate</span>
       <span class="only-milestone">(Milestone Release)</span>
-      <abbr title="International Components for Unicode">ICU</abbr> 63.1 ReadMe
+      <abbr title="International Components for Unicode">ICU</abbr> 63.2 ReadMe
     </h1>
 
     <!-- Shouldn't need to comment/uncomment this paragraph, just change the body class -->
diff -Nur icu-63.1/source/common/umutablecptrie.cpp icu-63.2/source/common/umutablecptrie.cpp
--- icu-63.1/source/common/umutablecptrie.cpp	2018-10-01 22:39:56.000000000 +0000
+++ icu-63.2/source/common/umutablecptrie.cpp	2019-06-16 20:23:58.000000000 +0000
@@ -60,6 +60,7 @@
 constexpr int32_t INDEX_3_18BIT_BLOCK_LENGTH = UCPTRIE_INDEX_3_BLOCK_LENGTH + UCPTRIE_INDEX_3_BLOCK_LENGTH / 8;
 
 class AllSameBlocks;
+class MixedBlocks;
 
 class MutableCodePointTrie : public UMemory {
 public:
@@ -92,8 +93,10 @@
     void maskValues(uint32_t mask);
     UChar32 findHighStart() const;
     int32_t compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks);
-    int32_t compactData(int32_t fastILimit, uint32_t *newData, int32_t dataNullIndex);
-    int32_t compactIndex(int32_t fastILimit, UErrorCode &errorCode);
+    int32_t compactData(
+            int32_t fastILimit, uint32_t *newData, int32_t newDataCapacity,
+            int32_t dataNullIndex, MixedBlocks &mixedBlocks, UErrorCode &errorCode);
+    int32_t compactIndex(int32_t fastILimit, MixedBlocks &mixedBlocks, UErrorCode &errorCode);
     int32_t compactTrie(int32_t fastILimit, UErrorCode &errorCode);
 
     uint32_t *index = nullptr;
@@ -548,28 +551,8 @@
     }
 }
 
-inline bool
-equalBlocks(const uint32_t *s, const uint32_t *t, int32_t length) {
-    while (length > 0 && *s == *t) {
-        ++s;
-        ++t;
-        --length;
-    }
-    return length == 0;
-}
-
-inline bool
-equalBlocks(const uint16_t *s, const uint32_t *t, int32_t length) {
-    while (length > 0 && *s == *t) {
-        ++s;
-        ++t;
-        --length;
-    }
-    return length == 0;
-}
-
-inline bool
-equalBlocks(const uint16_t *s, const uint16_t *t, int32_t length) {
+template<typename UIntA, typename UIntB>
+bool equalBlocks(const UIntA *s, const UIntB *t, int32_t length) {
     while (length > 0 && *s == *t) {
         ++s;
         ++t;
@@ -585,36 +568,6 @@
 }
 
 /** Search for an identical block. */
-int32_t findSameBlock(const uint32_t *p, int32_t pStart, int32_t length,
-                      const uint32_t *q, int32_t qStart, int32_t blockLength) {
-    // Ensure that we do not even partially get past length.
-    length -= blockLength;
-
-    q += qStart;
-    while (pStart <= length) {
-        if (equalBlocks(p + pStart, q, blockLength)) {
-            return pStart;
-        }
-        ++pStart;
-    }
-    return -1;
-}
-
-int32_t findSameBlock(const uint16_t *p, int32_t pStart, int32_t length,
-                      const uint32_t *q, int32_t qStart, int32_t blockLength) {
-    // Ensure that we do not even partially get past length.
-    length -= blockLength;
-
-    q += qStart;
-    while (pStart <= length) {
-        if (equalBlocks(p + pStart, q, blockLength)) {
-            return pStart;
-        }
-        ++pStart;
-    }
-    return -1;
-}
-
 int32_t findSameBlock(const uint16_t *p, int32_t pStart, int32_t length,
                       const uint16_t *q, int32_t qStart, int32_t blockLength) {
     // Ensure that we do not even partially get past length.
@@ -655,30 +608,9 @@
  * Look for maximum overlap of the beginning of the other block
  * with the previous, adjacent block.
  */
-int32_t getOverlap(const uint32_t *p, int32_t length,
-                   const uint32_t *q, int32_t qStart, int32_t blockLength) {
-    int32_t overlap = blockLength - 1;
-    U_ASSERT(overlap <= length);
-    q += qStart;
-    while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) {
-        --overlap;
-    }
-    return overlap;
-}
-
-int32_t getOverlap(const uint16_t *p, int32_t length,
-                   const uint32_t *q, int32_t qStart, int32_t blockLength) {
-    int32_t overlap = blockLength - 1;
-    U_ASSERT(overlap <= length);
-    q += qStart;
-    while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) {
-        --overlap;
-    }
-    return overlap;
-}
-
-int32_t getOverlap(const uint16_t *p, int32_t length,
-                   const uint16_t *q, int32_t qStart, int32_t blockLength) {
+template<typename UIntA, typename UIntB>
+int32_t getOverlap(const UIntA *p, int32_t length,
+                   const UIntB *q, int32_t qStart, int32_t blockLength) {
     int32_t overlap = blockLength - 1;
     U_ASSERT(overlap <= length);
     q += qStart;
@@ -807,6 +739,171 @@
     int32_t refCounts[CAPACITY];
 };
 
+// Custom hash table for mixed-value blocks to be found anywhere in the
+// compacted data or index so far.
+class MixedBlocks {
+public:
+    MixedBlocks() {}
+    ~MixedBlocks() {
+        uprv_free(table);
+    }
+
+    bool init(int32_t maxLength, int32_t newBlockLength) {
+        // We store actual data indexes + 1 to reserve 0 for empty entries.
+        int32_t maxDataIndex = maxLength - newBlockLength + 1;
+        int32_t newLength;
+        if (maxDataIndex <= 0xfff) {  // 4k
+            newLength = 6007;
+            shift = 12;
+            mask = 0xfff;
+        } else if (maxDataIndex <= 0x7fff) {  // 32k
+            newLength = 50021;
+            shift = 15;
+            mask = 0x7fff;
+        } else if (maxDataIndex <= 0x1ffff) {  // 128k
+            newLength = 200003;
+            shift = 17;
+            mask = 0x1ffff;
+        } else {
+            // maxDataIndex up to around MAX_DATA_LENGTH, ca. 1.1M
+            newLength = 1500007;
+            shift = 21;
+            mask = 0x1fffff;
+        }
+        if (newLength > capacity) {
+            uprv_free(table);
+            table = (uint32_t *)uprv_malloc(newLength * 4);
+            if (table == nullptr) {
+                return false;
+            }
+            capacity = newLength;
+        }
+        length = newLength;
+        uprv_memset(table, 0, length * 4);
+
+        blockLength = newBlockLength;
+        return true;
+    }
+
+    template<typename UInt>
+    void extend(const UInt *data, int32_t minStart, int32_t prevDataLength, int32_t newDataLength) {
+        int32_t start = prevDataLength - blockLength;
+        if (start >= minStart) {
+            ++start;  // Skip the last block that we added last time.
+        } else {
+            start = minStart;  // Begin with the first full block.
+        }
+        for (int32_t end = newDataLength - blockLength; start <= end; ++start) {
+            uint32_t hashCode = makeHashCode(data, start);
+            addEntry(data, start, hashCode, start);
+        }
+    }
+
+    template<typename UIntA, typename UIntB>
+    int32_t findBlock(const UIntA *data, const UIntB *blockData, int32_t blockStart) const {
+        uint32_t hashCode = makeHashCode(blockData, blockStart);
+        int32_t entryIndex = findEntry(data, blockData, blockStart, hashCode);
+        if (entryIndex >= 0) {
+            return (table[entryIndex] & mask) - 1;
+        } else {
+            return -1;
+        }
+    }
+
+    int32_t findAllSameBlock(const uint32_t *data, uint32_t blockValue) const {
+        uint32_t hashCode = makeHashCode(blockValue);
+        int32_t entryIndex = findEntry(data, blockValue, hashCode);
+        if (entryIndex >= 0) {
+            return (table[entryIndex] & mask) - 1;
+        } else {
+            return -1;
+        }
+    }
+
+private:
+    template<typename UInt>
+    uint32_t makeHashCode(const UInt *blockData, int32_t blockStart) const {
+        int32_t blockLimit = blockStart + blockLength;
+        uint32_t hashCode = blockData[blockStart++];
+        do {
+            hashCode = 37 * hashCode + blockData[blockStart++];
+        } while (blockStart < blockLimit);
+        return hashCode;
+    }
+
+    uint32_t makeHashCode(uint32_t blockValue) const {
+        uint32_t hashCode = blockValue;
+        for (int32_t i = 1; i < blockLength; ++i) {
+            hashCode = 37 * hashCode + blockValue;
+        }
+        return hashCode;
+    }
+
+    template<typename UInt>
+    void addEntry(const UInt *data, int32_t blockStart, uint32_t hashCode, int32_t dataIndex) {
+        U_ASSERT(0 <= dataIndex && dataIndex < (int32_t)mask);
+        int32_t entryIndex = findEntry(data, data, blockStart, hashCode);
+        if (entryIndex < 0) {
+            table[~entryIndex] = (hashCode << shift) | (dataIndex + 1);
+        }
+    }
+
+    template<typename UIntA, typename UIntB>
+    int32_t findEntry(const UIntA *data, const UIntB *blockData, int32_t blockStart,
+                      uint32_t hashCode) const {
+        uint32_t shiftedHashCode = hashCode << shift;
+        int32_t initialEntryIndex = (hashCode % (length - 1)) + 1;  // 1..length-1
+        for (int32_t entryIndex = initialEntryIndex;;) {
+            uint32_t entry = table[entryIndex];
+            if (entry == 0) {
+                return ~entryIndex;
+            }
+            if ((entry & ~mask) == shiftedHashCode) {
+                int32_t dataIndex = (entry & mask) - 1;
+                if (equalBlocks(data + dataIndex, blockData + blockStart, blockLength)) {
+                    return entryIndex;
+                }
+            }
+            entryIndex = nextIndex(initialEntryIndex, entryIndex);
+        }
+    }
+
+    int32_t findEntry(const uint32_t *data, uint32_t blockValue, uint32_t hashCode) const {
+        uint32_t shiftedHashCode = hashCode << shift;
+        int32_t initialEntryIndex = (hashCode % (length - 1)) + 1;  // 1..length-1
+        for (int32_t entryIndex = initialEntryIndex;;) {
+            uint32_t entry = table[entryIndex];
+            if (entry == 0) {
+                return ~entryIndex;
+            }
+            if ((entry & ~mask) == shiftedHashCode) {
+                int32_t dataIndex = (entry & mask) - 1;
+                if (allValuesSameAs(data + dataIndex, blockLength, blockValue)) {
+                    return entryIndex;
+                }
+            }
+            entryIndex = nextIndex(initialEntryIndex, entryIndex);
+        }
+    }
+
+    inline int32_t nextIndex(int32_t initialEntryIndex, int32_t entryIndex) const {
+        // U_ASSERT(0 < initialEntryIndex && initialEntryIndex < length);
+        return (entryIndex + initialEntryIndex) % length;
+    }
+
+    // Hash table.
+    // The length is a prime number, larger than the maximum data length.
+    // The "shift" lower bits store a data index + 1.
+    // The remaining upper bits store a partial hashCode of the block data values.
+    uint32_t *table = nullptr;
+    int32_t capacity = 0;
+    int32_t length = 0;
+    int32_t shift = 0;
+    uint32_t mask = 0;
+
+    int32_t blockLength = 0;
+};
+
 int32_t MutableCodePointTrie::compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks) {
 #ifdef UCPTRIE_DEBUG
     bool overflow = false;
@@ -962,8 +1059,9 @@
  *
  * It does not try to find an optimal order of writing, deduplicating, and overlapping blocks.
  */
-int32_t MutableCodePointTrie::compactData(int32_t fastILimit,
-                                          uint32_t *newData, int32_t dataNullIndex) {
+int32_t MutableCodePointTrie::compactData(
+        int32_t fastILimit, uint32_t *newData, int32_t newDataCapacity,
+        int32_t dataNullIndex, MixedBlocks &mixedBlocks, UErrorCode &errorCode) {
 #ifdef UCPTRIE_DEBUG
     int32_t countSame=0, sumOverlaps=0;
     bool printData = dataLength == 29088 /* line.brk */ ||
@@ -983,8 +1081,14 @@
 #endif
     }
 
-    int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
     int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;
+    if (!mixedBlocks.init(newDataCapacity, blockLength)) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return 0;
+    }
+    mixedBlocks.extend(newData, 0, 0, newDataLength);
+
+    int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
     int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
     int32_t fastLength = 0;
     for (int32_t i = ASCII_I_LIMIT; i < iLimit; i += inc) {
@@ -992,12 +1096,17 @@
             blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
             inc = 1;
             fastLength = newDataLength;
+            if (!mixedBlocks.init(newDataCapacity, blockLength)) {
+                errorCode = U_MEMORY_ALLOCATION_ERROR;
+                return 0;
+            }
+            mixedBlocks.extend(newData, 0, 0, newDataLength);
         }
         if (flags[i] == ALL_SAME) {
             uint32_t value = index[i];
-            int32_t n;
             // Find an earlier part of the data array of length blockLength
             // that is filled with this value.
+            int32_t n = mixedBlocks.findAllSameBlock(newData, value);
             // If we find a match, and the current block is the data null block,
             // and it is not a fast block but matches the start of a fast block,
             // then we need to continue looking.
@@ -1005,12 +1114,10 @@
             // and not all of the rest of the fast block is filled with this value.
             // Otherwise trie.getRange() would detect that the fast block starts at
             // dataNullOffset and assume incorrectly that it is filled with the null value.
-            for (int32_t start = 0;
-                    (n = findAllSameBlock(newData, start, newDataLength,
-                                value, blockLength)) >= 0 &&
-                            i == dataNullIndex && i >= fastILimit && n < fastLength &&
-                            isStartOfSomeFastBlock(n, index, fastILimit);
-                    start = n + 1) {}
+            while (n >= 0 && i == dataNullIndex && i >= fastILimit && n < fastLength &&
+                    isStartOfSomeFastBlock(n, index, fastILimit)) {
+                n = findAllSameBlock(newData, n + 1, newDataLength, value, blockLength);
+            }
             if (n >= 0) {
                 DEBUG_DO(++countSame);
                 index[i] = n;
@@ -1023,14 +1130,16 @@
                 }
 #endif
                 index[i] = newDataLength - n;
+                int32_t prevDataLength = newDataLength;
                 while (n < blockLength) {
                     newData[newDataLength++] = value;
                     ++n;
                 }
+                mixedBlocks.extend(newData, 0, prevDataLength, newDataLength);
             }
         } else if (flags[i] == MIXED) {
             const uint32_t *block = data + index[i];
-            int32_t n = findSameBlock(newData, 0, newDataLength, block, 0, blockLength);
+            int32_t n = mixedBlocks.findBlock(newData, block, 0);
             if (n >= 0) {
                 DEBUG_DO(++countSame);
                 index[i] = n;
@@ -1043,9 +1152,11 @@
                 }
 #endif
                 index[i] = newDataLength - n;
+                int32_t prevDataLength = newDataLength;
                 while (n < blockLength) {
                     newData[newDataLength++] = block[n++];
                 }
+                mixedBlocks.extend(newData, 0, prevDataLength, newDataLength);
             }
         } else /* SAME_AS */ {
             uint32_t j = index[i];
@@ -1061,7 +1172,8 @@
     return newDataLength;
 }
 
-int32_t MutableCodePointTrie::compactIndex(int32_t fastILimit, UErrorCode &errorCode) {
+int32_t MutableCodePointTrie::compactIndex(int32_t fastILimit, MixedBlocks &mixedBlocks,
+                                           UErrorCode &errorCode) {
     int32_t fastIndexLength = fastILimit >> (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3);
     if ((highStart >> UCPTRIE_FAST_SHIFT) <= fastIndexLength) {
         // Only the linear fast index, no multi-stage index tables.
@@ -1095,6 +1207,12 @@
         }
     }
 
+    if (!mixedBlocks.init(fastIndexLength, UCPTRIE_INDEX_3_BLOCK_LENGTH)) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return 0;
+    }
+    mixedBlocks.extend(fastIndex, 0, 0, fastIndexLength);
+
     // Examine index-3 blocks. For each determine one of:
     // - same as the index-3 null block
     // - same as a fast-index block
@@ -1105,6 +1223,7 @@
     // Also determine an upper limit for the index-3 table length.
     int32_t index3Capacity = 0;
     i3FirstNull = index3NullOffset;
+    bool hasLongI3Blocks = false;
     // If the fast index covers the whole BMP, then
     // the multi-stage index is only for supplementary code points.
     // Otherwise, the multi-stage index covers all of Unicode.
@@ -1129,13 +1248,13 @@
                     index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH;
                 } else {
                     index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;
+                    hasLongI3Blocks = true;
                 }
                 i3FirstNull = 0;
             }
         } else {
             if (oredI3 <= 0xffff) {
-                int32_t n = findSameBlock(fastIndex, 0, fastIndexLength,
-                                          index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH);
+                int32_t n = mixedBlocks.findBlock(fastIndex, index, i);
                 if (n >= 0) {
                     flags[i] = I3_BMP;
                     index[i] = n;
@@ -1146,6 +1265,7 @@
             } else {
                 flags[i] = I3_18;
                 index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;
+                hasLongI3Blocks = true;
             }
         }
         i = j;
@@ -1166,6 +1286,18 @@
     }
     uprv_memcpy(index16, fastIndex, fastIndexLength * 2);
 
+    if (!mixedBlocks.init(index16Capacity, UCPTRIE_INDEX_3_BLOCK_LENGTH)) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return 0;
+    }
+    MixedBlocks longI3Blocks;
+    if (hasLongI3Blocks) {
+        if (!longI3Blocks.init(index16Capacity, INDEX_3_18BIT_BLOCK_LENGTH)) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return 0;
+        }
+    }
+
     // Compact the index-3 table and write an uncompacted version of the index-2 table.
     uint16_t index2[UNICODE_LIMIT >> UCPTRIE_SHIFT_2];  // index2Capacity
     int32_t i2Length = 0;
@@ -1185,8 +1317,7 @@
         } else if (f == I3_BMP) {
             i3 = index[i];
         } else if (f == I3_16) {
-            int32_t n = findSameBlock(index16, index3Start, indexLength,
-                                      index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH);
+            int32_t n = mixedBlocks.findBlock(index16, index, i);
             if (n >= 0) {
                 i3 = n;
             } else {
@@ -1198,12 +1329,18 @@
                                    index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH);
                 }
                 i3 = indexLength - n;
+                int32_t prevIndexLength = indexLength;
                 while (n < UCPTRIE_INDEX_3_BLOCK_LENGTH) {
                     index16[indexLength++] = index[i + n++];
                 }
+                mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
+                if (hasLongI3Blocks) {
+                    longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength);
+                }
             }
         } else {
             U_ASSERT(f == I3_18);
+            U_ASSERT(hasLongI3Blocks);
             // Encode an index-3 block that contains one or more data indexes exceeding 16 bits.
             int32_t j = i;
             int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH;
@@ -1236,8 +1373,7 @@
                 index16[k++] = v;
                 index16[k - 9] = upperBits;
             } while (j < jLimit);
-            int32_t n = findSameBlock(index16, index3Start, indexLength,
-                                      index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH);
+            int32_t n = longI3Blocks.findBlock(index16, index16, indexLength);
             if (n >= 0) {
                 i3 = n | 0x8000;
             } else {
@@ -1249,6 +1385,7 @@
                                    index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH);
                 }
                 i3 = (indexLength - n) | 0x8000;
+                int32_t prevIndexLength = indexLength;
                 if (n > 0) {
                     int32_t start = indexLength;
                     while (n < INDEX_3_18BIT_BLOCK_LENGTH) {
@@ -1257,6 +1394,10 @@
                 } else {
                     indexLength += INDEX_3_18BIT_BLOCK_LENGTH;
                 }
+                mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
+                if (hasLongI3Blocks) {
+                    longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength);
+                }
             }
         }
         if (index3NullOffset < 0 && i3FirstNull >= 0) {
@@ -1279,16 +1420,23 @@
     }
 
     // Compact the index-2 table and write the index-1 table.
+    static_assert(UCPTRIE_INDEX_2_BLOCK_LENGTH == UCPTRIE_INDEX_3_BLOCK_LENGTH,
+                  "must re-init mixedBlocks");
     int32_t blockLength = UCPTRIE_INDEX_2_BLOCK_LENGTH;
     int32_t i1 = fastIndexLength;
     for (int32_t i = 0; i < i2Length; i += blockLength) {
-        if ((i2Length - i) < blockLength) {
+        int32_t n;
+        if ((i2Length - i) >= blockLength) {
+            // normal block
+            U_ASSERT(blockLength == UCPTRIE_INDEX_2_BLOCK_LENGTH);
+            n = mixedBlocks.findBlock(index16, index2, i);
+        } else {
             // highStart is inside the last index-2 block. Shorten it.
             blockLength = i2Length - i;
+            n = findSameBlock(index16, index3Start, indexLength,
+                              index2, i, blockLength);
         }
         int32_t i2;
-        int32_t n = findSameBlock(index16, index3Start, indexLength,
-                                  index2, i, blockLength);
         if (n >= 0) {
             i2 = n;
         } else {
@@ -1299,9 +1447,11 @@
                 n = getOverlap(index16, indexLength, index2, i, blockLength);
             }
             i2 = indexLength - n;
+            int32_t prevIndexLength = indexLength;
             while (n < blockLength) {
                 index16[indexLength++] = index2[i + n++];
             }
+            mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
         }
         // Set the index-1 table entry.
         index16[i1++] = i2;
@@ -1369,7 +1519,11 @@
     uprv_memcpy(newData, asciiData, sizeof(asciiData));
 
     int32_t dataNullIndex = allSameBlocks.findMostUsed();
-    int32_t newDataLength = compactData(fastILimit, newData, dataNullIndex);
+
+    MixedBlocks mixedBlocks;
+    int32_t newDataLength = compactData(fastILimit, newData, newDataCapacity,
+                                        dataNullIndex, mixedBlocks, errorCode);
+    if (U_FAILURE(errorCode)) { return 0; }
     U_ASSERT(newDataLength <= newDataCapacity);
     uprv_free(data);
     data = newData;
@@ -1394,7 +1548,7 @@
         dataNullOffset = UCPTRIE_NO_DATA_NULL_OFFSET;
     }
 
-    int32_t indexLength = compactIndex(fastILimit, errorCode);
+    int32_t indexLength = compactIndex(fastILimit, mixedBlocks, errorCode);
     highStart = realHighStart;
     return indexLength;
 }
diff -Nur icu-63.1/source/common/umutex.h icu-63.2/source/common/umutex.h
--- icu-63.1/source/common/umutex.h	2018-10-01 22:39:56.000000000 +0000
+++ icu-63.2/source/common/umutex.h	2019-04-11 22:38:30.000000000 +0000
@@ -54,15 +54,23 @@
 
 #include <atomic>
 
-U_NAMESPACE_BEGIN
-
 // Export an explicit template instantiation of std::atomic<int32_t>. 
 // When building DLLs for Windows this is required as it is used as a data member of the exported SharedObject class.
 // See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples.
-#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
+#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN && !defined(U_IN_DOXYGEN)
+  #if defined(__clang__)
+  // Suppress the warning that the explicit instantiation after explicit specialization has no effect.
+  #pragma clang diagnostic push
+  #pragma clang diagnostic ignored "-Winstantiation-after-specialization"
+  #endif
 template struct U_COMMON_API std::atomic<int32_t>;
+  #if defined(__clang__)
+  #pragma clang diagnostic pop
+  #endif
 #endif
 
+U_NAMESPACE_BEGIN
+
 typedef std::atomic<int32_t> u_atomic_int32_t;
 #define ATOMIC_INT32_T_INITIALIZER(val) ATOMIC_VAR_INIT(val)
 
diff -Nur icu-63.1/source/common/unicode/uvernum.h icu-63.2/source/common/unicode/uvernum.h
--- icu-63.1/source/common/unicode/uvernum.h	2018-10-01 22:39:56.000000000 +0000
+++ icu-63.2/source/common/unicode/uvernum.h	2019-04-11 22:38:30.000000000 +0000
@@ -66,7 +66,7 @@
  *  This value will change in the subsequent releases of ICU
  *  @stable ICU 2.6
  */
-#define U_ICU_VERSION_MINOR_NUM 1
+#define U_ICU_VERSION_MINOR_NUM 2
 
 /** The current ICU patchlevel version as an integer.
  *  This value will change in the subsequent releases of ICU
@@ -121,7 +121,7 @@
  *  This value will change in the subsequent releases of ICU
  *  @stable ICU 2.4
  */
-#define U_ICU_VERSION "63.1"
+#define U_ICU_VERSION "63.2"
 
 /**
  * The current ICU library major version number as a string, for library name suffixes.
@@ -140,7 +140,7 @@
 /** Data version in ICU4C.
  * @internal ICU 4.4 Internal Use Only
  **/
-#define U_ICU_DATA_VERSION "63.1"
+#define U_ICU_DATA_VERSION "63.2"
 #endif  /* U_HIDE_INTERNAL_API */
 
 /*===========================================================================
diff -Nur icu-63.1/source/configure icu-63.2/source/configure
--- icu-63.1/source/configure	2018-10-01 22:39:56.000000000 +0000
+++ icu-63.2/source/configure	2019-04-11 22:38:30.000000000 +0000
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for ICU 63.1.
+# Generated by GNU Autoconf 2.69 for ICU 63.2.
 #
 # Report bugs to <http://icu-project.org/bugs>.
 #
@@ -582,8 +582,8 @@
 # Identity of this package.
 PACKAGE_NAME='ICU'
 PACKAGE_TARNAME='International Components for Unicode'
-PACKAGE_VERSION='63.1'
-PACKAGE_STRING='ICU 63.1'
+PACKAGE_VERSION='63.2'
+PACKAGE_STRING='ICU 63.2'
 PACKAGE_BUGREPORT='http://icu-project.org/bugs'
 PACKAGE_URL='http://icu-project.org'
 
@@ -1370,7 +1370,7 @@
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures ICU 63.1 to adapt to many kinds of systems.
+\`configure' configures ICU 63.2 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1437,7 +1437,7 @@
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of ICU 63.1:";;
+     short | recursive ) echo "Configuration of ICU 63.2:";;
    esac
   cat <<\_ACEOF
 
@@ -1574,7 +1574,7 @@
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-ICU configure 63.1
+ICU configure 63.2
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2266,7 +2266,7 @@
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by ICU $as_me 63.1, which was
+It was created by ICU $as_me 63.2, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -8434,7 +8434,7 @@
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by ICU $as_me 63.1, which was
+This file was extended by ICU $as_me 63.2, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -8488,7 +8488,7 @@
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-ICU config.status 63.1
+ICU config.status 63.2
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 
Binary files icu-63.1/source/data/in/icudt63l.dat and icu-63.2/source/data/in/icudt63l.dat differ
diff -Nur icu-63.1/source/i18n/japancal.cpp icu-63.2/source/i18n/japancal.cpp
--- icu-63.1/source/i18n/japancal.cpp	2018-10-01 22:39:56.000000000 +0000
+++ icu-63.2/source/i18n/japancal.cpp	2019-04-11 22:38:30.000000000 +0000
@@ -18,6 +18,16 @@
 #if !UCONFIG_NO_FORMATTING
 #if U_PLATFORM_HAS_WINUWP_API == 0
 #include <stdlib.h> // getenv() is not available in UWP env
+#else
+#ifndef WIN32_LEAN_AND_MEAN
+#   define WIN32_LEAN_AND_MEAN
+#endif
+#   define VC_EXTRALEAN
+#   define NOUSER
+#   define NOSERVICE
+#   define NOIME
+#   define NOMCX
+#include <windows.h>
 #endif
 #include "cmemory.h"
 #include "erarules.h"
diff -Nur icu-63.1/source/i18n/unicode/numberrangeformatter.h icu-63.2/source/i18n/unicode/numberrangeformatter.h
--- icu-63.1/source/i18n/unicode/numberrangeformatter.h	2018-10-15 18:02:37.000000000 +0000
+++ icu-63.2/source/i18n/unicode/numberrangeformatter.h	2019-04-11 22:38:30.000000000 +0000
@@ -185,8 +185,14 @@
  * Export an explicit template instantiation. See datefmt.h
  * (When building DLLs for Windows this is required.)
  */
-#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN && !defined(U_IN_DOXYGEN)
-template struct U_I18N_API std::atomic<impl::NumberRangeFormatterImpl*>;
+#if U_PLATFORM == U_PF_WINDOWS && !defined(U_IN_DOXYGEN)
+} // namespace icu::number
+U_NAMESPACE_END
+
+template struct U_I18N_API std::atomic< U_NAMESPACE_QUALIFIER number::impl::NumberRangeFormatterImpl*>;
+
+U_NAMESPACE_BEGIN
+namespace number {  // icu::number
 #endif
 /** \endcond */
 
diff -Nur icu-63.1/source/test/intltest/incaltst.cpp icu-63.2/source/test/intltest/incaltst.cpp
--- icu-63.1/source/test/intltest/incaltst.cpp	2018-09-29 00:34:42.000000000 +0000
+++ icu-63.2/source/test/intltest/incaltst.cpp	2019-04-11 22:38:30.000000000 +0000
@@ -77,6 +77,7 @@
     CASE(7,TestPersian);
     CASE(8,TestPersianFormat);
     CASE(9,TestTaiwan);
+    CASE(10,TestJapaneseHeiseiToReiwa);
     default: name = ""; break;
     }
 }
@@ -626,23 +627,23 @@
         // Test simple parse/format with adopt
         UDate aDate = 0; 
         
-        // Test parse with missing era (should default to current era, heisei)
+        // Test parse with missing era (should default to current era)
         // Test parse with incomplete information
         logln("Testing parse w/ missing era...");
-        SimpleDateFormat *fmt = new SimpleDateFormat(UnicodeString("y.M.d"), Locale("ja_JP@calendar=japanese"), status);
+        SimpleDateFormat *fmt = new SimpleDateFormat(UnicodeString("y/M/d"), Locale("ja_JP@calendar=japanese"), status);
         CHECK(status, "creating date format instance");
         if(!fmt) { 
             errln("Couldn't create en_US instance");
         } else {
             UErrorCode s2 = U_ZERO_ERROR;
             cal2->clear();
-            UnicodeString samplestr("1.1.9");
+            UnicodeString samplestr("1/5/9");
             logln(UnicodeString() + "Test Year: " + samplestr);
             aDate = fmt->parse(samplestr, s2);
             ParsePosition pp=0;
             fmt->parse(samplestr, *cal2, pp);
-            CHECK(s2, "parsing the 1.1.9 string");
-            logln("*cal2 after 119 parse:");
+            CHECK(s2, "parsing the 1/5/9 string");
+            logln("*cal2 after 159 parse:");
             str.remove();
             fmt2->format(aDate, str);
             logln(UnicodeString() + "as Gregorian Calendar: " + str);
@@ -653,7 +654,7 @@
             int32_t expectYear = 1;
             int32_t expectEra = JapaneseCalendar::getCurrentEra();
             if((gotYear!=1) || (gotEra != expectEra)) {
-                errln(UnicodeString("parse "+samplestr+" of 'y.m.d' as Japanese Calendar, expected year ") + expectYear + 
+                errln(UnicodeString("parse "+samplestr+" of 'y/m/d' as Japanese Calendar, expected year ") + expectYear + 
                     UnicodeString(" and era ") + expectEra +", but got year " + gotYear + " and era " + gotEra + " (Gregorian:" + str +")");
             } else {            
                 logln(UnicodeString() + " year: " + gotYear + ", era: " + gotEra);
@@ -666,7 +667,7 @@
         // Test simple parse/format with adopt
         UDate aDate = 0; 
         
-        // Test parse with missing era (should default to current era, heisei)
+        // Test parse with missing era (should default to current era)
         // Test parse with incomplete information
         logln("Testing parse w/ just year...");
         SimpleDateFormat *fmt = new SimpleDateFormat(UnicodeString("y"), Locale("ja_JP@calendar=japanese"), status);
@@ -678,7 +679,7 @@
             cal2->clear();
             UnicodeString samplestr("1");
             logln(UnicodeString() + "Test Year: " + samplestr);
-            aDate = fmt->parse(samplestr, s2);
+            aDate = fmt->parse(samplestr, s2);  // Should be parsed as the first day of the current era
             ParsePosition pp=0;
             fmt->parse(samplestr, *cal2, pp);
             CHECK(s2, "parsing the 1 string");
@@ -691,7 +692,7 @@
             int32_t gotYear = cal2->get(UCAL_YEAR, s2);
             int32_t gotEra = cal2->get(UCAL_ERA, s2);
             int32_t expectYear = 1;
-            int32_t expectEra = 235; //JapaneseCalendar::kCurrentEra;
+            int32_t expectEra = JapaneseCalendar::getCurrentEra();
             if((gotYear!=1) || (gotEra != expectEra)) {
                 errln(UnicodeString("parse "+samplestr+" of 'y' as Japanese Calendar, expected year ") + expectYear + 
                     UnicodeString(" and era ") + expectEra +", but got year " + gotYear + " and era " + gotEra + " (Gregorian:" + str +")");
@@ -707,6 +708,40 @@
     delete fmt2;
 }
 
+void IntlCalendarTest::TestJapaneseHeiseiToReiwa() {
+    Calendar *cal;
+    UErrorCode status = U_ZERO_ERROR;
+    cal = Calendar::createInstance(status);
+    CHECK(status, UnicodeString("Creating default Gregorian Calendar"));
+    cal->set(2019, UCAL_APRIL, 29);
+
+    DateFormat *jfmt = DateFormat::createDateInstance(DateFormat::LONG, "ja@calendar=japanese");
+    CHECK(status, UnicodeString("Creating date format ja@calendar=japanese"))
+
+    const char* EXPECTED_FORMAT[4] = {
+        "\\u5E73\\u621031\\u5E744\\u670829\\u65E5", // Heisei 31 April 29
+        "\\u5E73\\u621031\\u5E744\\u670830\\u65E5", // Heisei 31 April 30
+        "\\u4EE4\\u548c1\\u5E745\\u67081\\u65E5",   // Reiwa 1 May 1
+        "\\u4EE4\\u548c1\\u5E745\\u67082\\u65E5"    // Reiwa 1 May 2
+    };
+
+    for (int32_t i = 0; i < 4; i++) {
+        UnicodeString dateStr;
+        UDate d = cal->getTime(status);
+        CHECK(status, UnicodeString("Get test date"));
+        jfmt->format(d, dateStr);
+        UnicodeString expected(UnicodeString(EXPECTED_FORMAT[i], -1, US_INV).unescape());
+        if (expected.compare(dateStr) != 0) {
+            errln(UnicodeString("Formatting year:") + cal->get(UCAL_YEAR, status) + " month:"
+                + cal->get(UCAL_MONTH, status) + " day:" + (cal->get(UCAL_DATE, status) + 1)
+                + " - expected: " + expected + " / actual: " + dateStr);
+        }
+        cal->add(UCAL_DATE, 1, status);
+        CHECK(status, UnicodeString("Add 1 day"));
+    }
+    delete jfmt;
+    delete cal;
+}
 
 
 
diff -Nur icu-63.1/source/test/intltest/incaltst.h icu-63.2/source/test/intltest/incaltst.h
--- icu-63.1/source/test/intltest/incaltst.h	2018-09-29 00:34:42.000000000 +0000
+++ icu-63.2/source/test/intltest/incaltst.h	2019-04-11 22:38:30.000000000 +0000
@@ -34,6 +34,7 @@
     void TestJapanese(void);
     void TestJapaneseFormat(void);
     void TestJapanese3860(void);
+    void TestJapaneseHeiseiToReiwa(void);
     
     void TestPersian(void);
     void TestPersianFormat(void);
diff -Nur icu-63.1/source/test/intltest/numbertest.h icu-63.2/source/test/intltest/numbertest.h
--- icu-63.1/source/test/intltest/numbertest.h	2018-10-01 22:39:56.000000000 +0000
+++ icu-63.2/source/test/intltest/numbertest.h	2019-04-11 22:38:30.000000000 +0000
@@ -10,6 +10,7 @@
 #include "intltest.h"
 #include "number_affixutils.h"
 #include "numparse_stringsegment.h"
+#include "numrange_impl.h"
 #include "unicode/locid.h"
 #include "unicode/numberformatter.h"
 #include "unicode/numberrangeformatter.h"
diff -Nur icu-63.1/source/test/testdata/format.txt icu-63.2/source/test/testdata/format.txt
--- icu-63.1/source/test/testdata/format.txt	2018-10-01 22:39:56.000000000 +0000
+++ icu-63.2/source/test/testdata/format.txt	2019-04-11 22:38:30.000000000 +0000
@@ -488,42 +488,44 @@
                     "AD 02008"
                },
 
-				// Japanese
-               {
-                    "en_US@calendar=japanese",         
-                    "",
-                    "PATTERN=G y",
-                    "YEAR=8",
-                    "Heisei 8"
-               },
-               {
-                    "en_US@calendar=japanese",         
-                    "",
-                    "PATTERN=G yy",
-                    "YEAR=8",
-                    "Heisei 08"
-               },
-               {
-                    "en_US@calendar=japanese",         
-                    "",
-                    "PATTERN=G yyy",
-                    "YEAR=8",
-                    "Heisei 008"
-               },
-               {
-                    "en_US@calendar=japanese",         
-                    "",
-                    "PATTERN=G yyyy",
-                    "YEAR=8",
-                    "Heisei 0008"
-               },
-               {
-                    "en_US@calendar=japanese",         
-                    "",
-                    "PATTERN=G yyyyy",
-                    "YEAR=8",
-                    "Heisei 00008"
-               },
+// The following test case is commented out as the current era
+// depends on the current time when the test is run.
+//				// Japanese
+//               {
+//                    "en_US@calendar=japanese",         
+//                    "",
+//                    "PATTERN=G y",
+//                    "YEAR=8",
+//                    "Reiwa 8"
+//               },
+//               {
+//                    "en_US@calendar=japanese",         
+//                    "",
+//                    "PATTERN=G yy",
+//                    "YEAR=8",
+//                    "Reiwa 08"
+//               },
+//               {
+//                    "en_US@calendar=japanese",         
+//                    "",
+//                    "PATTERN=G yyy",
+//                    "YEAR=8",
+//                    "Reiwa 008"
+//               },
+//               {
+//                    "en_US@calendar=japanese",         
+//                    "",
+//                    "PATTERN=G yyyy",
+//                    "YEAR=8",
+//                    "Reiwa 0008"
+//               },
+//               {
+//                    "en_US@calendar=japanese",         
+//                    "",
+//                    "PATTERN=G yyyyy",
+//                    "YEAR=8",
+//                    "Reiwa 00008"
+//               },
 
             }
         }

Bug#928111: [pre-approval] unblock: icu/63.2-1

Reply via email to