This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch branch-1.8
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/branch-1.8 by this push:
     new 48b26d754 ORC-1315: [C++] Fix byte to integer conversions fail on 
platforms with unsigned char type
48b26d754 is described below

commit 48b26d7543533d89d2c60996c448812215a0d242
Author: Gang Wu <[email protected]>
AuthorDate: Thu Aug 31 16:09:59 2023 +0800

    ORC-1315: [C++] Fix byte to integer conversions fail on platforms with 
unsigned char type
    
    ### What changes were proposed in this pull request?
    
    This patch adds static_cast for the char type cast in RLE/ColumnReader.
    
    ### Why are the changes needed?
    
    For the context of 
[ORC-1315](https://issues.apache.org/jira/browse/ORC-1315), this patch fixes 
signed char to unsigned char conversions fail where char is by default unsigned.
    
    ### How was this patch tested?
    
    No new tests were added.
    
    Closes #1607 from wgtmac/branch-1.8.
    
    Authored-by: Gang Wu <[email protected]>
    Signed-off-by: Gang Wu <[email protected]>
---
 c++/src/ByteRLE.cc           |  6 +++---
 c++/src/ColumnReader.cc      |  2 +-
 c++/src/RLEv1.cc             |  2 +-
 c++/test/TestByteRle.cc      | 31 +++++++++++++++----------------
 c++/test/TestColumnReader.cc |  4 ++--
 tools/test/TestMatch.cc      |  2 +-
 6 files changed, 23 insertions(+), 24 deletions(-)

diff --git a/c++/src/ByteRLE.cc b/c++/src/ByteRLE.cc
index 557fc8fd7..cdaed5586 100644
--- a/c++/src/ByteRLE.cc
+++ b/c++/src/ByteRLE.cc
@@ -366,7 +366,7 @@ namespace orc {
     if (bufferStart == bufferEnd) {
       nextBuffer();
     }
-    return *(bufferStart++);
+    return static_cast<signed char>(*(bufferStart++));
   }
 
   void ByteRleDecoderImpl::readHeader() {
@@ -377,7 +377,7 @@ namespace orc {
     } else {
       remainingValues = static_cast<size_t>(ch) + MINIMUM_REPEAT;
       repeating = true;
-      value = readByte();
+      value = static_cast<char>(readByte());
     }
   }
 
@@ -466,7 +466,7 @@ namespace orc {
         if (notNull) {
           for(uint64_t i=0; i < count; ++i) {
             if (notNull[position + i]) {
-              data[position + i] = readByte();
+              data[position + i] = static_cast<char>(readByte());
               consumed += 1;
             }
           }
diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc
index f4a4df924..f34185982 100644
--- a/c++/src/ColumnReader.cc
+++ b/c++/src/ColumnReader.cc
@@ -131,7 +131,7 @@ namespace orc {
    */
   void expandBytesToLongs(int64_t* buffer, uint64_t numValues) {
     for(size_t i=numValues - 1; i < numValues; --i) {
-      buffer[i] = reinterpret_cast<char *>(buffer)[i];
+      buffer[i] = reinterpret_cast<signed char *>(buffer)[i];
     }
   }
 
diff --git a/c++/src/RLEv1.cc b/c++/src/RLEv1.cc
index d80a81a56..e82b7b07e 100644
--- a/c++/src/RLEv1.cc
+++ b/c++/src/RLEv1.cc
@@ -147,7 +147,7 @@ signed char RleDecoderV1::readByte() {
     bufferStart = static_cast<const char*>(bufferPointer);
     bufferEnd = bufferStart + bufferLength;
   }
-  return *(bufferStart++);
+  return static_cast<signed char>(*(bufferStart++));
 }
 
 uint64_t RleDecoderV1::readLong() {
diff --git a/c++/test/TestByteRle.cc b/c++/test/TestByteRle.cc
index bdcc13c9e..8191c7a49 100644
--- a/c++/test/TestByteRle.cc
+++ b/c++/test/TestByteRle.cc
@@ -50,8 +50,8 @@ TEST(ByteRle, nullTest) {
   char buffer[258];
   char notNull[266];
   char result[266];
-  buffer[0] = -128;
-  buffer[129] = -128;
+  buffer[0] = static_cast<char>(-128);
+  buffer[129] = static_cast<char>(-128);
   for(int i=0; i < 128; ++i) {
     buffer[1 + i] = static_cast<char>(i);
     buffer[130 + i] = static_cast<char>(128 + i);
@@ -186,7 +186,7 @@ TEST(ByteRle, testNulls) {
       createByteRleDecoder(
         std::unique_ptr<orc::SeekableInputStream>
        (new SeekableArrayInputStream(buffer, ARRAY_SIZE(buffer), 3)));
-  std::vector<char> data(16, -1);
+  std::vector<char> data(16, static_cast<char>(-1));
   std::vector<char> notNull(data.size());
   for (size_t i = 0; i < data.size(); ++i) {
     notNull[i] = (i + 1) % 2;
@@ -198,17 +198,16 @@ TEST(ByteRle, testNulls) {
         EXPECT_EQ((i*data.size() + j)/2, data[j]) << "Output wrong at "
                                                   << (i * data.size() + j);
       } else {
-        EXPECT_EQ(-1, data[j]) << "Output wrong at "
-                               << (i * data.size() + j);
+        EXPECT_EQ(static_cast<char>(-1), data[j]) << "Output wrong at "
+                                                  << (i * data.size() + j);
       }
     }
   }
   for (size_t i = 0; i < 8; ++i) {
     rle->next(data.data(), data.size(), notNull.data());
     for (size_t j = 0; j < data.size(); ++j) {
-      EXPECT_EQ(j % 2 == 0 ? -36 : -1,
-                data[j])
-          << "Output wrong at " << (i * data.size() + j + 32);
+      EXPECT_EQ(j % 2 == 0 ? static_cast<char>(-36) : static_cast<char>(-1), 
data[j])
+        << "Output wrong at " << (i * data.size() + j + 32);
     }
   }
 }
@@ -221,26 +220,26 @@ TEST(ByteRle, testAllNulls) {
       createByteRleDecoder(
         std::unique_ptr<orc::SeekableInputStream>
        (new SeekableArrayInputStream(buffer, ARRAY_SIZE(buffer))));
-  std::vector<char> data(16, -1);
+  std::vector<char> data(16, static_cast<char>(-1));
   std::vector<char> allNull(data.size(), 0);
   std::vector<char> noNull(data.size(), 1);
   rle->next(data.data(), data.size(), allNull.data());
   for (size_t i = 0; i < data.size(); ++i) {
-    EXPECT_EQ(-1, data[i]) << "Output wrong at " << i;
+    EXPECT_EQ(static_cast<char>(-1), data[i]) << "Output wrong at " << i;
   }
   rle->next(data.data(), data.size(), noNull.data());
   for (size_t i = 0; i < data.size(); ++i) {
     EXPECT_EQ(i, data[i]) << "Output wrong at " << i;
-    data[i] = -1;
+    data[i] = static_cast<char>(-1);
   }
   rle->next(data.data(), data.size(), allNull.data());
   for (size_t i = 0; i < data.size(); ++i) {
-    EXPECT_EQ(-1, data[i]) << "Output wrong at " << i;
+    EXPECT_EQ(static_cast<char>(-1), data[i]) << "Output wrong at " << i;
   }
   for (size_t i = 0; i < 4; ++i) {
     rle->next(data.data(), data.size(), noNull.data());
     for (size_t j = 0; j < data.size(); ++j) {
-      EXPECT_EQ(-36, data[j]) << "Output wrong at " << i;
+      EXPECT_EQ(static_cast<char>(-36), data[j]) << "Output wrong at " << i;
     }
   }
   rle->next(data.data(), data.size(), allNull.data());
@@ -1107,7 +1106,7 @@ TEST(BooleanRle, skipTestWithNulls) {
   someNull[1] = 1;
   std::vector<char> allNull(data.size(), 0);
   for (size_t i = 0; i < 16384; i += 5) {
-    data.assign(data.size(), -1);
+    data.assign(data.size(), static_cast<char>(-1));
     rle->next(data.data(), data.size(), someNull.data());
     EXPECT_EQ(0, data[0]) << "Output wrong at " << i;
     EXPECT_EQ(0, data[2]) << "Output wrong at " << i;
@@ -1118,7 +1117,7 @@ TEST(BooleanRle, skipTestWithNulls) {
       rle->skip(4);
     }
     rle->skip(0);
-    data.assign(data.size(), -1);
+    data.assign(data.size(), static_cast<char>(-1));
     rle->next(data.data(), data.size(), allNull.data());
     for (size_t j = 0; j < data.size(); ++j) {
       EXPECT_EQ(0, data[j]) << "Output wrong at " << i << ", " << j;
@@ -1390,7 +1389,7 @@ TEST(BooleanRle, seekTestWithNulls) {
     EXPECT_EQ(i < 8192 ? i & 1 : (i / 3) & 1,
               data[i])
         << "Output wrong at " << i;
-    data[0] = -1;
+    data[0] = static_cast<char>(-1);
     rle->next(data.data(), 1, allNull.data());
     EXPECT_EQ(0, data[0]) << "Output wrong at " << i;
   } while (i != 0);
diff --git a/c++/test/TestColumnReader.cc b/c++/test/TestColumnReader.cc
index bc0ecb8eb..187aecde2 100644
--- a/c++/test/TestColumnReader.cc
+++ b/c++/test/TestColumnReader.cc
@@ -273,7 +273,7 @@ TEST(TestColumnReader, testByteWithNulls) {
       EXPECT_EQ(0, longBatch->notNull[i]) << "Wrong value at " << i;
     } else {
       EXPECT_EQ(1, longBatch->notNull[i]) << "Wrong value at " << i;
-      EXPECT_EQ(static_cast<char>(next++), longBatch->data[i])
+      EXPECT_EQ(static_cast<char>(next++), 
static_cast<char>(longBatch->data[i]))
           << "Wrong value at " << i;
     }
   }
@@ -337,7 +337,7 @@ TEST(TestColumnReader, testByteSkipsWithNulls) {
   ASSERT_EQ(true, !batch.hasNulls);
   ASSERT_EQ(5, longBatch->numElements);
   ASSERT_EQ(true, longBatch->hasNulls);
-  EXPECT_EQ(static_cast<char>(-1), longBatch->data[0]);
+  EXPECT_EQ(static_cast<char>(-1), static_cast<char>(longBatch->data[0]));
   EXPECT_EQ(true, !longBatch->notNull[1]);
   EXPECT_EQ(true, !longBatch->notNull[2]);
   EXPECT_EQ(true, !longBatch->notNull[3]);
diff --git a/tools/test/TestMatch.cc b/tools/test/TestMatch.cc
index 9fb28f494..bc7559383 100644
--- a/tools/test/TestMatch.cc
+++ b/tools/test/TestMatch.cc
@@ -150,7 +150,7 @@ namespace orc {
         ASSERT_EQ(true, expected.nextLine(expectedLine));
         line.clear();
         printer->printRow(i);
-        EXPECT_EQ(expectedLine, line)
+        ASSERT_EQ(expectedLine, line)
           << "wrong output at row " << (rowCount + i);
       }
       rowCount += batch->numElements;

Reply via email to