This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 91ec792  ARROW-2411: [C++] Add StringBuilder::Append(const char 
**values)
91ec792 is described below

commit 91ec7928fc112883d4c693059bc7371789a7fe0d
Author: Kouhei Sutou <k...@clear-code.com>
AuthorDate: Tue Apr 10 19:14:38 2018 +0200

    ARROW-2411: [C++] Add StringBuilder::Append(const char **values)
    
    This implementation uses not NULL-terminated C strings and the length of 
values instead of NULL-terminated C strings. Because other builder uses the 
interface such as `PrimitiveBuilder::Append()`.
    
    Author: Kouhei Sutou <k...@clear-code.com>
    Author: Antoine Pitrou <pit...@free.fr>
    
    Closes #1852 from kou/cpp-string-builder-append-cstrings and squashes the 
following commits:
    
    df00afc <Antoine Pitrou> Nit: fix docstring wording
    ed8203a <Kouhei Sutou>  Process NULL char * as a null value
    6d4669b <Kouhei Sutou>  Fix expected value
    d697e49 <Kouhei Sutou>  Format
    001ea51 <Kouhei Sutou>  Process nullptr char * as a NULL value
    0a11f7b <Kouhei Sutou>  Use "nul-terminated char *" instead of "C string"
    f06fd76 <Kouhei Sutou>  Add StringBuilder::Append(const char **values)
---
 cpp/src/arrow/array-test.cc | 66 +++++++++++++++++++++++++++++++++++++++++++++
 cpp/src/arrow/builder.cc    | 58 +++++++++++++++++++++++++++++++++++++++
 cpp/src/arrow/builder.h     | 12 +++++++++
 3 files changed, 136 insertions(+)

diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index fb1bebf..2ae9f48 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -1022,6 +1022,72 @@ TEST_F(TestStringBuilder, TestAppendVector) {
   }
 }
 
+TEST_F(TestStringBuilder, TestAppendCStringsWithValidBytes) {
+  const char* strings[] = {nullptr, "aaa", nullptr, "ignored", ""};
+  vector<uint8_t> valid_bytes = {1, 1, 1, 0, 1};
+
+  int N = static_cast<int>(sizeof(strings) / sizeof(strings[0]));
+  int reps = 1000;
+
+  for (int j = 0; j < reps; ++j) {
+    ASSERT_OK(builder_->Append(strings, N, valid_bytes.data()));
+  }
+  Done();
+
+  ASSERT_EQ(reps * N, result_->length());
+  ASSERT_EQ(reps * 3, result_->null_count());
+  ASSERT_EQ(reps * 3, result_->value_data()->size());
+
+  int32_t length;
+  int32_t pos = 0;
+  for (int i = 0; i < N * reps; ++i) {
+    auto string = strings[i % N];
+    if (string && valid_bytes[i % N]) {
+      ASSERT_FALSE(result_->IsNull(i));
+      result_->GetValue(i, &length);
+      ASSERT_EQ(pos, result_->value_offset(i));
+      ASSERT_EQ(static_cast<int32_t>(strlen(string)), length);
+      ASSERT_EQ(strings[i % N], result_->GetString(i));
+
+      pos += length;
+    } else {
+      ASSERT_TRUE(result_->IsNull(i));
+    }
+  }
+}
+
+TEST_F(TestStringBuilder, TestAppendCStringsWithoutValidBytes) {
+  const char* strings[] = {"", "bb", "a", nullptr, "ccc"};
+
+  int N = static_cast<int>(sizeof(strings) / sizeof(strings[0]));
+  int reps = 1000;
+
+  for (int j = 0; j < reps; ++j) {
+    ASSERT_OK(builder_->Append(strings, N));
+  }
+  Done();
+
+  ASSERT_EQ(reps * N, result_->length());
+  ASSERT_EQ(reps, result_->null_count());
+  ASSERT_EQ(reps * 6, result_->value_data()->size());
+
+  int32_t length;
+  int32_t pos = 0;
+  for (int i = 0; i < N * reps; ++i) {
+    if (strings[i % N]) {
+      ASSERT_FALSE(result_->IsNull(i));
+      result_->GetValue(i, &length);
+      ASSERT_EQ(pos, result_->value_offset(i));
+      ASSERT_EQ(static_cast<int32_t>(strlen(strings[i % N])), length);
+      ASSERT_EQ(strings[i % N], result_->GetString(i));
+
+      pos += length;
+    } else {
+      ASSERT_TRUE(result_->IsNull(i));
+    }
+  }
+}
+
 TEST_F(TestStringBuilder, TestZeroLength) {
   // All buffers are null
   Done();
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index c97253e..78c42f4 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -1413,6 +1413,64 @@ Status StringBuilder::Append(const 
std::vector<std::string>& values,
   return Status::OK();
 }
 
+Status StringBuilder::Append(const char** values, int64_t length,
+                             const uint8_t* valid_bytes) {
+  std::size_t total_length = 0;
+  std::vector<std::size_t> value_lengths(length);
+  bool have_null_value = false;
+  for (int64_t i = 0; i < length; ++i) {
+    if (values[i]) {
+      auto value_length = strlen(values[i]);
+      value_lengths[i] = value_length;
+      total_length += value_length;
+    } else {
+      have_null_value = true;
+    }
+  }
+  RETURN_NOT_OK(Reserve(length));
+  RETURN_NOT_OK(value_data_builder_.Reserve(total_length));
+  RETURN_NOT_OK(offsets_builder_.Reserve(length));
+
+  if (valid_bytes) {
+    int64_t valid_bytes_offset = 0;
+    for (int64_t i = 0; i < length; ++i) {
+      RETURN_NOT_OK(AppendNextOffset());
+      if (valid_bytes[i]) {
+        if (values[i]) {
+          RETURN_NOT_OK(value_data_builder_.Append(
+              reinterpret_cast<const uint8_t*>(values[i]), value_lengths[i]));
+        } else {
+          UnsafeAppendToBitmap(valid_bytes + valid_bytes_offset, i - 
valid_bytes_offset);
+          UnsafeAppendToBitmap(false);
+          valid_bytes_offset = i + 1;
+        }
+      }
+    }
+    UnsafeAppendToBitmap(valid_bytes + valid_bytes_offset, length - 
valid_bytes_offset);
+  } else {
+    if (have_null_value) {
+      std::vector<uint8_t> valid_vector(length, 0);
+      for (int64_t i = 0; i < length; ++i) {
+        RETURN_NOT_OK(AppendNextOffset());
+        if (values[i]) {
+          RETURN_NOT_OK(value_data_builder_.Append(
+              reinterpret_cast<const uint8_t*>(values[i]), value_lengths[i]));
+          valid_vector[i] = 1;
+        }
+      }
+      UnsafeAppendToBitmap(valid_vector.data(), length);
+    } else {
+      for (int64_t i = 0; i < length; ++i) {
+        RETURN_NOT_OK(AppendNextOffset());
+        RETURN_NOT_OK(value_data_builder_.Append(
+            reinterpret_cast<const uint8_t*>(values[i]), value_lengths[i]));
+      }
+      UnsafeAppendToBitmap(nullptr, length);
+    }
+  }
+  return Status::OK();
+}
+
 // ----------------------------------------------------------------------
 // Fixed width binary
 
diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h
index b0f77bd..54ce1df 100644
--- a/cpp/src/arrow/builder.h
+++ b/cpp/src/arrow/builder.h
@@ -720,6 +720,18 @@ class ARROW_EXPORT StringBuilder : public BinaryBuilder {
 
   Status Append(const std::vector<std::string>& values,
                 const uint8_t* valid_bytes = NULLPTR);
+
+  /// \brief Append a sequence of nul-terminated strings in one shot.
+  ///        If one of the values is NULL, it is processed as a null
+  ///        value even if the corresponding valid_bytes entry is 1.
+  ///
+  /// \param[in] values a contiguous C array of nul-terminated char *
+  /// \param[in] length the number of values to append
+  /// \param[in] valid_bytes an optional sequence of bytes where non-zero
+  /// indicates a valid (non-null) value
+  /// \return Status
+  Status Append(const char** values, int64_t length,
+                const uint8_t* valid_bytes = NULLPTR);
 };
 
 // ----------------------------------------------------------------------

-- 
To stop receiving notification emails like this one, please contact
apit...@apache.org.

Reply via email to