save-buffer commented on a change in pull request #12537:
URL: https://github.com/apache/arrow/pull/12537#discussion_r829708651



##########
File path: cpp/src/arrow/compute/exec/tpch_node_test.cc
##########
@@ -0,0 +1,623 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gmock/gmock-matchers.h>
+
+#include "arrow/api.h"
+#include "arrow/array/validate.h"
+#include "arrow/compute/exec/options.h"
+#include "arrow/compute/exec/test_util.h"
+#include "arrow/compute/exec/tpch_node.h"
+#include "arrow/compute/exec/util.h"
+#include "arrow/compute/kernels/row_encoder.h"
+#include "arrow/compute/kernels/test_util.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/matchers.h"
+#include "arrow/testing/random.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/make_unique.h"
+#include "arrow/util/pcg_random.h"
+#include "arrow/util/thread_pool.h"
+
+#include <cctype>
+#include <string>
+#include <unordered_set>
+
+namespace arrow {
+namespace compute {
+static constexpr uint32_t kStartDate =
+    8035;  // January 1, 1992 is 8035 days after January 1, 1970
+static constexpr uint32_t kEndDate =
+    10591;  // December 31, 1998 is 10591 days after January 1, 1970
+
+// Runs arrow::internal::ValidateArray on each value of `batch`, in order.
+// ASSERT_OK is a fatal assertion, so checking stops at the first value that
+// does not validate. Every value is accessed through Datum::array().
+void ValidateBatch(const ExecBatch& batch) {
+  const size_t num_values = batch.values.size();
+  for (size_t i = 0; i < num_values; ++i) {
+    const Datum& d = batch.values[i];
+    ASSERT_OK(arrow::internal::ValidateArray(*d.array()));
+  }
+}
+
+// Checks one int32 key column: every key must lie in [min, max] and must not
+// already be present in `seen`. Each key is inserted into `seen`, so passing
+// the same set for successive calls checks uniqueness across all of them.
+//
+// The values are read through ArrayData::GetValues rather than straight from
+// buffers[1], so that a non-zero array offset is taken into account.
+void VerifyUniqueKey(std::unordered_set<int32_t>& seen, const Datum& d, int32_t min,
+                     int32_t max) {
+  const int32_t* keys = d.array()->GetValues<int32_t>(1);
+  const int64_t num_keys = d.length();
+  for (int64_t i = 0; i < num_keys; i++) {
+    ASSERT_TRUE(seen.insert(keys[i]).second);
+    ASSERT_LE(keys[i], max);
+    ASSERT_GE(keys[i], min);
+  }
+}
+
+// Verifies a single string cell of the form <prefix><decimal digits>.
+//
+//   row             start of the cell; at most `byte_width` bytes are examined,
+//                   and a NUL byte ends the digit run early
+//   prefix          NUL-terminated text the cell must begin with
+//   i               row index; used in failure messages and to index `nums`
+//   nums            if non-null, nums[i] must equal the parsed number
+//                   (compared after narrowing to int32_t)
+//   byte_width      number of readable bytes starting at `row`
+//   verify_padding  if true, the digit run must be at least 9 characters long
+//
+// NOTE(review): on a prefix mismatch `row` is streamed as a C string, which
+// assumes the cell is NUL-terminated -- confirm for fixed-width callers.
+void VerifyStringAndNumber_Single(const char* row, const char* prefix, const int64_t i,
+                                  const int32_t* nums, int byte_width,
+                                  bool verify_padding) {
+  const int num_offset = static_cast<int>(std::strlen(prefix));
+  // The comparison below reads num_offset bytes of the cell, so the cell must
+  // be at least that wide.
+  ASSERT_LE(num_offset, byte_width) << "prefix=" << prefix << ", i=" << i;
+  ASSERT_EQ(std::memcmp(row, prefix, num_offset), 0)
+      << row << ", prefix=" << prefix << ", i=" << i;
+  const char* num_str = row + num_offset;
+  int64_t num = 0;
+  int ibyte = num_offset;
+  // Test the bound before dereferencing: when the digits fill the cell up to
+  // byte_width, row[byte_width] is outside the cell and must not be read.
+  for (; ibyte < byte_width && *num_str; ibyte++) {
+    // std::isdigit is undefined for negative values, so go through
+    // unsigned char first.
+    ASSERT_TRUE(std::isdigit(static_cast<unsigned char>(*num_str)));
+    num = num * 10 + (*num_str++ - '0');
+  }
+  if (nums) {
+    ASSERT_EQ(static_cast<int32_t>(num), nums[i]);
+  }
+  if (verify_padding) {
+    // Number of digit characters consumed by the loop above.
+    int num_chars = ibyte - num_offset;
+    ASSERT_GE(num_chars, 9);
+  }
+}
+
+// Runs VerifyStringAndNumber_Single on every cell of a fixed-width string
+// column, treating buffers[1] of `strings` as `byte_width`-byte cells laid out
+// back to back.
+//
+// If `numbers` is not an empty Datum (kind() != Datum::NONE) it must have the
+// same length as `strings`, and its buffers[1] is read as int32 values that are
+// handed to the per-cell check; otherwise no numeric comparison is requested.
+void VerifyStringAndNumber_FixedWidth(const Datum& strings, const Datum& numbers,
+                                      int byte_width, const char* prefix,
+                                      bool verify_padding = true) {
+  const int64_t length = strings.length();
+  const char* cells = reinterpret_cast<const char*>(strings.array()->buffers[1]->data());
+
+  const int32_t* expected = nullptr;
+  const bool have_numbers = numbers.kind() != Datum::NONE;
+  if (have_numbers) {
+    ASSERT_EQ(length, numbers.length());
+    expected = reinterpret_cast<const int32_t*>(numbers.array()->buffers[1]->data());
+  }
+
+  for (int64_t row_idx = 0; row_idx < length; ++row_idx) {
+    VerifyStringAndNumber_Single(cells + row_idx * byte_width, prefix, row_idx, expected,
+                                 byte_width, verify_padding);
+  }
+}
+
+void VerifyStringAndNumber_Varlen(const Datum& strings, const Datum& numbers,
+                                  const char* prefix, bool verify_padding = 
true) {
+  int64_t length = strings.length();
+  const int32_t* offsets =
+      reinterpret_cast<const int32_t*>(strings.array()->buffers[1]->data());
+  const char* str = reinterpret_cast<const 
char*>(strings.array()->buffers[2]->data());
+
+  const int32_t* nums = nullptr;
+  if (numbers.kind() != Datum::NONE) {
+    ASSERT_EQ(length, numbers.length());
+    nums = reinterpret_cast<const 
int32_t*>(numbers.array()->buffers[1]->data());
+  }
+
+  for (int64_t i = 0; i < length; i++) {
+    char tmp_str[256] = {};
+    int32_t start = offsets[i];
+    int32_t str_len = offsets[i + 1] - offsets[i];
+    std::memcpy(tmp_str, str + start, str_len);
+    VerifyStringAndNumber_Single(tmp_str, prefix, i, nums, sizeof(tmp_str),
+                                 verify_padding);
+  }

Review comment:
       I made it verify that there are no nulls when we verify the data.
   Regarding visitors, they look clunky, and the code that implements them isn't
very readable. I'd prefer to stick with the simple solution.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to