bkietz commented on a change in pull request #9323:
URL: https://github.com/apache/arrow/pull/9323#discussion_r581371432



##########
File path: cpp/src/arrow/dataset/partition_test.cc
##########
@@ -77,6 +78,39 @@ class TestPartitioning : public ::testing::Test {
     ASSERT_OK_AND_ASSIGN(partitioning_, factory_->Finish(actual));
   }
 
+  void AssertPartition(const std::shared_ptr<Partitioning> partitioning,
+                       const std::shared_ptr<RecordBatch> full_batch,
+                       const RecordBatchVector& expected_batches,
+                       const std::vector<Expression>& expected_expressions) {
+    ASSERT_OK_AND_ASSIGN(auto partition_results, 
partitioning->Partition(full_batch));
+    std::shared_ptr<RecordBatch> rest = full_batch;

Review comment:
       Unused:
   ```suggestion
   ```

##########
File path: cpp/src/arrow/dataset/partition.cc
##########
@@ -147,7 +151,9 @@ Result<Expression> KeyValuePartitioning::ConvertKey(const 
Key& key) const {
 
   std::shared_ptr<Scalar> converted;
 
-  if (field->type()->id() == Type::DICTIONARY) {
+  if (!key.value.has_value()) {
+    return is_null(field_ref(field->name()));
+  } else if (field->type()->id() == Type::DICTIONARY) {

Review comment:
       nit:
   ```suggestion
     }
   
     if (field->type()->id() == Type::DICTIONARY) {
   ```

##########
File path: cpp/src/arrow/compute/kernels/vector_hash_test.cc
##########
@@ -542,6 +547,12 @@ TEST_F(TestHashKernel, UniqueDecimal) {
                                           {true, false, true, true}, expected, 
{1, 0, 1});
 }
 
+TEST_F(TestHashKernel, UniqueNull) {
+  CheckUnique<NullType, std::nullptr_t>(null(), {nullptr, nullptr}, {false, 
true},
+                                        {nullptr}, {false});
+  CheckUnique<NullType, std::nullptr_t>(null(), {}, {}, {}, {});

Review comment:
       ```suggestion
     CheckUnique(ArrayFromJSON(null(), "[null, null, null]"), 
ArrayFromJSON(null(), "[null]"));
     CheckUnique(ArrayFromJSON(null(), "[]"), ArrayFromJSON(null(), "[]"));
   ```

##########
File path: cpp/src/arrow/dataset/partition_test.cc
##########
@@ -282,9 +348,16 @@ TEST_F(TestPartitioning, HivePartitioningFormat) {
                     equal(field_ref("alpha"), literal(0))),
                "alpha=0/beta=3.25");
   AssertFormat(equal(field_ref("alpha"), literal(0)), "alpha=0");

Review comment:
       Since a null valued partition key is semantically equivalent to an 
absent one, we should ensure they format identically. I've created ARROW-11762 
for follow up

##########
File path: cpp/src/arrow/dataset/partition.cc
##########
@@ -625,8 +650,27 @@ class StructDictionary {
 
  private:
   Status AddOne(Datum column, std::shared_ptr<Int32Array>* fused_indices) {
+    if (column.type()->id() == Type::DICTIONARY) {
+      if (column.null_count() != 0) {
+        // TODO(ARROW-11732) Optimize this by allowign DictionaryEncode to 
transfer a

Review comment:
       ```suggestion
           // TODO(ARROW-11732) Optimize this by allowing DictionaryEncode to 
transfer a
   ```

##########
File path: cpp/src/arrow/dataset/partition.cc
##########
@@ -287,8 +303,13 @@ class KeyValuePartitioningFactory : public 
PartitioningFactory {
     return it_inserted.first->second;
   }
 
-  Status InsertRepr(const std::string& name, util::string_view repr) {
-    return InsertRepr(GetOrInsertField(name), repr);
+  Status InsertRepr(const std::string& name, util::optional<string_view> repr) 
{
+    auto field_index = GetOrInsertField(name);
+    if (repr.has_value()) {
+      return InsertRepr(field_index, *repr);
+    } else {
+      return Status::OK();
+    }

Review comment:
       ```suggestion
       if (repr.has_value()) {
         auto field_index = GetOrInsertField(name);
         return InsertRepr(field_index, *repr);
       }
       return Status::OK();
   ```




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to