felipecrv commented on code in PR #35565:
URL: https://github.com/apache/arrow/pull/35565#discussion_r1199260838


##########
cpp/src/arrow/type_traits.cc:
##########
@@ -0,0 +1,94 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/type_traits.h"
+
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+namespace {}  // namespace
+
+int RequiredValueAlignmentForBuffer(Type::type type_id, int buffer_index) {
+  if (buffer_index != 1) {
+    // If the buffer index is 0 then either:
+    //  * The array type has no buffers, thus this shouldn't be called anyways
+    //  * The array has a validity buffer at 0, no alignment needed
+    //  * The array is a union array and has a types buffer at 0, no alignment 
needed
+    // If the buffer index is > 1 then, in all current cases, it represents 
binary
+    //  data and no alignment is needed
+    return 1;
+  }
+  DCHECK_NE(type_id, Type::DICTIONARY);
+  DCHECK_NE(type_id, Type::EXTENSION);
+
+  switch (type_id) {
+    case Type::NA:                 // No buffers
+    case Type::FIXED_SIZE_LIST:    // No second buffer (values in child array)
+    case Type::FIXED_SIZE_BINARY:  // Fixed size binary could be dangerous but 
the
+                                   // compute kernels don't type pun this.  
E.g. if
+                                   // an extension type is storing some kind 
of struct
+                                   // here then the user should do their own 
alignment
+                                   // check before casting to an array of 
structs
+    case Type::BOOL:               // Always treated as uint8_t*
+    case Type::INT8:               // Always treated as uint8_t*
+    case Type::UINT8:              // Always treated as uint8_t*
+    case Type::SPARSE_UNION:       // No second buffer
+    case Type::RUN_END_ENCODED:    // No buffers
+    case Type::STRUCT:             // No second buffer
+      return 1;
+    case Type::INT16:
+    case Type::UINT16:
+    case Type::HALF_FLOAT:
+      return 2;
+    case Type::INT32:
+    case Type::UINT32:
+    case Type::FLOAT:
+    case Type::STRING:  // Offsets may be cast to int32_t*
+    case Type::BINARY:  // Offsets may be cast to int32_t*
+    case Type::DATE32:
+    case Type::TIME32:
+    case Type::LIST:         // Offsets may be cast to int32_t*, data is in 
child array
+    case Type::MAP:          // This is a list array
+    case Type::DENSE_UNION:  // Has an offsets buffer of int32_t*
+    case Type::INTERVAL_MONTHS:    // Stored as int32_t*
+    case Type::INTERVAL_DAY_TIME:  // Stored as two contiguous 32-bit integers
+      return 4;
+    case Type::INT64:
+    case Type::UINT64:
+    case Type::DOUBLE:
+    case Type::DECIMAL128:    // May be cast to GenericBasicDecimal* which 
requires
+                              // alignment of 8
+    case Type::DECIMAL256:    // May be cast to GenericBasicDecimal* which 
requires
+                              // alignment of 8
+    case Type::LARGE_BINARY:  // Offsets may be cast to int64_t*
+    case Type::LARGE_LIST:    // Offsets may be cast to int64_t*
+    case Type::LARGE_STRING:  // Offsets may be cast to int64_t*
+    case Type::DATE64:
+    case Type::TIME64:
+    case Type::TIMESTAMP:
+    case Type::DURATION:
+    case Type::INTERVAL_MONTH_DAY_NANO:  // Stored as two 32-bit integers and 
a 64-bit
+                                         // integer
+      return 8;
+    default:

Review Comment:
   Can we keep this without a `default` so when new types are added by me and 
@bkietz we get a compiler error reminding us to think about this function?



##########
cpp/src/arrow/util/align_util.h:
##########
@@ -72,12 +72,13 @@ namespace util {
 // Functions to check if the provided Arrow object is aligned by the specified 
alignment
 
 /// \brief if this is specified in one of the CheckAlignment or 
EnsureAlignment functions
+///
 /// then the function will ensure each buffer is suitably aligned for the data 
type of the
 /// array.  For example, given an int32 buffer the validity buffer must be a 
multiple of 8
 /// and the values buffer must be a multiple of 32.  Given a large_string 
buffer the
 /// validity buffer and values buffers must be multiples of 8 and the offsets 
buffer must
 /// be a multiple of 64.
-constexpr int64_t kMallocAlignment = -3;
+constexpr int64_t kValueAlignment = -3;

Review Comment:
   Nice!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to