This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new e358f79a Update dist/ for commit 
c4d20a08514eaa0520e0c5d3f0d51469011a1a4a
e358f79a is described below

commit e358f79a2f3e7a1ed2b65a05ae66047cdb967a1c
Author: GitHub Actions <[email protected]>
AuthorDate: Sat Jun 8 01:30:18 2024 +0000

    Update dist/ for commit c4d20a08514eaa0520e0c5d3f0d51469011a1a4a
---
 dist/nanoarrow.c | 150 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 dist/nanoarrow.h |  19 ++++++-
 2 files changed, 168 insertions(+), 1 deletion(-)

diff --git a/dist/nanoarrow.c b/dist/nanoarrow.c
index 9677a0e5..37d14305 100644
--- a/dist/nanoarrow.c
+++ b/dist/nanoarrow.c
@@ -66,6 +66,7 @@ void ArrowLayoutInit(struct ArrowLayout* layout, enum 
ArrowType storage_type) {
   switch (storage_type) {
     case NANOARROW_TYPE_UNINITIALIZED:
     case NANOARROW_TYPE_NA:
+    case NANOARROW_TYPE_RUN_END_ENCODED:
       layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_NONE;
       layout->buffer_data_type[0] = NANOARROW_TYPE_UNINITIALIZED;
       layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE;
@@ -576,6 +577,8 @@ static const char* ArrowSchemaFormatTemplate(enum ArrowType 
type) {
       return "+s";
     case NANOARROW_TYPE_MAP:
       return "+m";
+    case NANOARROW_TYPE_RUN_END_ENCODED:
+      return "+r";
 
     default:
       return NULL;
@@ -607,6 +610,13 @@ static int ArrowSchemaInitChildrenIfNeeded(struct 
ArrowSchema* schema,
       NANOARROW_RETURN_NOT_OK(
           ArrowSchemaSetName(schema->children[0]->children[1], "value"));
       break;
+    case NANOARROW_TYPE_RUN_END_ENCODED:
+      NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema, 2));
+      ArrowSchemaInit(schema->children[0]);
+      NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema->children[0], 
"run_ends"));
+      schema->children[0]->flags &= ~ARROW_FLAG_NULLABLE;
+      ArrowSchemaInit(schema->children[1]);
+      NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema->children[1], 
"values"));
     default:
       break;
   }
@@ -729,6 +739,28 @@ ArrowErrorCode ArrowSchemaSetTypeDecimal(struct 
ArrowSchema* schema, enum ArrowT
   return ArrowSchemaSetFormat(schema, buffer);
 }
 
+ArrowErrorCode ArrowSchemaSetTypeRunEndEncoded(struct ArrowSchema* schema,
+                                               enum ArrowType run_end_type) {
+  switch (run_end_type) {
+    case NANOARROW_TYPE_INT16:
+    case NANOARROW_TYPE_INT32:
+    case NANOARROW_TYPE_INT64:
+      break;
+    default:
+      return EINVAL;
+  }
+
+  NANOARROW_RETURN_NOT_OK(ArrowSchemaSetFormat(
+      schema, ArrowSchemaFormatTemplate(NANOARROW_TYPE_RUN_END_ENCODED)));
+  NANOARROW_RETURN_NOT_OK(
+      ArrowSchemaInitChildrenIfNeeded(schema, NANOARROW_TYPE_RUN_END_ENCODED));
+  NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema->children[0], 
run_end_type));
+  NANOARROW_RETURN_NOT_OK(
+      ArrowSchemaSetType(schema->children[1], NANOARROW_TYPE_UNINITIALIZED));
+
+  return NANOARROW_OK;
+}
+
 static const char* ArrowTimeUnitFormatString(enum ArrowTimeUnit time_unit) {
   switch (time_unit) {
     case NANOARROW_TIME_UNIT_SECOND:
@@ -1202,6 +1234,13 @@ static ArrowErrorCode ArrowSchemaViewParse(struct 
ArrowSchemaView* schema_view,
           *format_end_out = format + 2;
           return NANOARROW_OK;
 
+        // run end encoded has no buffer at all
+        case 'r':
+          schema_view->storage_type = NANOARROW_TYPE_RUN_END_ENCODED;
+          schema_view->type = NANOARROW_TYPE_RUN_END_ENCODED;
+          *format_end_out = format + 2;
+          return NANOARROW_OK;
+
         // just validity buffer
         case 'w':
           if (format[2] != ':' || format[3] == '\0') {
@@ -1576,6 +1615,9 @@ static ArrowErrorCode ArrowSchemaViewValidate(struct 
ArrowSchemaView* schema_vie
     case NANOARROW_TYPE_FIXED_SIZE_LIST:
       return ArrowSchemaViewValidateNChildren(schema_view, 1, error);
 
+    case NANOARROW_TYPE_RUN_END_ENCODED:
+      return ArrowSchemaViewValidateNChildren(schema_view, 2, error);
+
     case NANOARROW_TYPE_STRUCT:
       return ArrowSchemaViewValidateNChildren(schema_view, -1, error);
 
@@ -2123,6 +2165,7 @@ static ArrowErrorCode ArrowArraySetStorageType(struct 
ArrowArray* array,
   switch (storage_type) {
     case NANOARROW_TYPE_UNINITIALIZED:
     case NANOARROW_TYPE_NA:
+    case NANOARROW_TYPE_RUN_END_ENCODED:
       array->n_buffers = 0;
       break;
 
@@ -2865,6 +2908,15 @@ static int ArrowArrayViewValidateMinimal(struct 
ArrowArrayView* array_view,
                       (long)array_view->n_children);
         return EINVAL;
       }
+      break;
+    case NANOARROW_TYPE_RUN_END_ENCODED:
+      if (array_view->n_children != 2) {
+        ArrowErrorSet(
+            error, "Expected 2 children for %s array but found %ld child 
arrays",
+            ArrowTypeString(array_view->storage_type), 
(long)array_view->n_children);
+        return EINVAL;
+      }
+      break;
     default:
       break;
   }
@@ -2900,6 +2952,68 @@ static int ArrowArrayViewValidateMinimal(struct 
ArrowArrayView* array_view,
         return EINVAL;
       }
       break;
+
+    case NANOARROW_TYPE_RUN_END_ENCODED: {
+      if (array_view->n_children != 2) {
+        ArrowErrorSet(error,
+                      "Expected 2 children for run-end encoded array but found 
%ld",
+                      (long)array_view->n_children);
+        return EINVAL;
+      }
+      struct ArrowArrayView* run_ends_view = array_view->children[0];
+      struct ArrowArrayView* values_view = array_view->children[1];
+      int64_t max_length;
+      switch (run_ends_view->storage_type) {
+        case NANOARROW_TYPE_INT16:
+          max_length = INT16_MAX;
+          break;
+        case NANOARROW_TYPE_INT32:
+          max_length = INT32_MAX;
+          break;
+        case NANOARROW_TYPE_INT64:
+          max_length = INT64_MAX;
+          break;
+        default:
+          ArrowErrorSet(
+              error,
+              "Run-end encoded array only supports INT16, INT32 or INT64 
run-ends "
+              "but found run-ends type %s",
+              ArrowTypeString(run_ends_view->storage_type));
+          return EINVAL;
+      }
+      // uint64_t is used here to avoid overflow when adding the offset and 
length
+      if ((uint64_t)array_view->offset + (uint64_t)array_view->length >
+          (uint64_t)max_length) {
+        ArrowErrorSet(
+            error,
+            "Offset + length of a run-end encoded array must fit in a value"
+            " of the run end type %s, but offset + length is %lu while the "
+            "allowed maximum is %lu",
+            ArrowTypeString(run_ends_view->storage_type),
+            (unsigned long)array_view->offset + (unsigned 
long)array_view->length,
+            (unsigned long)max_length);
+        return EINVAL;
+      }
+      if (run_ends_view->length > values_view->length) {
+        ArrowErrorSet(
+            error, "Length of run_ends is greater than the length of values: 
%ld > %ld",
+            (long)run_ends_view->length, (long)values_view->length);
+        return EINVAL;
+      }
+      if (run_ends_view->length == 0 && values_view->length != 0) {
+        ArrowErrorSet(error,
+                      "Run-end encoded array has zero length %ld, but values 
array has "
+                      "non-zero length",
+                      (long)values_view->length);
+        return EINVAL;
+      }
+      if (run_ends_view->null_count != 0) {
+        ArrowErrorSet(error, "Null count must be 0 for run ends array, but is 
%ld",
+                      (long)run_ends_view->null_count);
+        return EINVAL;
+      }
+      break;
+    }
     default:
       break;
   }
@@ -3049,6 +3163,18 @@ static int ArrowArrayViewValidateDefault(struct 
ArrowArrayView* array_view,
         }
       }
       break;
+
+    case NANOARROW_TYPE_RUN_END_ENCODED: {
+      struct ArrowArrayView* run_ends_view = array_view->children[0];
+      int64_t last_run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, 0);
+      if (last_run_end < 1) {
+        ArrowErrorSet(error,
+                      "All run ends must be greater than 0 but the first run 
end is %ld",
+                      (long)last_run_end);
+        return EINVAL;
+      }
+      break;
+    }
     default:
       break;
   }
@@ -3217,6 +3343,30 @@ static int ArrowArrayViewValidateFull(struct 
ArrowArrayView* array_view,
     }
   }
 
+  if (array_view->storage_type == NANOARROW_TYPE_RUN_END_ENCODED) {
+    struct ArrowArrayView* run_ends_view = array_view->children[0];
+    int64_t last_run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, 0);
+    for (int64_t i = 1; i < run_ends_view->length; i++) {
+      const int64_t run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, i);
+      if (run_end <= last_run_end) {
+        ArrowErrorSet(error,
+                      "Every run end must be strictly greater than the 
previous run end, "
+                      "but run_ends[%ld] is %ld and run_ends[%ld] is %ld",
+                      (long)i, (long)run_end, (long)i - 1, (long)last_run_end);
+        return EINVAL;
+      }
+      last_run_end = run_end;
+    }
+    last_run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, 
run_ends_view->length - 1);
+    if (last_run_end < (array_view->offset + array_view->length)) {
+      ArrowErrorSet(error,
+                    "Last run end is %ld but it should >= %ld (offset: %ld, 
length: %ld)",
+                    (long)last_run_end, (long)(array_view->offset + 
array_view->length),
+                    (long)array_view->offset, (long)array_view->length);
+      return EINVAL;
+    }
+  }
+
   // Recurse for children
   for (int64_t i = 0; i < array_view->n_children; i++) {
     
NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateFull(array_view->children[i], 
error));
diff --git a/dist/nanoarrow.h b/dist/nanoarrow.h
index 1227d015..bfb13f64 100644
--- a/dist/nanoarrow.h
+++ b/dist/nanoarrow.h
@@ -482,7 +482,8 @@ enum ArrowType {
   NANOARROW_TYPE_LARGE_STRING,
   NANOARROW_TYPE_LARGE_BINARY,
   NANOARROW_TYPE_LARGE_LIST,
-  NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO
+  NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO,
+  NANOARROW_TYPE_RUN_END_ENCODED
 };
 
 /// \brief Get a string value of an enum ArrowType value
@@ -569,6 +570,8 @@ static inline const char* ArrowTypeString(enum ArrowType 
type) {
       return "large_list";
     case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
       return "interval_month_day_nano";
+    case NANOARROW_TYPE_RUN_END_ENCODED:
+      return "run_end_encoded";
     default:
       return NULL;
   }
@@ -1052,6 +1055,8 @@ static inline void ArrowDecimalSetBytes(struct 
ArrowDecimal* decimal,
   NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeFixedSize)
 #define ArrowSchemaSetTypeDecimal \
   NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeDecimal)
+#define ArrowSchemaSetTypeRunEndEncoded \
+  NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeRunEndEncoded)
 #define ArrowSchemaSetTypeDateTime \
   NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeDateTime)
 #define ArrowSchemaSetTypeUnion \
@@ -1364,6 +1369,17 @@ ArrowErrorCode ArrowSchemaSetTypeDecimal(struct 
ArrowSchema* schema, enum ArrowT
                                          int32_t decimal_precision,
                                          int32_t decimal_scale);
 
+/// \brief Set the format field of a run-end encoded schema
+///
+/// Returns EINVAL for run_end_type that is not
+/// NANOARROW_TYPE_INT16, NANOARROW_TYPE_INT32 or NANOARROW_TYPE_INT64.
+/// Schema must have been initialized using ArrowSchemaInit() or 
ArrowSchemaDeepCopy().
+/// The caller must call `ArrowSchemaSetTypeXXX(schema->children[1])` to
+/// set the value type. Note that when building arrays using the 
`ArrowArrayAppendXXX()`
+/// functions, the run-end encoded array's logical length must be updated 
manually.
+ArrowErrorCode ArrowSchemaSetTypeRunEndEncoded(struct ArrowSchema* schema,
+                                               enum ArrowType run_end_type);
+
 /// \brief Set the format field of a time, timestamp, or duration schema
 ///
 /// Returns EINVAL for type that is not
@@ -3527,6 +3543,7 @@ static inline ArrowErrorCode 
ArrowArrayFinishElement(struct ArrowArray* array) {
         }
       }
       break;
+      return NANOARROW_OK;
     default:
       return EINVAL;
   }

Reply via email to