This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new b5d2742e fix: Ensure negative return values from snprintf() are not 
used as indexes (#418)
b5d2742e is described below

commit b5d2742e2d0aee71c2ca5a277169e53c335f6c43
Author: Dewey Dunnington <[email protected]>
AuthorDate: Mon Apr 15 15:52:56 2024 -0300

    fix: Ensure negative return values from snprintf() are not used as indexes 
(#418)
    
    It is difficult, but not impossible, to get `snprintf()` to return -1,
    and there are a number of places where we call it on untrusted user
    input. This PR ensures that the return value of all `snprintf()` calls
    is checked.
---
 .../src/nanoarrow/nanoarrow_ipc_decoder.c          | 30 +++++++++++++++++++
 src/nanoarrow/schema.c                             | 35 ++++++++++++++++++++--
 src/nanoarrow/utils.c                              |  7 +++++
 3 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_decoder.c 
b/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_decoder.c
index 2e19ebd2..b893c4b0 100644
--- a/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_decoder.c
+++ b/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_decoder.c
@@ -613,6 +613,11 @@ static int ArrowIpcDecoderSetTypeFixedSizeList(struct 
ArrowSchema* schema,
 
   char fixed_size_str[128];
   int n_chars = snprintf(fixed_size_str, 128, "+w:%d", fixed_size);
+  if (n_chars < 0) {
+    ArrowErrorSet(error, "snprintf() encoding error");
+    return ERANGE;
+  }
+
   fixed_size_str[n_chars] = '\0';
   return ArrowIpcDecoderSetTypeSimpleNested(schema, fixed_size_str, error);
 }
@@ -671,6 +676,11 @@ static int ArrowIpcDecoderSetTypeUnion(struct ArrowSchema* 
schema,
       return EINVAL;
   }
 
+  if (n_chars < 0) {
+    ArrowErrorSet(error, "snprintf() encoding error");
+    return ERANGE;
+  }
+
   if (ns(Union_typeIds_is_present(type))) {
     flatbuffers_int32_vec_t type_ids = ns(Union_typeIds(type));
     int64_t n_type_ids = flatbuffers_int32_vec_len(type_ids);
@@ -689,11 +699,21 @@ static int ArrowIpcDecoderSetTypeUnion(struct 
ArrowSchema* schema,
       format_cursor += n_chars;
       format_out_size -= n_chars;
 
+      if (n_chars < 0) {
+        ArrowErrorSet(error, "snprintf() encoding error");
+        return ERANGE;
+      }
+
       for (int64_t i = 1; i < n_type_ids; i++) {
         n_chars = snprintf(format_cursor, format_out_size, ",%d",
                            (int)flatbuffers_int32_vec_at(type_ids, i));
         format_cursor += n_chars;
         format_out_size -= n_chars;
+
+        if (n_chars < 0) {
+          ArrowErrorSet(error, "snprintf() encoding error");
+          return ERANGE;
+        }
       }
     }
   } else if (n_children > 0) {
@@ -701,10 +721,20 @@ static int ArrowIpcDecoderSetTypeUnion(struct 
ArrowSchema* schema,
     format_cursor += n_chars;
     format_out_size -= n_chars;
 
+    if (n_chars < 0) {
+      ArrowErrorSet(error, "snprintf() encoding error");
+      return ERANGE;
+    }
+
     for (int64_t i = 1; i < n_children; i++) {
       n_chars = snprintf(format_cursor, format_out_size, ",%d", (int)i);
       format_cursor += n_chars;
       format_out_size -= n_chars;
+
+      if (n_chars < 0) {
+        ArrowErrorSet(error, "snprintf() encoding error");
+        return ERANGE;
+      }
     }
   }
 
diff --git a/src/nanoarrow/schema.c b/src/nanoarrow/schema.c
index 21564507..7451136c 100644
--- a/src/nanoarrow/schema.c
+++ b/src/nanoarrow/schema.c
@@ -233,6 +233,10 @@ ArrowErrorCode ArrowSchemaSetTypeFixedSize(struct 
ArrowSchema* schema,
       return EINVAL;
   }
 
+  if (((size_t)n_chars) >= sizeof(buffer) || n_chars < 0) {
+    return ERANGE;
+  }
+
   buffer[n_chars] = '\0';
   NANOARROW_RETURN_NOT_OK(ArrowSchemaSetFormat(schema, buffer));
 
@@ -265,6 +269,10 @@ ArrowErrorCode ArrowSchemaSetTypeDecimal(struct 
ArrowSchema* schema, enum ArrowT
       return EINVAL;
   }
 
+  if (((size_t)n_chars) >= sizeof(buffer) || n_chars < 0) {
+    return ERANGE;
+  }
+
   buffer[n_chars] = '\0';
   return ArrowSchemaSetFormat(schema, buffer);
 }
@@ -341,7 +349,7 @@ ArrowErrorCode ArrowSchemaSetTypeDateTime(struct 
ArrowSchema* schema, enum Arrow
       return EINVAL;
   }
 
-  if (((size_t)n_chars) >= sizeof(buffer)) {
+  if (((size_t)n_chars) >= sizeof(buffer) || n_chars < 0) {
     return ERANGE;
   }
 
@@ -378,6 +386,12 @@ ArrowErrorCode ArrowSchemaSetTypeUnion(struct ArrowSchema* 
schema, enum ArrowTyp
       return EINVAL;
   }
 
+  // Ensure that an encoding error from snprintf() does not result
+  // in an out-of-bounds access.
+  if (n_chars < 0) {
+    return ERANGE;
+  }
+
   if (n_children > 0) {
     n_chars = snprintf(format_cursor, format_out_size, "0");
     format_cursor += n_chars;
@@ -390,6 +404,12 @@ ArrowErrorCode ArrowSchemaSetTypeUnion(struct ArrowSchema* 
schema, enum ArrowTyp
     }
   }
 
+  // Ensure that an encoding error from snprintf() does not result
+  // in an out-of-bounds access.
+  if (n_chars < 0) {
+    return ERANGE;
+  }
+
   NANOARROW_RETURN_NOT_OK(ArrowSchemaSetFormat(schema, format_out));
 
   NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema, n_children));
@@ -1256,6 +1276,12 @@ static int64_t ArrowSchemaTypeToStringInternal(struct 
ArrowSchemaView* schema_vi
 // among multiple sprintf calls.
 static inline void ArrowToStringLogChars(char** out, int64_t n_chars_last,
                                          int64_t* n_remaining, int64_t* 
n_chars) {
+  // In the unlikely snprintf() returning a negative value (encoding error),
+  // ensure the result won't cause an out-of-bounds access.
+  if (n_chars_last < 0) {
+    n_chars = 0;
+  }
+
   *n_chars += n_chars_last;
   *n_remaining -= n_chars_last;
 
@@ -1351,7 +1377,12 @@ int64_t ArrowSchemaToString(const struct ArrowSchema* 
schema, char* out, int64_t
     n_chars += snprintf(out, n, ">");
   }
 
-  return n_chars;
+  // Ensure that we always return a positive result
+  if (n_chars > 0) {
+    return n_chars;
+  } else {
+    return 0;
+  }
 }
 
 ArrowErrorCode ArrowMetadataReaderInit(struct ArrowMetadataReader* reader,
diff --git a/src/nanoarrow/utils.c b/src/nanoarrow/utils.c
index 37ea6f62..c9b4ebd6 100644
--- a/src/nanoarrow/utils.c
+++ b/src/nanoarrow/utils.c
@@ -430,6 +430,13 @@ ArrowErrorCode ArrowDecimalAppendDigitsToBuffer(const 
struct ArrowDecimal* decim
   // The most significant segment should have no leading zeroes
   int n_chars = snprintf((char*)buffer->data + buffer->size_bytes, 21, "%lu",
                          (unsigned long)segments[num_segments - 1]);
+
+  // Ensure that an encoding error from snprintf() does not result
+  // in an out-of-bounds access.
+  if (n_chars < 0) {
+    return ERANGE;
+  }
+
   buffer->size_bytes += n_chars;
 
   // Subsequent output needs to be left-padded with zeroes such that each 
segment

Reply via email to