This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new b5d2742e fix: Ensure negative return values from snprintf() are not
used as indexes (#418)
b5d2742e is described below
commit b5d2742e2d0aee71c2ca5a277169e53c335f6c43
Author: Dewey Dunnington <[email protected]>
AuthorDate: Mon Apr 15 15:52:56 2024 -0300
fix: Ensure negative return values from snprintf() are not used as indexes
(#418)
It is difficult, but not impossible, to get `snprintf()` to return -1,
and there are a number of places where we call it on untrusted user
input. This PR ensures that the return value of all `snprintf()` calls
is checked.
---
.../src/nanoarrow/nanoarrow_ipc_decoder.c | 30 +++++++++++++++++++
src/nanoarrow/schema.c | 35 ++++++++++++++++++++--
src/nanoarrow/utils.c | 7 +++++
3 files changed, 70 insertions(+), 2 deletions(-)
diff --git a/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_decoder.c
b/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_decoder.c
index 2e19ebd2..b893c4b0 100644
--- a/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_decoder.c
+++ b/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_decoder.c
@@ -613,6 +613,11 @@ static int ArrowIpcDecoderSetTypeFixedSizeList(struct
ArrowSchema* schema,
char fixed_size_str[128];
int n_chars = snprintf(fixed_size_str, 128, "+w:%d", fixed_size);
+ if (n_chars < 0) {
+ ArrowErrorSet(error, "snprintf() encoding error");
+ return ERANGE;
+ }
+
fixed_size_str[n_chars] = '\0';
return ArrowIpcDecoderSetTypeSimpleNested(schema, fixed_size_str, error);
}
@@ -671,6 +676,11 @@ static int ArrowIpcDecoderSetTypeUnion(struct ArrowSchema*
schema,
return EINVAL;
}
+ if (n_chars < 0) {
+ ArrowErrorSet(error, "snprintf() encoding error");
+ return ERANGE;
+ }
+
if (ns(Union_typeIds_is_present(type))) {
flatbuffers_int32_vec_t type_ids = ns(Union_typeIds(type));
int64_t n_type_ids = flatbuffers_int32_vec_len(type_ids);
@@ -689,11 +699,21 @@ static int ArrowIpcDecoderSetTypeUnion(struct
ArrowSchema* schema,
format_cursor += n_chars;
format_out_size -= n_chars;
+ if (n_chars < 0) {
+ ArrowErrorSet(error, "snprintf() encoding error");
+ return ERANGE;
+ }
+
for (int64_t i = 1; i < n_type_ids; i++) {
n_chars = snprintf(format_cursor, format_out_size, ",%d",
(int)flatbuffers_int32_vec_at(type_ids, i));
format_cursor += n_chars;
format_out_size -= n_chars;
+
+ if (n_chars < 0) {
+ ArrowErrorSet(error, "snprintf() encoding error");
+ return ERANGE;
+ }
}
}
} else if (n_children > 0) {
@@ -701,10 +721,20 @@ static int ArrowIpcDecoderSetTypeUnion(struct
ArrowSchema* schema,
format_cursor += n_chars;
format_out_size -= n_chars;
+ if (n_chars < 0) {
+ ArrowErrorSet(error, "snprintf() encoding error");
+ return ERANGE;
+ }
+
for (int64_t i = 1; i < n_children; i++) {
n_chars = snprintf(format_cursor, format_out_size, ",%d", (int)i);
format_cursor += n_chars;
format_out_size -= n_chars;
+
+ if (n_chars < 0) {
+ ArrowErrorSet(error, "snprintf() encoding error");
+ return ERANGE;
+ }
}
}
diff --git a/src/nanoarrow/schema.c b/src/nanoarrow/schema.c
index 21564507..7451136c 100644
--- a/src/nanoarrow/schema.c
+++ b/src/nanoarrow/schema.c
@@ -233,6 +233,10 @@ ArrowErrorCode ArrowSchemaSetTypeFixedSize(struct
ArrowSchema* schema,
return EINVAL;
}
+ if (((size_t)n_chars) >= sizeof(buffer) || n_chars < 0) {
+ return ERANGE;
+ }
+
buffer[n_chars] = '\0';
NANOARROW_RETURN_NOT_OK(ArrowSchemaSetFormat(schema, buffer));
@@ -265,6 +269,10 @@ ArrowErrorCode ArrowSchemaSetTypeDecimal(struct
ArrowSchema* schema, enum ArrowT
return EINVAL;
}
+ if (((size_t)n_chars) >= sizeof(buffer) || n_chars < 0) {
+ return ERANGE;
+ }
+
buffer[n_chars] = '\0';
return ArrowSchemaSetFormat(schema, buffer);
}
@@ -341,7 +349,7 @@ ArrowErrorCode ArrowSchemaSetTypeDateTime(struct
ArrowSchema* schema, enum Arrow
return EINVAL;
}
- if (((size_t)n_chars) >= sizeof(buffer)) {
+ if (((size_t)n_chars) >= sizeof(buffer) || n_chars < 0) {
return ERANGE;
}
@@ -378,6 +386,12 @@ ArrowErrorCode ArrowSchemaSetTypeUnion(struct ArrowSchema*
schema, enum ArrowTyp
return EINVAL;
}
+ // Ensure that an encoding error from snprintf() does not result
+ // in an out-of-bounds access.
+ if (n_chars < 0) {
+ return ERANGE;
+ }
+
if (n_children > 0) {
n_chars = snprintf(format_cursor, format_out_size, "0");
format_cursor += n_chars;
@@ -390,6 +404,12 @@ ArrowErrorCode ArrowSchemaSetTypeUnion(struct ArrowSchema*
schema, enum ArrowTyp
}
}
+ // Ensure that an encoding error from snprintf() does not result
+ // in an out-of-bounds access.
+ if (n_chars < 0) {
+ return ERANGE;
+ }
+
NANOARROW_RETURN_NOT_OK(ArrowSchemaSetFormat(schema, format_out));
NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema, n_children));
@@ -1256,6 +1276,12 @@ static int64_t ArrowSchemaTypeToStringInternal(struct
ArrowSchemaView* schema_vi
// among multiple sprintf calls.
static inline void ArrowToStringLogChars(char** out, int64_t n_chars_last,
int64_t* n_remaining, int64_t*
n_chars) {
+ // In the unlikely snprintf() returning a negative value (encoding error),
+ // ensure the result won't cause an out-of-bounds access.
+ if (n_chars_last < 0) {
+ n_chars = 0;
+ }
+
*n_chars += n_chars_last;
*n_remaining -= n_chars_last;
@@ -1351,7 +1377,12 @@ int64_t ArrowSchemaToString(const struct ArrowSchema*
schema, char* out, int64_t
n_chars += snprintf(out, n, ">");
}
- return n_chars;
+ // Ensure that we always return a positive result
+ if (n_chars > 0) {
+ return n_chars;
+ } else {
+ return 0;
+ }
}
ArrowErrorCode ArrowMetadataReaderInit(struct ArrowMetadataReader* reader,
diff --git a/src/nanoarrow/utils.c b/src/nanoarrow/utils.c
index 37ea6f62..c9b4ebd6 100644
--- a/src/nanoarrow/utils.c
+++ b/src/nanoarrow/utils.c
@@ -430,6 +430,13 @@ ArrowErrorCode ArrowDecimalAppendDigitsToBuffer(const
struct ArrowDecimal* decim
// The most significant segment should have no leading zeroes
int n_chars = snprintf((char*)buffer->data + buffer->size_bytes, 21, "%lu",
(unsigned long)segments[num_segments - 1]);
+
+ // Ensure that an encoding error from snprintf() does not result
+ // in an out-of-bounds access.
+ if (n_chars < 0) {
+ return ERANGE;
+ }
+
buffer->size_bytes += n_chars;
// Subsequent output needs to be left-padded with zeroes such that each
segment