This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/main by this push:
     new 1110d25604 AVRO-4136: [c] json encoding of byte[] containing 0x00 (#3375)
1110d25604 is described below

commit 1110d2560480e8a1cdff8638d2312538334bcadb
Author: Steven Aerts <[email protected]>
AuthorDate: Fri Oct 24 14:51:18 2025 +0200

    AVRO-4136: [c] json encoding of byte[] containing 0x00 (#3375)
    
    Prevent fixed and bytes values from being prematurely cut off at the
    first 0x00 byte when they are encoded into JSON.
---
 lang/c/CMakeLists.txt         |  4 ++--
 lang/c/src/value-json.c       | 11 +++++------
 lang/c/tests/test_avro_data.c |  8 ++++----
 3 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/lang/c/CMakeLists.txt b/lang/c/CMakeLists.txt
index 123676b3d8..49e2a36bc8 100644
--- a/lang/c/CMakeLists.txt
+++ b/lang/c/CMakeLists.txt
@@ -179,13 +179,13 @@ set(CODEC_LIBRARIES ${ZLIB_LIBRARIES} ${LZMA_LIBRARIES} ${SNAPPY_LIBRARIES})
 set(CODEC_PKG "${ZLIB_PKG} ${LZMA_PKG} ${SNAPPY_PKG}")
 
 # Jansson JSON library
-pkg_check_modules(JANSSON jansson>=2.3)
+pkg_check_modules(JANSSON jansson>=2.7)
 if (JANSSON_FOUND)
     set(JANSSON_PKG libjansson)
     include_directories(${JANSSON_INCLUDE_DIRS})
     link_directories(${JANSSON_LIBRARY_DIRS})
 else (JANSSON_FOUND)
-    message(FATAL_ERROR "libjansson >=2.3 not found")
+    message(FATAL_ERROR "libjansson >=2.7 not found")
 endif (JANSSON_FOUND)
 
 
diff --git a/lang/c/src/value-json.c b/lang/c/src/value-json.c
index 53c2b3d3e4..7927c14dd8 100644
--- a/lang/c/src/value-json.c
+++ b/lang/c/src/value-json.c
@@ -29,7 +29,8 @@
 #include "jansson.h"
 
 /*
- * Converts a binary buffer into a NUL-terminated JSON UTF-8 string.
+ * Converts a binary buffer into a JSON UTF-8 string which is NOT
+ * terminated with a null byte ('\0').
  * Avro bytes and fixed values are encoded in JSON as a string, and JSON
  * strings must be in UTF-8.  For these Avro types, the JSON string is
  * restricted to the characters U+0000..U+00FF, which corresponds to the
@@ -51,7 +52,7 @@ encode_utf8_bytes(const void *src, size_t src_len,
        // the range 0x80..0xff will take up two.
        const uint8_t  *src8 = (const uint8_t *) src;
 
-       size_t  utf8_len = src_len + 1;  // +1 for NUL terminator
+       size_t  utf8_len = src_len;
        size_t  i;
        for (i = 0; i < src_len; i++) {
                if (src8[i] & 0x80) {
@@ -76,8 +77,6 @@ encode_utf8_bytes(const void *src, size_t src_len,
                }
        }
 
-       *curr = '\0';
-
        // And we're good.
        *dest = dest8;
        *dest_len = utf8_len;
@@ -127,7 +126,7 @@ avro_value_to_json_t(const avro_value_t *value)
                                return NULL;
                        }
 
-                       json_t  *result = json_string_nocheck((const char *) encoded);
+                       json_t  *result = json_stringn_nocheck((const char *) encoded, encoded_size);
                        avro_free(encoded, encoded_size);
                        if (result == NULL) {
                                avro_set_error("Cannot allocate JSON bytes");
@@ -242,7 +241,7 @@ avro_value_to_json_t(const avro_value_t *value)
                                return NULL;
                        }
 
-                       json_t  *result = json_string_nocheck((const char *) encoded);
+                       json_t  *result = json_stringn_nocheck((const char *) encoded, encoded_size);
                        avro_free(encoded, encoded_size);
                        if (result == NULL) {
                                avro_set_error("Cannot allocate JSON fixed");
diff --git a/lang/c/tests/test_avro_data.c b/lang/c/tests/test_avro_data.c
index 1da09e6db9..3a26c67e24 100644
--- a/lang/c/tests/test_avro_data.c
+++ b/lang/c/tests/test_avro_data.c
@@ -181,14 +181,14 @@ static int test_string(void)
 
 static int test_bytes(void)
 {
-       char bytes[] = { 0xDE, 0xAD, 0xBE, 0xEF };
+       char bytes[] = { 0xDE, 0xAD, 0x00, 0xBE, 0xEF };
        avro_schema_t writer_schema = avro_schema_bytes();
        avro_datum_t datum;
        avro_datum_t expected_datum;
 
        datum = avro_givebytes(bytes, sizeof(bytes), NULL);
        write_read_check(writer_schema, datum, NULL, NULL, "bytes");
-       test_json(datum, "\"\\u00de\\u00ad\\u00be\\u00ef\"");
+       test_json(datum, "\"\\u00de\\u00ad\\u0000\\u00be\\u00ef\"");
        avro_datum_decref(datum);
        avro_schema_decref(writer_schema);
 
@@ -613,14 +613,14 @@ static int test_union(void)
 
 static int test_fixed(void)
 {
-       char bytes[] = { 0xD, 0xA, 0xD, 0xA, 0xB, 0xA, 0xB, 0xA };
+       char bytes[] = { 0xD, 0xA, 0xD, 0xA, 0xB, 0x0, 0xB, 0xA };
        avro_schema_t schema = avro_schema_fixed("msg", sizeof(bytes));
        avro_datum_t datum;
        avro_datum_t expected_datum;
 
        datum = avro_givefixed(schema, bytes, sizeof(bytes), NULL);
        write_read_check(schema, datum, NULL, NULL, "fixed");
-       test_json(datum, "\"\\r\\n\\r\\n\\u000b\\n\\u000b\\n\"");
+       test_json(datum, "\"\\r\\n\\r\\n\\u000b\\u0000\\u000b\\n\"");
        avro_datum_decref(datum);
 
        datum = avro_givefixed(schema, NULL, sizeof(bytes), NULL);

Reply via email to