[
https://issues.apache.org/jira/browse/AVRO-1167?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16715357#comment-16715357
]
ASF GitHub Bot commented on AVRO-1167:
--------------------------------------
dkulp closed pull request #217: AVRO-1167, AVRO-766: Fix c AVRO_LINK memory
leaks
URL: https://github.com/apache/avro/pull/217
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git a/lang/c/src/schema.c b/lang/c/src/schema.c
index 3ade1140e..97e3ff354 100644
--- a/lang/c/src/schema.c
+++ b/lang/c/src/schema.c
@@ -2,17 +2,17 @@
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
- * The ASF licenses this file to you under the Apache License, Version 2.0
+ * The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
- * permissions and limitations under the License.
+ * permissions and limitations under the License.
*/
#include "avro/allocation.h"
@@ -61,7 +61,7 @@ static int is_avro_id(const char *name)
}
}
/*
- * starts with [A-Za-z_] subsequent [A-Za-z0-9_]
+ * starts with [A-Za-z_] subsequent [A-Za-z0-9_]
*/
return 1;
}
@@ -199,7 +199,13 @@ static void avro_schema_free(avro_schema_t schema)
case AVRO_LINK:{
struct avro_link_schema_t *link;
link = avro_schema_to_link(schema);
- avro_schema_decref(link->to);
+ /* Since we didn't increment the
+ * reference count of the target
+ * schema when we created the link, we
+ * should not decrement the reference
+ * count of the target schema when we
+ * free the link.
+ */
avro_freet(struct avro_link_schema_t, link);
}
break;
@@ -727,7 +733,19 @@ avro_schema_t avro_schema_link(avro_schema_t to)
avro_set_error("Cannot allocate new link schema");
return NULL;
}
- link->to = avro_schema_incref(to);
+
+ /* Do not increment the reference count of target schema
+ * pointed to by the AVRO_LINK. AVRO_LINKs are only valid
+ * internal to a schema. The target schema pointed to by a
+ * link will be valid as long as the top-level schema is
+ * valid. Similarly, the link will be valid as long as the
+ * top-level schema is valid. Therefore the validity of the
+ * link ensures the validity of its target, and we don't need
+ * an additional reference count on the target. This mechanism
+ * of an implied validity also breaks reference count cycles
+ * for recursive schemas, which result in memory leaks.
+ */
+ link->to = to;
avro_schema_init(&link->obj, AVRO_LINK);
return &link->obj;
}
@@ -807,7 +825,7 @@ avro_type_from_json_t(json_t *json, avro_type_t *type,
return EINVAL;
}
/*
- * TODO: gperf/re2c this
+ * TODO: gperf/re2c this
*/
if (strcmp(type_str, "string") == 0) {
*type = AVRO_STRING;
@@ -1259,7 +1277,7 @@ avro_schema_from_json_length(const char *jsontext, size_t
length,
return avro_schema_from_json_root(root, schema);
}
-avro_schema_t avro_schema_copy(avro_schema_t schema)
+avro_schema_t avro_schema_copy_root(avro_schema_t schema, st_table
*named_schemas)
{
long i;
avro_schema_t new_schema = NULL;
@@ -1276,7 +1294,7 @@ avro_schema_t avro_schema_copy(avro_schema_t schema)
case AVRO_BOOLEAN:
case AVRO_NULL:
/*
- * No need to copy primitives since they're static
+ * No need to copy primitives since they're static
*/
new_schema = schema;
break;
@@ -1288,6 +1306,10 @@ avro_schema_t avro_schema_copy(avro_schema_t schema)
new_schema =
avro_schema_record(record_schema->name,
record_schema->space);
+ if (save_named_schemas(new_schema, named_schemas)) {
+ avro_set_error("Cannot save enum schema");
+ return NULL;
+ }
for (i = 0; i < record_schema->fields->num_entries;
i++) {
union {
st_data_t data;
@@ -1295,10 +1317,11 @@ avro_schema_t avro_schema_copy(avro_schema_t schema)
} val;
st_lookup(record_schema->fields, i, &val.data);
avro_schema_t type_copy =
- avro_schema_copy(val.field->type);
+ avro_schema_copy_root(val.field->type,
named_schemas);
avro_schema_record_field_append(new_schema,
val.field->name,
type_copy);
+ avro_schema_decref(type_copy);
}
}
break;
@@ -1309,6 +1332,10 @@ avro_schema_t avro_schema_copy(avro_schema_t schema)
avro_schema_to_enum(schema);
new_schema = avro_schema_enum_ns(enum_schema->name,
enum_schema->space);
+ if (save_named_schemas(new_schema, named_schemas)) {
+ avro_set_error("Cannot save enum schema");
+ return NULL;
+ }
for (i = 0; i < enum_schema->symbols->num_entries; i++)
{
union {
st_data_t data;
@@ -1329,6 +1356,10 @@ avro_schema_t avro_schema_copy(avro_schema_t schema)
avro_schema_fixed_ns(fixed_schema->name,
fixed_schema->space,
fixed_schema->size);
+ if (save_named_schemas(new_schema, named_schemas)) {
+ avro_set_error("Cannot save fixed schema");
+ return NULL;
+ }
}
break;
@@ -1337,11 +1368,12 @@ avro_schema_t avro_schema_copy(avro_schema_t schema)
struct avro_map_schema_t *map_schema =
avro_schema_to_map(schema);
avro_schema_t values_copy =
- avro_schema_copy(map_schema->values);
+ avro_schema_copy_root(map_schema->values,
named_schemas);
if (!values_copy) {
return NULL;
}
new_schema = avro_schema_map(values_copy);
+ avro_schema_decref(values_copy);
}
break;
@@ -1350,11 +1382,12 @@ avro_schema_t avro_schema_copy(avro_schema_t schema)
struct avro_array_schema_t *array_schema =
avro_schema_to_array(schema);
avro_schema_t items_copy =
- avro_schema_copy(array_schema->items);
+ avro_schema_copy_root(array_schema->items,
named_schemas);
if (!items_copy) {
return NULL;
}
new_schema = avro_schema_array(items_copy);
+ avro_schema_decref(items_copy);
}
break;
@@ -1372,12 +1405,13 @@ avro_schema_t avro_schema_copy(avro_schema_t schema)
avro_schema_t schema;
} val;
st_lookup(union_schema->branches, i, &val.data);
- schema_copy = avro_schema_copy(val.schema);
+ schema_copy = avro_schema_copy_root(val.schema,
named_schemas);
if (avro_schema_union_append
(new_schema, schema_copy)) {
avro_schema_decref(new_schema);
return NULL;
}
+ avro_schema_decref(schema_copy);
}
}
break;
@@ -1386,12 +1420,12 @@ avro_schema_t avro_schema_copy(avro_schema_t schema)
{
struct avro_link_schema_t *link_schema =
avro_schema_to_link(schema);
- /*
- * TODO: use an avro_schema_copy of to instead of
pointing to
- * the same reference
- */
- avro_schema_incref(link_schema->to);
- new_schema = avro_schema_link(link_schema->to);
+ avro_schema_t to;
+
+ to =
find_named_schemas(avro_schema_name(link_schema->to),
+
avro_schema_namespace(link_schema->to),
+
named_schemas);
+ new_schema = avro_schema_link(to);
}
break;
@@ -1401,6 +1435,23 @@ avro_schema_t avro_schema_copy(avro_schema_t schema)
return new_schema;
}
+avro_schema_t avro_schema_copy(avro_schema_t schema)
+{
+ avro_schema_t new_schema;
+ st_table *named_schemas;
+
+ named_schemas = st_init_strtable_with_size(DEFAULT_TABLE_SIZE);
+ if (!named_schemas) {
+ avro_set_error("Cannot allocate named schema map");
+ return NULL;
+ }
+
+ new_schema = avro_schema_copy_root(schema, named_schemas);
+ st_foreach(named_schemas, HASH_FUNCTION_CAST named_schema_free_foreach,
0);
+ st_free_table(named_schemas);
+ return new_schema;
+}
+
avro_schema_t avro_schema_get_subschema(const avro_schema_t schema,
const char *name)
{
diff --git a/lang/c/tests/CMakeLists.txt b/lang/c/tests/CMakeLists.txt
index 445e689a7..0870ef5ec 100644
--- a/lang/c/tests/CMakeLists.txt
+++ b/lang/c/tests/CMakeLists.txt
@@ -48,12 +48,14 @@ add_avro_test(test_data_structures)
add_avro_test(test_avro_schema)
add_avro_test(test_avro_schema_names)
add_avro_test(test_avro_values)
+add_avro_test(test_avro_766)
add_avro_test(test_avro_968)
add_avro_test(test_avro_984)
add_avro_test(test_avro_1034)
add_avro_test(test_avro_1084)
add_avro_test(test_avro_1087)
add_avro_test(test_avro_1165)
+add_avro_test(test_avro_1167)
add_avro_test(test_avro_1237)
add_avro_test(test_avro_1238)
add_avro_test(test_avro_1279)
diff --git a/lang/c/tests/test_avro_1167.c b/lang/c/tests/test_avro_1167.c
new file mode 100644
index 000000000..869b37d17
--- /dev/null
+++ b/lang/c/tests/test_avro_1167.c
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <avro.h>
+
+/* To see the AVRO-1167 memory leak, run this test program through
+ * valgrind. The specific valgrind commandline to use from the
+ * avro-trunk/lang/c/tests directory is:
+ * valgrind -v --track-origins=yes --leak-check=full
+ * --show-reachable = yes ../build/tests/test_avro_1167
+ */
+
+int main(int argc, char **argv)
+{
+ const char *json =
+ "{"
+ " \"name\": \"repeated_subrecord_array\","
+ " \"type\": \"record\","
+ " \"fields\": ["
+ " { \"name\": \"subrecord_one\","
+ " \"type\": {"
+ " \"name\": \"SubrecordType\","
+ " \"type\": \"record\","
+ " \"fields\": ["
+ " { \"name\": \"x\", \"type\": \"int\" },"
+ " { \"name\": \"y\", \"type\": \"int\" }"
+ " ]"
+ " }"
+ " },"
+ " { \"name\": \"subrecord_two\", \"type\": \"SubrecordType\"
},"
+ " { \"name\": \"subrecord_array\", \"type\": {"
+ "
\"type\":\"array\","
+ " \"items\":
\"SubrecordType\""
+ " }"
+ " }"
+ " ]"
+ "}";
+
+ int rval;
+ avro_schema_t schema = NULL;
+ avro_schema_t schema_copy = NULL;
+ avro_schema_error_t error;
+
+ (void) argc;
+ (void) argv;
+
+ rval = avro_schema_from_json(json, strlen(json), &schema, &error);
+ if ( rval )
+ {
+ printf("Failed to read schema from JSON.\n");
+ exit(EXIT_FAILURE);
+ }
+ else
+ {
+ printf("Successfully read schema from JSON.\n");
+ }
+
+ schema_copy = avro_schema_copy( schema );
+ if ( ! avro_schema_equal(schema, schema_copy) )
+ {
+ printf("Failed avro_schema_equal(schema, schema_copy)\n");
+ exit(EXIT_FAILURE);
+ }
+
+ avro_schema_decref(schema);
+ avro_schema_decref(schema_copy);
+ return 0;
+}
diff --git a/lang/c/tests/test_avro_766.c b/lang/c/tests/test_avro_766.c
new file mode 100755
index 000000000..4e21368c4
--- /dev/null
+++ b/lang/c/tests/test_avro_766.c
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <avro.h>
+
+/* To see the AVRO-766 memory leak, run this test program through
+ * valgrind. The specific valgrind commandline to use from the
+ * avro-trunk/lang/c/tests directory is:
+ * valgrind -v --track-origins=yes --leak-check=full
+ * --show-reachable = yes ../build/tests/test_avro_766
+ */
+int main(int argc, char **argv)
+{
+ const char *json =
+ "{"
+ " \"type\": \"record\","
+ " \"name\": \"list\","
+ " \"fields\": ["
+ " { \"name\": \"x\", \"type\": \"int\" },"
+ " { \"name\": \"y\", \"type\": \"int\" },"
+ " { \"name\": \"next\", \"type\": [\"null\",\"list\"]},"
+ " { \"name\": \"arraylist\", \"type\": { \"type\":\"array\",
\"items\": \"list\" } }"
+ " ]"
+ "}";
+
+ int rval;
+ avro_schema_t schema = NULL;
+ avro_schema_error_t error;
+
+ (void) argc;
+ (void) argv;
+
+ rval = avro_schema_from_json(json, strlen(json), &schema, &error);
+ if ( rval )
+ {
+ printf("Failed to read schema from JSON.\n");
+ exit(EXIT_FAILURE);
+ }
+ else
+ {
+ printf("Successfully read schema from JSON.\n");
+ }
+
+#define TEST_AVRO_1167 (1)
+ #if TEST_AVRO_1167
+ {
+ avro_schema_t schema_copy = NULL;
+ schema_copy = avro_schema_copy( schema );
+ if ( ! avro_schema_equal(schema, schema_copy) )
+ {
+ printf("Failed avro_schema_equal(schema,
schema_copy)\n");
+ exit(EXIT_FAILURE);
+ }
+ avro_schema_decref(schema_copy);
+ }
+ #endif
+
+ avro_schema_decref(schema);
+ return 0;
+}
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
> Avro-C: avro_schema_copy() does not copy AVRO_LINKs properly.
> -------------------------------------------------------------
>
> Key: AVRO-1167
> URL: https://issues.apache.org/jira/browse/AVRO-1167
> Project: Apache Avro
> Issue Type: Bug
> Components: c
> Affects Versions: 1.7.1
> Environment: Ubuntu Linux 11.10
> Reporter: Vivek Nadkarni
> Priority: Major
> Attachments: AVRO-1167-TEST.patch
>
> Original Estimate: 168h
> Remaining Estimate: 168h
>
> When avro_schema_copy() encounters an AVRO_LINK from an old_schema to a
> new_schema, it sets the target of the new_link to the target of the old_link
> in the old_schema. Thus, the AVRO_LINK in the new_schema points to an element
> in the old_schema.
> While this is currently safe, since the reference count of the target in the
> old_schema is incremented, we are not really making a "copy" of the schema.
> There is a "TODO" in the code, which says that we should make a
> avro_schema_copy() of the target in old_schema instead of linking directly to
> it. However, this solution of making a copy would result in a few problems:
> 1. Avro schemas are intended to be self-contained. That implies that
> AVRO_LINKs are intended to be internal links inside a self-contained schema.
> The code introduces unnecessary (and potentially disallowed) external
> dependencies in an Avro schema.
> 2. The purpose of copying a schema that we want to decouple the old_schema
> from the new_schema. The two copies may have different owners, we may want to
> deallocate old schema etc.
> 3. If the schema is recursive, then the code would enter an infinite
> recursion loop.
> It appears to me that the "correct" solution would be to replicate the entire
> structure of the current schema, including the internal links. This means
> that if old_link_A points to old_target_B, then new_link_A should point to
> new_target_B in the new schema. Note that there should only be one copy of
> new_target_B in the new schema, even if there are multiple links pointing to
> new_target_B - i.e. we should not make a new copy for each link.
> In order to implement this proper copying of links, we would need to keep a
> lookup table of pairs of old and new schemas as they are being created, as
> well as a list of all the AVRO_LINKs that are copied. Then as a post-copy
> step, we would go and fix up all the AVRO_LINKs to point to the appropriate
> targets. This is the way the schema is constructed in the first place in
> avro_schema_from_json().
> An inefficient way to obtain the correct result from avro_schema_copy() would
> be to perform an avro_schema_to_json() followed by an avro_schema_from_json().
> Note: I have not implemented a fix for this issue, but I am documenting this
> issue in AVRO-JIRA because this issue needs to be resolved before AVRO-766
> can be fixed.
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)