Author: massie
Date: Thu Mar  4 02:15:53 2010
New Revision: 918818

URL: http://svn.apache.org/viewvc?rev=918818&view=rev
Log:
AVRO-445. avro_size_data() to pre-calculate the size of an avro_datum_t in 
serialized form. Contributed by Bruce Mitchener.

Added:
    hadoop/avro/trunk/lang/c/src/datum_size.c
Modified:
    hadoop/avro/trunk/CHANGES.txt
    hadoop/avro/trunk/lang/c/src/Makefile.am
    hadoop/avro/trunk/lang/c/src/avro.h
    hadoop/avro/trunk/lang/c/src/encoding.h
    hadoop/avro/trunk/lang/c/src/encoding_binary.c
    hadoop/avro/trunk/lang/c/tests/test_avro_data.c
    hadoop/avro/trunk/lang/c/version.sh

Modified: hadoop/avro/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=918818&r1=918817&r2=918818&view=diff
==============================================================================
--- hadoop/avro/trunk/CHANGES.txt (original)
+++ hadoop/avro/trunk/CHANGES.txt Thu Mar  4 02:15:53 2010
@@ -16,6 +16,9 @@
 
     AVRO-438. Clarify spec.  (Amichai Rothman via cutting)
 
+    AVRO-445. avro_size_data() to pre-calculate the size of an 
+    avro_datum_t in serialized form (Bruce Mitchener via massie)
+
   BUG FIXES
 
     AVRO-424. Fix the specification of the deflate codec.

Modified: hadoop/avro/trunk/lang/c/src/Makefile.am
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/Makefile.am?rev=918818&r1=918817&r2=918818&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/Makefile.am (original)
+++ hadoop/avro/trunk/lang/c/src/Makefile.am Thu Mar  4 02:15:53 2010
@@ -7,7 +7,7 @@
 
 lib_LTLIBRARIES = libavro.la
 libavro_la_SOURCES = st.c st.h schema.c schema.h schema_equal.c \
-datum.c datum_equal.c datum_validate.c datum_read.c datum_skip.c datum_write.c datum.h \
+datum.c datum_equal.c datum_validate.c datum_read.c datum_skip.c datum_write.c datum_size.c datum.h \
 io.c dump.c dump.h encoding_binary.c \
 avro_private.h encoding.h datafile.c
 libavro_la_LIBADD = $(top_builddir)/jansson/src/.libs/libjansson.a

Modified: hadoop/avro/trunk/lang/c/src/avro.h
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/avro.h?rev=918818&r1=918817&r2=918818&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/avro.h (original)
+++ hadoop/avro/trunk/lang/c/src/avro.h Thu Mar  4 02:15:53 2010
@@ -257,6 +257,8 @@
 int avro_skip_data(avro_reader_t reader, avro_schema_t writer_schema);
 int avro_write_data(avro_writer_t writer,
                    avro_schema_t writer_schema, avro_datum_t datum);
+int64_t avro_size_data(avro_writer_t writer,
+                      avro_schema_t writer_schema, avro_datum_t datum);
 
 /* File object container */
 typedef struct avro_file_reader_t *avro_file_reader_t;

Added: hadoop/avro/trunk/lang/c/src/datum_size.c
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/datum_size.c?rev=918818&view=auto
==============================================================================
--- hadoop/avro/trunk/lang/c/src/datum_size.c (added)
+++ hadoop/avro/trunk/lang/c/src/datum_size.c Thu Mar  4 02:15:53 2010
@@ -0,0 +1,291 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0 
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.  See the License for the specific language governing
+ * permissions and limitations under the License. 
+ */
+#include <errno.h>
+#include <assert.h>
+#include <string.h>
+#include "schema.h"
+#include "datum.h"
+#include "encoding.h"
+
+#define size_check(rval, call) { rval = call; if(rval) return rval; }
+#define size_accum(rval, size, call) { rval = call; if (rval < 0) return rval; else size += rval; }
+
+static int64_t size_datum(avro_writer_t writer, const avro_encoding_t * enc,
+                         avro_schema_t writers_schema, avro_datum_t datum);
+
+static int64_t
+size_record(avro_writer_t writer, const avro_encoding_t * enc,
+           struct avro_record_schema_t *schema, avro_datum_t datum)
+{
+       int rval;
+       long i;
+       int64_t size;
+       avro_datum_t field_datum;
+
+       size = 0;
+       if (schema) {
+               for (i = 0; i < schema->fields->num_entries; i++) {
+                       union {
+                               st_data_t data;
+                               struct avro_record_field_t *field;
+                       } val;
+                       st_lookup(schema->fields, i, &val.data);
+                       size_check(rval,
+                                  avro_record_get(datum, val.field->name,
+                                                  &field_datum));
+                       size_accum(rval, size,
+                                  size_datum(writer, enc, val.field->type,
+                                             field_datum));
+               }
+       } else {
+               /* No schema.  Just write the record datum */
+               struct avro_record_datum_t *record =
+                   avro_datum_to_record(datum);
+               for (i = 0; i < record->field_order->num_entries; i++) {
+                       union {
+                               st_data_t data;
+                               char *name;
+                       } val;
+                       st_lookup(record->field_order, i, &val.data);
+                       size_check(rval,
+                                  avro_record_get(datum, val.name,
+                                                  &field_datum));
+                       size_accum(rval, size,
+                                  size_datum(writer, enc, NULL, field_datum));
+               }
+       }
+       return size;
+}
+
+static int64_t
+size_enum(avro_writer_t writer, const avro_encoding_t * enc,
+         struct avro_enum_schema_t *enump, struct avro_enum_datum_t *datum)
+{
+       return enc->size_long(writer, datum->value);
+}
+
+struct size_map_args {
+       int rval;
+       int64_t size;
+       avro_writer_t writer;
+       const avro_encoding_t *enc;
+       avro_schema_t values_schema;
+};
+
+static int
+size_map_foreach(char *key, avro_datum_t datum, struct size_map_args *args)
+{
+       int rval = args->enc->size_string(args->writer, key);
+       if (rval < 0) {
+               args->rval = rval;
+               return ST_STOP;
+       } else {
+               args->size += rval;
+       }
+       rval = size_datum(args->writer, args->enc, args->values_schema, datum);
+       if (rval < 0) {
+               args->rval = rval;
+               return ST_STOP;
+       } else {
+               args->size += rval;
+       }
+       return ST_CONTINUE;
+}
+
+static int64_t
+size_map(avro_writer_t writer, const avro_encoding_t * enc,
+        struct avro_map_schema_t *writers_schema,
+        struct avro_map_datum_t *datum)
+{
+       int rval;
+       int64_t size;
+       struct size_map_args args = { 0, 0, writer, enc,
+               writers_schema ? writers_schema->values : NULL
+       };
+
+       size = 0;
+       if (datum->map->num_entries) {
+               size_accum(rval, size,
+                          enc->size_long(writer, datum->map->num_entries));
+               st_foreach(datum->map, size_map_foreach, (st_data_t) & args);
+               size += args.size;
+       }
+       if (!args.rval) {
+               size_accum(rval, size, enc->size_long(writer, 0));
+       }
+       return size;
+}
+
+static int64_t
+size_array(avro_writer_t writer, const avro_encoding_t * enc,
+          struct avro_array_schema_t *schema, struct avro_array_datum_t *array)
+{
+       int rval;
+       long i;
+       int64_t size;
+
+       size = 0;
+       if (array->els->num_entries) {
+               size_accum(rval, size,
+                          enc->size_long(writer, array->els->num_entries));
+               for (i = 0; i < array->els->num_entries; i++) {
+                       union {
+                               st_data_t data;
+                               avro_datum_t datum;
+                       } val;
+                       st_lookup(array->els, i, &val.data);
+                       size_accum(rval, size,
+                                  size_datum(writer, enc,
+                                             schema ? schema->items : NULL,
+                                             val.datum));
+               }
+       }
+       size_accum(rval, size, enc->size_long(writer, 0));
+       return size;
+}
+
+static int64_t
+size_union(avro_writer_t writer, const avro_encoding_t * enc,
+          struct avro_union_schema_t *schema,
+          struct avro_union_datum_t *unionp)
+{
+       int rval;
+       int64_t size;
+       avro_schema_t write_schema = NULL;
+
+       size = 0;
+       size_accum(rval, size, enc->size_long(writer, unionp->discriminant));
+       if (schema) {
+               union {
+                       st_data_t data;
+                       avro_schema_t schema;
+               } val;
+               if (!st_lookup
+                   (schema->branches, unionp->discriminant, &val.data)) {
+                       return -EINVAL;
+               }
+               write_schema = val.schema;
+       }
+       size_accum(rval, size,
+                  size_datum(writer, enc, write_schema, unionp->value));
+       return size;
+}
+
+static int64_t size_datum(avro_writer_t writer, const avro_encoding_t * enc,
+                         avro_schema_t writers_schema, avro_datum_t datum)
+{
+       if (is_avro_schema(writers_schema) && is_avro_link(writers_schema)) {
+               return size_datum(writer, enc,
+                                 (avro_schema_to_link(writers_schema))->to,
+                                 datum);
+       }
+
+       switch (avro_typeof(datum)) {
+       case AVRO_NULL:
+               return enc->size_null(writer);
+
+       case AVRO_BOOLEAN:
+               return enc->size_boolean(writer,
+                                        avro_datum_to_boolean(datum)->i);
+
+       case AVRO_STRING:
+               return enc->size_string(writer, avro_datum_to_string(datum)->s);
+
+       case AVRO_BYTES:
+               return enc->size_bytes(writer,
+                                      avro_datum_to_bytes(datum)->bytes,
+                                      avro_datum_to_bytes(datum)->size);
+
+       case AVRO_INT32:
+       case AVRO_INT64:{
+                       int64_t val = avro_typeof(datum) == AVRO_INT32 ?
+                           avro_datum_to_int32(datum)->i32 :
+                           avro_datum_to_int64(datum)->i64;
+                       if (is_avro_schema(writers_schema)) {
+                               /* handle promotion */
+                               if (is_avro_float(writers_schema)) {
+                                       return enc->size_float(writer,
+                                                              (float)val);
+                               } else if (is_avro_double(writers_schema)) {
+                                       return enc->size_double(writer,
+                                                               (double)val);
+                               }
+                       }
+                       return enc->size_long(writer, val);
+               }
+
+       case AVRO_FLOAT:{
+                       float val = avro_datum_to_float(datum)->f;
+                       if (is_avro_schema(writers_schema)
+                           && is_avro_double(writers_schema)) {
+                               /* handle promotion */
+                               return enc->size_double(writer, (double)val);
+                       }
+                       return enc->size_float(writer, val);
+               }
+
+       case AVRO_DOUBLE:
+               return enc->size_double(writer, avro_datum_to_double(datum)->d);
+
+       case AVRO_RECORD:
+               return size_record(writer, enc,
+                                  avro_schema_to_record(writers_schema),
+                                  datum);
+
+       case AVRO_ENUM:
+               return size_enum(writer, enc,
+                                avro_schema_to_enum(writers_schema),
+                                avro_datum_to_enum(datum));
+
+       case AVRO_FIXED:
+               return avro_datum_to_fixed(datum)->size;
+
+       case AVRO_MAP:
+               return size_map(writer, enc,
+                               avro_schema_to_map(writers_schema),
+                               avro_datum_to_map(datum));
+
+       case AVRO_ARRAY:
+               return size_array(writer, enc,
+                                 avro_schema_to_array(writers_schema),
+                                 avro_datum_to_array(datum));
+
+       case AVRO_UNION:
+               return size_union(writer, enc,
+                                 avro_schema_to_union(writers_schema),
+                                 avro_datum_to_union(datum));
+
+       case AVRO_LINK:
+               break;
+       }
+
+       return 0;
+}
+
+int64_t avro_size_data(avro_writer_t writer, avro_schema_t writers_schema,
+                      avro_datum_t datum)
+{
+       if (!writer || !is_avro_datum(datum)) {
+               return -EINVAL;
+       }
+       /* Only validate datum if a writer's schema is provided */
+       if (is_avro_schema(writers_schema)
+           && !avro_schema_datum_validate(writers_schema, datum)) {
+               return -EINVAL;
+       }
+       return size_datum(writer, &avro_binary_encoding, writers_schema, datum);
+}

Modified: hadoop/avro/trunk/lang/c/src/encoding.h
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/encoding.h?rev=918818&r1=918817&r2=918818&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/encoding.h (original)
+++ hadoop/avro/trunk/lang/c/src/encoding.h Thu Mar  4 02:15:53 2010
@@ -29,6 +29,7 @@
        int (*read_string) (avro_reader_t reader, char **s);
        int (*skip_string) (avro_reader_t reader);
        int (*write_string) (avro_writer_t writer, const char *s);
+        int64_t(*size_string) (avro_writer_t writer, const char *s);
        /*
         * bytes 
         */
@@ -36,42 +37,50 @@
        int (*skip_bytes) (avro_reader_t reader);
        int (*write_bytes) (avro_writer_t writer,
                            const char *bytes, const int64_t len);
+        int64_t(*size_bytes) (avro_writer_t writer,
+                              const char *bytes, const int64_t len);
        /*
         * int 
         */
        int (*read_int) (avro_reader_t reader, int32_t * i);
        int (*skip_int) (avro_reader_t reader);
        int (*write_int) (avro_writer_t writer, const int32_t i);
+        int64_t(*size_int) (avro_writer_t writer, const int32_t i);
        /*
         * long 
         */
        int (*read_long) (avro_reader_t reader, int64_t * l);
        int (*skip_long) (avro_reader_t reader);
        int (*write_long) (avro_writer_t writer, const int64_t l);
+        int64_t(*size_long) (avro_writer_t writer, const int64_t l);
        /*
         * float 
         */
        int (*read_float) (avro_reader_t reader, float *f);
        int (*skip_float) (avro_reader_t reader);
        int (*write_float) (avro_writer_t writer, const float f);
+        int64_t(*size_float) (avro_writer_t writer, const float f);
        /*
         * double 
         */
        int (*read_double) (avro_reader_t reader, double *d);
        int (*skip_double) (avro_reader_t reader);
        int (*write_double) (avro_writer_t writer, const double d);
+        int64_t(*size_double) (avro_writer_t writer, const double d);
        /*
         * boolean 
         */
        int (*read_boolean) (avro_reader_t reader, int8_t * b);
        int (*skip_boolean) (avro_reader_t reader);
        int (*write_boolean) (avro_writer_t writer, const int8_t b);
+        int64_t(*size_boolean) (avro_writer_t writer, const int8_t b);
        /*
         * null 
         */
        int (*read_null) (avro_reader_t reader);
        int (*skip_null) (avro_reader_t reader);
        int (*write_null) (avro_writer_t writer);
+        int64_t(*size_null) (avro_writer_t writer);
 };
 typedef struct avro_encoding_t avro_encoding_t;
 

Modified: hadoop/avro/trunk/lang/c/src/encoding_binary.c
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/encoding_binary.c?rev=918818&r1=918817&r2=918818&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/encoding_binary.c (original)
+++ hadoop/avro/trunk/lang/c/src/encoding_binary.c Thu Mar  4 02:15:53 2010
@@ -72,6 +72,18 @@
        return 0;
 }
 
+static int64_t size_long(avro_writer_t writer, int64_t l)
+{
+       int64_t len = 0;
+       uint64_t n = (l << 1) ^ (l >> 63);
+       while (n & ~0x7F) {
+               len++;
+               n >>= 7;
+       }
+       len++;
+       return len;
+}
+
 static int read_int(avro_reader_t reader, int32_t * i)
 {
        int64_t l;
@@ -97,6 +109,12 @@
        return write_long(writer, l);
 }
 
+static int64_t size_int(avro_writer_t writer, const int32_t i)
+{
+       int64_t l = i;
+       return size_long(writer, l);
+}
+
 static int read_bytes(avro_reader_t reader, char **bytes, int64_t * len)
 {
        int rval = read_long(reader, len);
@@ -138,6 +156,12 @@
        return 0;
 }
 
+static int64_t
+size_bytes(avro_writer_t writer, const char *bytes, const int64_t len)
+{
+       return size_long(writer, len) + len;
+}
+
 static int read_string(avro_reader_t reader, char **s)
 {
        int64_t len;
@@ -155,6 +179,12 @@
        return write_bytes(writer, s, len);
 }
 
+static int64_t size_string(avro_writer_t writer, const char *s)
+{
+       int64_t len = strlen(s);
+       return size_bytes(writer, s, len);
+}
+
 static int read_float(avro_reader_t reader, float *f)
 {
 #if WORDS_BIGENDIAN
@@ -205,6 +235,11 @@
        return 0;
 }
 
+static int64_t size_float(avro_writer_t writer, const float f)
+{
+       return 4;
+}
+
 static int read_double(avro_reader_t reader, double *d)
 {
 #if WORDS_BIGENDIAN
@@ -264,6 +299,11 @@
        return 0;
 }
 
+static int64_t size_double(avro_writer_t writer, const double d)
+{
+       return 8;
+}
+
 static int read_boolean(avro_reader_t reader, int8_t * b)
 {
        AVRO_READ(reader, b, 1);
@@ -282,6 +322,11 @@
        return 0;
 }
 
+static int64_t size_boolean(avro_writer_t writer, const int8_t b)
+{
+       return 1;
+}
+
 static int read_skip_null(avro_reader_t reader)
 {
        /*
@@ -298,6 +343,11 @@
        return 0;
 }
 
+static int64_t size_null(avro_writer_t writer)
+{
+       return 0;
+}
+
 const avro_encoding_t avro_binary_encoding = {
        .description = "BINARY FORMAT",
        /*
@@ -306,46 +356,54 @@
        .read_string = read_string,
        .skip_string = skip_string,
        .write_string = write_string,
+       .size_string = size_string,
        /*
         * bytes 
         */
        .read_bytes = read_bytes,
        .skip_bytes = skip_bytes,
        .write_bytes = write_bytes,
+       .size_bytes = size_bytes,
        /*
         * int 
         */
        .read_int = read_int,
        .skip_int = skip_int,
        .write_int = write_int,
+       .size_int = size_int,
        /*
         * long 
         */
        .read_long = read_long,
        .skip_long = skip_long,
        .write_long = write_long,
+       .size_long = size_long,
        /*
         * float 
         */
        .read_float = read_float,
        .skip_float = skip_float,
        .write_float = write_float,
+       .size_float = size_float,
        /*
         * double 
         */
        .read_double = read_double,
        .skip_double = skip_double,
        .write_double = write_double,
+       .size_double = size_double,
        /*
         * boolean 
         */
        .read_boolean = read_boolean,
        .skip_boolean = skip_boolean,
        .write_boolean = write_boolean,
+       .size_boolean = size_boolean,
        /*
         * null 
         */
        .read_null = read_skip_null,
        .skip_null = read_skip_null,
-       .write_null = write_null
+       .write_null = write_null,
+       .size_null = size_null
 };

Modified: hadoop/avro/trunk/lang/c/tests/test_avro_data.c
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/tests/test_avro_data.c?rev=918818&r1=918817&r2=918818&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/tests/test_avro_data.c (original)
+++ hadoop/avro/trunk/lang/c/tests/test_avro_data.c Thu Mar  4 02:15:53 2010
@@ -67,6 +67,15 @@
                                type, validate);
                        exit(EXIT_FAILURE);
                }
+               int64_t size =
+                   avro_size_data(writer, validate ? writers_schema : NULL,
+                                  datum);
+               if (size != avro_writer_tell(writer)) {
+                       fprintf(stderr,
+                               "Unable to calculate size %s validate=%d (%lld != %lld)\n",
+                               type, validate, size, avro_writer_tell(writer));
+                       exit(EXIT_FAILURE);
+               }
                if (avro_read_data
                    (reader, writers_schema, readers_schema, &datum_out)) {
                        fprintf(stderr, "Unable to read %s validate=%d\n", type,

Modified: hadoop/avro/trunk/lang/c/version.sh
URL: 
http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/version.sh?rev=918818&r1=918817&r2=918818&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/version.sh (original)
+++ hadoop/avro/trunk/lang/c/version.sh Thu Mar  4 02:15:53 2010
@@ -18,9 +18,9 @@
 #         libavro_binary_age = 0
 #         libavro_interface_age = 0
 #
-libavro_micro_version=18
+libavro_micro_version=19
 libavro_interface_age=0
-libavro_binary_age=0
+libavro_binary_age=1
 
 # IGNORE EVERYTHING ELSE FROM HERE DOWN.........
 if test $# != 1; then


Reply via email to