Author: massie
Date: Tue Feb 23 18:25:43 2010
New Revision: 915468

URL: http://svn.apache.org/viewvc?rev=915468&view=rev
Log:
AVRO-428. Improve file read performance by buffering data

Modified:
    hadoop/avro/trunk/CHANGES.txt
    hadoop/avro/trunk/lang/c/src/io.c

Modified: hadoop/avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=915468&r1=915467&r2=915468&view=diff
==============================================================================
--- hadoop/avro/trunk/CHANGES.txt (original)
+++ hadoop/avro/trunk/CHANGES.txt Tue Feb 23 18:25:43 2010
@@ -336,6 +336,8 @@
 
     AVRO-417. Produce Java documentation archive.  (Scott Carey via cutting)
 
+    AVRO-428. Improve file read performance by buffering data (massie)
+
   OPTIMIZATIONS
 
     AVRO-172. More efficient schema processing (massie)

Modified: hadoop/avro/trunk/lang/c/src/io.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/io.c?rev=915468&r1=915467&r2=915468&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/io.c (original)
+++ hadoop/avro/trunk/lang/c/src/io.c Tue Feb 23 18:25:43 2010
@@ -42,6 +42,9 @@
 struct avro_file_reader_t {
        struct avro_reader_t reader;
        FILE *fp;
+       char *cur;
+       char *end;
+       char buffer[4096];
 };
 
 struct avro_file_writer_t {
@@ -91,6 +94,7 @@
        if (!file_reader) {
                return NULL;
        }
+       memset(file_reader, 0, sizeof(struct avro_file_reader_t));
        file_reader->fp = fp;
        reader_init(&file_reader->reader, AVRO_FILE_IO);
        return &file_reader->reader;
@@ -149,15 +153,58 @@
        return 0;
 }
 
+#define bytes_available(reader) (reader->end - reader->cur)
+#define buffer_reset(reader) {reader->cur = reader->end = reader->buffer;}
+
 static int
 avro_read_file(struct avro_file_reader_t *reader, void *buf, int64_t len)
 {
-       int rval = fread(buf, len, 1, reader->fp);
+       int64_t needed = len;
+       void *p = buf;
+       int rval;
 
-       if (rval == 0) {
-               return ferror(reader->fp) || feof(reader->fp) ? -1 : 0;
+       if (len == 0) {
+               return 0;
        }
-       return 0;
+
+       if (needed > sizeof(reader->buffer)) {
+               if (bytes_available(reader) > 0) {
+                       memcpy(p, reader->cur, bytes_available(reader));
+                       p += bytes_available(reader);
+                       needed -= bytes_available(reader);
+                       buffer_reset(reader);
+               }
+               rval = fread(p, 1, needed, reader->fp);
+               if (rval != needed) {
+                       return -1;
+               }
+               return 0;
+       } else if (needed <= bytes_available(reader)) {
+               memcpy(p, reader->cur, needed);
+               reader->cur += needed;
+               return 0;
+       } else {
+               memcpy(p, reader->cur, bytes_available(reader));
+               p += bytes_available(reader);
+               needed -= bytes_available(reader);
+
+               rval =
+                   fread(reader->buffer, 1, sizeof(reader->buffer),
+                         reader->fp);
+               if (rval == 0) {
+                       return -1;
+               }
+               reader->cur = reader->buffer;
+               reader->end = reader->cur + rval;
+
+               if (bytes_available(reader) < needed) {
+                       return -1;
+               }
+               memcpy(p, reader->cur, needed);
+               reader->cur += needed;
+               return 0;
+       }
+       return -1;
 }
 
 int avro_read(avro_reader_t reader, void *buf, int64_t len)
@@ -188,8 +235,17 @@
 static int avro_skip_file(struct avro_file_reader_t *reader, int64_t len)
 {
        int rval;
-       if (len > 0) {
-               rval = fseek(reader->fp, len, SEEK_CUR);
+       int64_t needed = len;
+
+       if (len == 0) {
+               return 0;
+       }
+       if (needed <= bytes_available(reader)) {
+               reader->cur += needed;
+       } else {
+               needed -= bytes_available(reader);
+               buffer_reset(reader);
+               rval = fseek(reader->fp, needed, SEEK_CUR);
                if (rval < 0) {
                        return rval;
                }


Reply via email to