Hey Felix, see the test_append() function in test_datafile.py: http://svn.apache.org/viewvc/avro/trunk/lang/py/test/test_datafile.py?view=markup
Regards, Jeff On Wed, Dec 22, 2010 at 4:59 PM, felix gao <[email protected]> wrote: > Hi all, > > I am having trouble adding more data into a file. > > Environment: Python 2.6.5, avro-1.3.3-py2.6 > > Program looks like this > > from avro import schema, datafile, io > > OUTFILE_NAME = 'sample.avro' > > SCHEMA_STR = """{ > "type": "record", > "name": "bkSampleAvro", > "namespace": "bk_avro_example", > "fields": [ > { "name": "name" , "type": "string" }, > { "name": "age" , "type": "int" }, > { "name": "address", "type": "string" }, > { "name": "value" , "type": "long" } > ] > }""" > > SCHEMA = schema.parse(SCHEMA_STR) > def write_avro_file(): > # Lets generate our data > data = {} > data['name'] = 'Foo' > data['age'] = 19 > data['address'] = '10, Bar Eggs Spam' > data['value'] = 800 > > rec_writer = io.DatumWriter(SCHEMA) > > df_writer = datafile.DataFileWriter( > open(OUTFILE_NAME, 'ab'), > rec_writer, > writers_schema = SCHEMA, > codec = 'deflate' > ) > > df_writer.append(data) > > df_writer.close() > > def read_avro_file(): > rec_reader = io.DatumReader() > > df_reader = datafile.DataFileReader( > open(OUTFILE_NAME, "rb"), > rec_reader > ) > > for record in df_reader: > print record['name'], record['age'] > print record['address'], record['value'] > > > if __name__ == '__main__': > # Write an AVRO file first > write_avro_file() > write_avro_file() > > # Now, read it > read_avro_file() > > > The result looks like > > Foo 19 > 10, Bar Eggs Spam 800 > Traceback (most recent call last): > File "/Users/felixgao/Desktop/workspace/Python/avro/avroExample1.py", > line 124, in <module> > read_avro_file() > File "/Users/felixgao/Desktop/workspace/Python/avro/avroExample1.py", > line 112, in read_avro_file > for record in df_reader: > File > "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/datafile.py", > line 318, in next > datum = self.datum_reader.read(self.datum_decoder) > File > 
"/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py", > line 411, in read > return self.read_data(self.writers_schema, self.readers_schema, > decoder) > File > "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py", > line 456, in read_data > return self.read_record(writers_schema, readers_schema, decoder) > File > "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py", > line 648, in read_record > field_val = self.read_data(field.type, readers_field.type, decoder) > File > "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py", > line 434, in read_data > return decoder.read_utf8() > File > "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py", > line 210, in read_utf8 > return unicode(self.read_bytes(), "utf-8") > UnicodeDecodeError: 'utf8' codec can't decode bytes in position 14-15: > invalid data > > > > if I remove the second write_avro_file() call then everything is fine. How > to properly append more data into the file? > > Thanks, > > Felix >
