Hi all,
I am having trouble adding more data into a file.
Environment: Python 2.6.5, avro-1.3.3-py2.6
Program looks like this
from avro import schema, datafile, io
OUTFILE_NAME = 'sample.avro'
SCHEMA_STR = """{
"type": "record",
"name": "bkSampleAvro",
"namespace": "bk_avro_example",
"fields": [
{ "name": "name" , "type": "string" },
{ "name": "age" , "type": "int" },
{ "name": "address", "type": "string" },
{ "name": "value" , "type": "long" }
]
}"""
SCHEMA = schema.parse(SCHEMA_STR)
def write_avro_file():
# Lets generate our data
data = {}
data['name'] = 'Foo'
data['age'] = 19
data['address'] = '10, Bar Eggs Spam'
data['value'] = 800
rec_writer = io.DatumWriter(SCHEMA)
df_writer = datafile.DataFileWriter(
open(OUTFILE_NAME, 'ab'),
rec_writer,
writers_schema = SCHEMA,
codec = 'deflate'
)
df_writer.append(data)
df_writer.close()
def read_avro_file():
rec_reader = io.DatumReader()
df_reader = datafile.DataFileReader(
open(OUTFILE_NAME, "rb"),
rec_reader
)
for record in df_reader:
print record['name'], record['age']
print record['address'], record['value']
if __name__ == '__main__':
# Write an AVRO file first
write_avro_file()
write_avro_file()
# Now, read it
read_avro_file()
The result looks like
Foo 19
10, Bar Eggs Spam 800
Traceback (most recent call last):
File "/Users/felixgao/Desktop/workspace/Python/avro/avroExample1.py", line
124, in <module>
read_avro_file()
File "/Users/felixgao/Desktop/workspace/Python/avro/avroExample1.py", line
112, in read_avro_file
for record in df_reader:
File
"/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/datafile.py",
line 318, in next
datum = self.datum_reader.read(self.datum_decoder)
File
"/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py",
line 411, in read
return self.read_data(self.writers_schema, self.readers_schema, decoder)
File
"/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py",
line 456, in read_data
return self.read_record(writers_schema, readers_schema, decoder)
File
"/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py",
line 648, in read_record
field_val = self.read_data(field.type, readers_field.type, decoder)
File
"/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py",
line 434, in read_data
return decoder.read_utf8()
File
"/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py",
line 210, in read_utf8
return unicode(self.read_bytes(), "utf-8")
UnicodeDecodeError: 'utf8' codec can't decode bytes in position 14-15:
invalid data
if I remove the second write_avro_file() call then everything is fine. How
to properly append more data into the file?
Thanks,
Felix