[
https://issues.apache.org/jira/browse/AVRO-3760?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17737021#comment-17737021
]
Anton Agestam commented on AVRO-3760:
-------------------------------------
[~clesaec] As Ryan pointed out, there's another exception raised when properly
providing the writer schema.
Here's a complete updated example:
{code:python}
import io
from avro.io import DatumReader, DatumWriter, BinaryDecoder, BinaryEncoder
import avro.schema
current_schema = avro.schema.parse("""
{
"fields": [
{
"default": "unknown",
"name": "checksum_algorithm",
"type": {
"name": "ChecksumAlgorithm",
"symbols": [
"unknown",
"xxhash3_64_be"
],
"type": "enum",
"default": "unknown"
}
}
],
"name": "Metadata",
"type": "record"
}
""")
# Future schema adds the "crc32_be" symbol.
future_schema = avro.schema.parse("""
{
"fields": [
{
"default": "unknown",
"name": "checksum_algorithm",
"type": {
"name": "ChecksumAlgorithm",
"symbols": [
"unknown",
"xxhash3_64_be",
"crc32_be"
],
"type": "enum",
"default": "unknown"
}
}
],
"name": "Metadata",
"type": "record"
}
""")
with io.BytesIO() as buffer:
    writer = DatumWriter(future_schema)
    encoder = BinaryEncoder(buffer)
    writer.write({"checksum_algorithm": "crc32_be"}, encoder)
    buffer.seek(0)
    reader = DatumReader(future_schema, current_schema)
    decoder = BinaryDecoder(buffer)
    decoded = reader.read(decoder)
    print(decoded)
{code}
Which outputs:
{code:text}
Traceback (most recent call last):
File "avro-repro.py", line 60, in <module>
decoded = reader.read(decoder)
File
"/Users/anton/.pyenv/versions/karapace/lib/python3.8/site-packages/avro/io.py",
line 639, in read
return self.read_data(self.writers_schema, self.readers_schema, decoder)
File
"/Users/anton/.pyenv/versions/karapace/lib/python3.8/site-packages/avro/io.py",
line 717, in read_data
return self.read_record(writers_schema, readers_schema, decoder)
File
"/Users/anton/.pyenv/versions/karapace/lib/python3.8/site-packages/avro/io.py",
line 915, in read_record
field_val = self.read_data(field.type, readers_field.type, decoder)
File
"/Users/anton/.pyenv/versions/karapace/lib/python3.8/site-packages/avro/io.py",
line 710, in read_data
return self.read_enum(writers_schema, readers_schema, decoder)
File
"/Users/anton/.pyenv/versions/karapace/lib/python3.8/site-packages/avro/io.py",
line 779, in read_enum
raise avro.errors.SchemaResolutionException(f"Symbol {read_symbol} not
present in Reader's Schema", writers_schema, readers_schema)
avro.errors.SchemaResolutionException: Symbol crc32_be not present in Reader's
Schema
Writer's Schema: {
"type": "enum",
"default": "unknown",
"name": "ChecksumAlgorithm",
"symbols": [
"unknown",
"xxhash3_64_be",
"crc32_be"
]
}
Reader's Schema: {
"type": "enum",
"default": "unknown",
"name": "ChecksumAlgorithm",
"symbols": [
"unknown",
"xxhash3_64_be"
]
}
{code}
> Using enum with default symbol, cannot parse future value
> ---------------------------------------------------------
>
> Key: AVRO-3760
> URL: https://issues.apache.org/jira/browse/AVRO-3760
> Project: Apache Avro
> Issue Type: Bug
> Components: python
> Affects Versions: 1.11.1
> Environment: {code}
> $ pip freeze | grep -i avro
> avro==1.11.1
> $ python --version
> Python 3.8.16
> {code}
> Reporter: Anton Agestam
> Assignee: Anton Agestam
> Priority: Major
> Labels: pull-request-available
> Fix For: 1.11.2
>
> Time Spent: 10m
> Remaining Estimate: 0h
>
> It seems like support for default symbols is broken. In the example below,
> since I'm using default symbols, I expected to be able to add new values to
> the enum and see the default value when parsing using the old schema.
> {code:python}
> import io
> from avro.io import DatumReader, DatumWriter, BinaryDecoder, BinaryEncoder
> import avro.schema
> current_schema = avro.schema.parse("""
> {
> "fields": [
> {
> "default": "unknown",
> "name": "checksum_algorithm",
> "type": {
> "name": "ChecksumAlgorithm",
> "symbols": [
> "unknown",
> "xxhash3_64_be"
> ],
> "type": "enum",
> "default": "unknown"
> }
> }
> ],
> "name": "Metadata",
> "type": "record"
> }
> """)
> # Future schema adds the "crc32_be" symbol.
> future_schema = avro.schema.parse("""
> {
> "fields": [
> {
> "default": "unknown",
> "name": "checksum_algorithm",
> "type": {
> "name": "ChecksumAlgorithm",
> "symbols": [
> "unknown",
> "xxhash3_64_be",
> "crc32_be"
> ],
> "type": "enum",
> "default": "unknown"
> }
> }
> ],
> "name": "Metadata",
> "type": "record"
> }
> """)
> with io.BytesIO() as buffer:
>     writer = DatumWriter(future_schema)
>     encoder = BinaryEncoder(buffer)
>     writer.write({"checksum_algorithm": "crc32_be"}, encoder)
>     buffer.seek(0)
>     reader = DatumReader(current_schema)
>     decoder = BinaryDecoder(buffer)
>     decoded = reader.read(decoder)
>     print(decoded)
> {code}
> Instead, this results in an exception:
> {code}
> Traceback (most recent call last):
> File "reproduce-avro.py", line 58, in <module>
> decoded = reader.read(decoder)
> File
> "/Users/anton/.pyenv/versions/karapace/lib/python3.8/site-packages/avro/io.py",
> line 649, in read
> return self.read_data(self.writers_schema, self.readers_schema, decoder)
> File
> "/Users/anton/.pyenv/versions/karapace/lib/python3.8/site-packages/avro/io.py",
> line 727, in read_data
> return self.read_record(writers_schema, readers_schema, decoder)
> File
> "/Users/anton/.pyenv/versions/karapace/lib/python3.8/site-packages/avro/io.py",
> line 922, in read_record
> field_val = self.read_data(field.type, readers_field.type, decoder)
> File
> "/Users/anton/.pyenv/versions/karapace/lib/python3.8/site-packages/avro/io.py",
> line 720, in read_data
> return self.read_enum(writers_schema, readers_schema, decoder)
> File
> "/Users/anton/.pyenv/versions/karapace/lib/python3.8/site-packages/avro/io.py",
> line 779, in read_enum
> raise avro.errors.SchemaResolutionException(
> avro.errors.SchemaResolutionException: Can't access enum index 2 for enum
> with 2 symbols
> Writer's Schema: {
> "type": "enum",
> "default": "unknown",
> "name": "ChecksumAlgorithm",
> "symbols": [
> "unknown",
> "xxhash3_64_be"
> ]
> }
> Reader's Schema: {
> "type": "enum",
> "default": "unknown",
> "name": "ChecksumAlgorithm",
> "symbols": [
> "unknown",
> "xxhash3_64_be"
> ]
> }
> {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)