[ 
https://issues.apache.org/jira/browse/AVRO-3760?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17737021#comment-17737021
 ] 

Anton Agestam commented on AVRO-3760:
-------------------------------------

[~clesaec] As Ryan pointed out, a different exception is raised when the 
writer schema is properly provided to the reader.

Here's a complete updated example:

{code:python}
import io
from avro.io import DatumReader, DatumWriter, BinaryDecoder, BinaryEncoder
import avro.schema

current_schema = avro.schema.parse("""
{
    "fields": [
        {
            "default": "unknown",
            "name": "checksum_algorithm",
            "type": {
                "name": "ChecksumAlgorithm",
                "symbols": [
                    "unknown",
                    "xxhash3_64_be"
                ],
                "type": "enum",
                "default": "unknown"
            }
        }
    ],
    "name": "Metadata",
    "type": "record"
}
""")

# Future schema adds the "crc32_be" symbol.
future_schema = avro.schema.parse("""
{
    "fields": [
        {
            "default": "unknown",
            "name": "checksum_algorithm",
            "type": {
                "name": "ChecksumAlgorithm",
                "symbols": [
                    "unknown",
                    "xxhash3_64_be",
                    "crc32_be"
                ],
                "type": "enum",
                "default": "unknown"
            }
        }
    ],
    "name": "Metadata",
    "type": "record"
}
""")


with io.BytesIO() as buffer:
    writer = DatumWriter(future_schema)
    encoder = BinaryEncoder(buffer)
    writer.write({"checksum_algorithm": "crc32_be"}, encoder)
    buffer.seek(0)

    reader = DatumReader(future_schema, current_schema)
    decoder = BinaryDecoder(buffer)
    decoded = reader.read(decoder)

print(decoded)
{code}

Which outputs:

{code:text}
Traceback (most recent call last):
  File "avro-repro.py", line 60, in <module>
    decoded = reader.read(decoder)
  File "/Users/anton/.pyenv/versions/karapace/lib/python3.8/site-packages/avro/io.py", line 639, in read
    return self.read_data(self.writers_schema, self.readers_schema, decoder)
  File "/Users/anton/.pyenv/versions/karapace/lib/python3.8/site-packages/avro/io.py", line 717, in read_data
    return self.read_record(writers_schema, readers_schema, decoder)
  File "/Users/anton/.pyenv/versions/karapace/lib/python3.8/site-packages/avro/io.py", line 915, in read_record
    field_val = self.read_data(field.type, readers_field.type, decoder)
  File "/Users/anton/.pyenv/versions/karapace/lib/python3.8/site-packages/avro/io.py", line 710, in read_data
    return self.read_enum(writers_schema, readers_schema, decoder)
  File "/Users/anton/.pyenv/versions/karapace/lib/python3.8/site-packages/avro/io.py", line 779, in read_enum
    raise avro.errors.SchemaResolutionException(f"Symbol {read_symbol} not present in Reader's Schema", writers_schema, readers_schema)
avro.errors.SchemaResolutionException: Symbol crc32_be not present in Reader's Schema
Writer's Schema: {
  "type": "enum",
  "default": "unknown",
  "name": "ChecksumAlgorithm",
  "symbols": [
    "unknown",
    "xxhash3_64_be",
    "crc32_be"
  ]
}
Reader's Schema: {
  "type": "enum",
  "default": "unknown",
  "name": "ChecksumAlgorithm",
  "symbols": [
    "unknown",
    "xxhash3_64_be"
  ]
}
{code}

> Using enum with default symbol, cannot parse future value
> ---------------------------------------------------------
>
>                 Key: AVRO-3760
>                 URL: https://issues.apache.org/jira/browse/AVRO-3760
>             Project: Apache Avro
>          Issue Type: Bug
>          Components: python
>    Affects Versions: 1.11.1
>         Environment: {code}
> $ pip freeze | grep -i avro
> avro==1.11.1
> $ python --version
> Python 3.8.16
> {code}
>            Reporter: Anton Agestam
>            Assignee: Anton Agestam
>            Priority: Major
>              Labels: pull-request-available
>             Fix For: 1.11.2
>
>          Time Spent: 10m
>  Remaining Estimate: 0h
>
> It seems like support for default symbols is broken. In the example below, 
> since I'm using default symbols, I expected to be able to add new values to 
> the enum and see the default value when parsing using the old schema.
> {code:python}
> import io
> from avro.io import DatumReader, DatumWriter, BinaryDecoder, BinaryEncoder
> import avro.schema
> current_schema = avro.schema.parse("""
> {
>     "fields": [
>         {
>             "default": "unknown",
>             "name": "checksum_algorithm",
>             "type": {
>                 "name": "ChecksumAlgorithm",
>                 "symbols": [
>                     "unknown",
>                     "xxhash3_64_be"
>                 ],
>                 "type": "enum",
>                 "default": "unknown"
>             }
>         }
>     ],
>     "name": "Metadata",
>     "type": "record"
> }
> """)
> # Future schema adds the "crc32_be" symbol.
> future_schema = avro.schema.parse("""
> {
>     "fields": [
>         {
>             "default": "unknown",
>             "name": "checksum_algorithm",
>             "type": {
>                 "name": "ChecksumAlgorithm",
>                 "symbols": [
>                     "unknown",
>                     "xxhash3_64_be",
>                     "crc32_be"
>                 ],
>                 "type": "enum",
>                 "default": "unknown"
>             }
>         }
>     ],
>     "name": "Metadata",
>     "type": "record"
> }
> """)
> with io.BytesIO() as buffer:
>     writer = DatumWriter(future_schema)
>     encoder = BinaryEncoder(buffer)
>     writer.write({"checksum_algorithm": "crc32_be"}, encoder)
>     buffer.seek(0)
>     reader = DatumReader(current_schema)
>     decoder = BinaryDecoder(buffer)
>     decoded = reader.read(decoder)
> print(decoded)
> {code}
> Instead, this results in an exception:
> {code}
> Traceback (most recent call last):
>   File "reproduce-avro.py", line 58, in <module>
>     decoded = reader.read(decoder)
>   File "/Users/anton/.pyenv/versions/karapace/lib/python3.8/site-packages/avro/io.py", line 649, in read
>     return self.read_data(self.writers_schema, self.readers_schema, decoder)
>   File "/Users/anton/.pyenv/versions/karapace/lib/python3.8/site-packages/avro/io.py", line 727, in read_data
>     return self.read_record(writers_schema, readers_schema, decoder)
>   File "/Users/anton/.pyenv/versions/karapace/lib/python3.8/site-packages/avro/io.py", line 922, in read_record
>     field_val = self.read_data(field.type, readers_field.type, decoder)
>   File "/Users/anton/.pyenv/versions/karapace/lib/python3.8/site-packages/avro/io.py", line 720, in read_data
>     return self.read_enum(writers_schema, readers_schema, decoder)
>   File "/Users/anton/.pyenv/versions/karapace/lib/python3.8/site-packages/avro/io.py", line 779, in read_enum
>     raise avro.errors.SchemaResolutionException(
> avro.errors.SchemaResolutionException: Can't access enum index 2 for enum with 2 symbols
> Writer's Schema: {
>   "type": "enum",
>   "default": "unknown",
>   "name": "ChecksumAlgorithm",
>   "symbols": [
>     "unknown",
>     "xxhash3_64_be"
>   ]
> }
> Reader's Schema: {
>   "type": "enum",
>   "default": "unknown",
>   "name": "ChecksumAlgorithm",
>   "symbols": [
>     "unknown",
>     "xxhash3_64_be"
>   ]
> }
> {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to