[ 
https://issues.apache.org/jira/browse/AVRO-1350?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Bin Guo updated AVRO-1350:
--------------------------

    Description: 
We can't get a correct result when decoding enums using resolving decoder.  e.g.

{code:title=schema}
{
  "type" : "record",
  "name" : "TestEnum",
  "fields" : [
        {
            "name" : "MyMode",
            "type" : {
              "type" : "enum",
              "name" : "Mode",
              "symbols" : [ "MEMORY", "DISK" ]
            }
        }
  ]
}
{code}

We encoded "DISK"(1), then decoded it with the resolving decoder and got "MEMORY"(0).
I examined the code and found that there is a *sort* after reading the names of
the reader.
I couldn't quite understand the author's intention, but it cannot work
correctly.
When decoding my enum, the returned value is actually the *position in the sorted
names*, which is obviously not correct.

{code:title=Symbol.cc}
Symbol Symbol::enumAdjustSymbol(const NodePtr& writer, const NodePtr& reader)
{
    vector<string> rs;
    size_t rc = reader->names();
    for (size_t i = 0; i < rc; ++i) {
        rs.push_back(reader->nameAt(i));
    }
    sort(rs.begin(), rs.end()); // the strange sort
{code}


Here is my complete test case.

{code:title=generated structure}
enum Mode {
    MEMORY,
    DISK,
};

struct TestEnum {
    Mode MyMode;
};
{code}

{code:title=My test case}
#include "ts_enum.h"
#include "avro/Compiler.hh"
#include "avro/ValidSchema.hh"

using namespace std;
using namespace avro;
using namespace enum_test;

static const char ts_schema_string[] =
        "{ \"type\" : \"record\", \"name\" : \"TestEnum\", \"fields\" : "
        "[ { \"name\" : \"MyMode\", \"type\" : "
        "{ \"type\" : \"enum\", \"name\" : \"Mode\", "
        "\"symbols\" : [ \"MEMORY\", \"DISK\" ] } } ]}";

int main(int argc, char * argv[]) {
    TestEnum te1, te2;
    ValidSchema reader = compileJsonSchemaFromString(ts_schema_string);
    ValidSchema writer = compileJsonSchemaFromString(ts_schema_string);

    //encode TestEnum
    auto_ptr<OutputStream> out_stream = memoryOutputStream();
    EncoderPtr encoder = binaryEncoder();
    encoder->init(*out_stream);
    te1.MyMode = DISK;
    encode(*encoder, te1);
    encoder->flush();

    //decode TestEnum
    auto_ptr<InputStream> in_stream = memoryInputStream(*out_stream);
    DecoderPtr decoder = resolvingDecoder(writer, reader, 
avro::binaryDecoder());
    decoder->init(*in_stream);
    decode(*decoder, te2);

    cout<<"TE1: "<<te1.MyMode << " | TE2: "<<te2.MyMode<<endl;
    return 0;
}
{code}

The result
-------------------
TE1: 1 | TE2: 0

I debugged into the avro code.
In Symbol::enumAdjustSymbol, there is a vector<string> of the reader's enum names,
and after the sort, "MEMORY, DISK" becomes "DISK, MEMORY".
Finally, a vector<int> built for the writer's enum names stores each name's
position in the sorted vector<string>.
As a result, in the returned symbol, MEMORY's position is 1 and DISK's position
is 0.
So when we decode the enum, that *position* is returned to the target
object.
I couldn't quite understand the author's intention here, but when I commented out the
sort, everything worked well.


  was:
We can't get a correct result when decoding enums using resolving decoder. 
e.g.
{code:title=An enum example}
{"type":"enum","name":"Mode","symbols":["MEMORY","DISK"]}
{code}
We encoded "DISK"(1), then decoded it with the resolving decoder and got "MEMORY"(0).
I examined the code and found that there is a sort after reading the names of
the reader.
I couldn't quite understand the author's intention, but it cannot work
correctly.
When decoding my enum, the returned value is actually the *position in the sorted
names*, which is obviously not correct.

{code:title=Symbol.cc}
Symbol Symbol::enumAdjustSymbol(const NodePtr& writer, const NodePtr& reader)
{
    vector<string> rs;
    size_t rc = reader->names();
    for (size_t i = 0; i < rc; ++i) {
        rs.push_back(reader->nameAt(i));
    }
    sort(rs.begin(), rs.end()); // the strange sort
{code}


    
> Error in decoding enums using ResolvingDecoder
> ----------------------------------------------
>
>                 Key: AVRO-1350
>                 URL: https://issues.apache.org/jira/browse/AVRO-1350
>             Project: Avro
>          Issue Type: Bug
>          Components: c++
>    Affects Versions: 1.7.4
>            Reporter: Bin Guo
>
> We can't get a correct result when decoding enums using resolving decoder.  
> e.g.
> {code:title=schema}
> {
>   "type" : "record",
>   "name" : "TestEnum",
>   "fields" : [
>       {
>           "name" : "MyMode",
>           "type" : {
>             "type" : "enum",
>             "name" : "Mode",
>             "symbols" : [ "MEMORY", "DISK" ]
>           }
>       }
>   ]
> }
> {code}
> We encoded "DISK"(1), then decoded it with the resolving decoder and got "MEMORY"(0).
> I examined the code and found that there is a *sort* after reading the names of
> the reader.
> I couldn't quite understand the author's intention, but it cannot work
> correctly.
> When decoding my enum, the returned value is actually the *position in the
> sorted names*, which is obviously not correct.
> {code:title=Symbol.cc}
> Symbol Symbol::enumAdjustSymbol(const NodePtr& writer, const NodePtr& reader)
> {
>     vector<string> rs;
>     size_t rc = reader->names();
>     for (size_t i = 0; i < rc; ++i) {
>         rs.push_back(reader->nameAt(i));
>     }
>     sort(rs.begin(), rs.end()); // the strange sort
> {code}
> Here is my complete test case.
> {code:title=generated structure}
> enum Mode {
>     MEMORY,
>     DISK,
> };
> struct TestEnum {
>     Mode MyMode;
> };
> {code}
> {code:title=My test case}
> #include "ts_enum.h"
> #include "avro/Compiler.hh"
> #include "avro/ValidSchema.hh"
> using namespace std;
> using namespace avro;
> using namespace enum_test;
> static const char ts_schema_string[] =
>         "{ \"type\" : \"record\", \"name\" : \"TestEnum\", \"fields\" : "
>         "[ { \"name\" : \"MyMode\", \"type\" : "
>         "{ \"type\" : \"enum\", \"name\" : \"Mode\", "
>         "\"symbols\" : [ \"MEMORY\", \"DISK\" ] } } ]}";
> int main(int argc, char * argv[]) {
>     TestEnum te1, te2;
>     ValidSchema reader = compileJsonSchemaFromString(ts_schema_string);
>     ValidSchema writer = compileJsonSchemaFromString(ts_schema_string);
>     //encode TestEnum
>     auto_ptr<OutputStream> out_stream = memoryOutputStream();
>     EncoderPtr encoder = binaryEncoder();
>     encoder->init(*out_stream);
>     te1.MyMode = DISK;
>     encode(*encoder, te1);
>     encoder->flush();
>     //decode TestEnum
>     auto_ptr<InputStream> in_stream = memoryInputStream(*out_stream);
>     DecoderPtr decoder = resolvingDecoder(writer, reader, 
> avro::binaryDecoder());
>     decoder->init(*in_stream);
>     decode(*decoder, te2);
>     cout<<"TE1: "<<te1.MyMode << " | TE2: "<<te2.MyMode<<endl;
>     return 0;
> }
> {code}
> The result
> -------------------
> TE1: 1 | TE2: 0
> I debugged into the avro code.
> In Symbol::enumAdjustSymbol, there is a vector<string> of the reader's enum
> names, and after the sort, "MEMORY, DISK" becomes "DISK, MEMORY".
> Finally, a vector<int> built for the writer's enum names stores each name's
> position in the sorted vector<string>.
> As a result, in the returned symbol, MEMORY's position is 1 and DISK's
> position is 0.
> So when we decode the enum, that *position* is returned to the target
> object.
> I couldn't quite understand the author's intention here, but when I commented
> out the sort, everything worked well.

--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators
For more information on JIRA, see: http://www.atlassian.com/software/jira

Reply via email to