Hmm, perhaps the types are unequal, then. Can you print them out
(including field metadata)?
Le 10/02/2021 à 18:03, Ying Zhou a écrit :
> Thanks! Now we have an even weirder phenomenon. Even the null bitmaps and
> offsets are equal. However, the arrays themselves still aren’t equal! Does
> anyone know why?
>
> TEST(TestAdapterWriteNested, writeList) {
> std::shared_ptr<Schema> table_schema = schema({field("list",
> list(int32()))});
> int64_t num_rows = 10000;
> arrow::random::RandomArrayGenerator rand(kRandomSeed);
> auto value_array = rand.ArrayOf(int32(), 5 * num_rows, 0.6);
> std::shared_ptr<Array> array = rand.List(*value_array, num_rows + 1, 0.8);
> std::shared_ptr<ChunkedArray> chunked_array =
> std::make_shared<ChunkedArray>(array);
> std::shared_ptr<Table> table = Table::Make(table_schema, {chunked_array});
>
> std::shared_ptr<io::BufferOutputStream> buffer_output_stream =
> io::BufferOutputStream::Create(kDefaultSmallMemStreamSize *
> 15).ValueOrDie();
> std::unique_ptr<adapters::orc::ORCFileWriter> writer =
> adapters::orc::ORCFileWriter::Open(*buffer_output_stream).ValueOrDie();
> ARROW_EXPECT_OK(writer->Write(*table));
> ARROW_EXPECT_OK(writer->Close());
> std::shared_ptr<Buffer> buffer =
> buffer_output_stream->Finish().ValueOrDie();
> std::shared_ptr<io::RandomAccessFile> in_stream(new
> io::BufferReader(buffer));
> std::unique_ptr<adapters::orc::ORCFileReader> reader;
> ARROW_EXPECT_OK(
> adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(),
> &reader));
> std::shared_ptr<Table> actual_output_table;
> ARROW_EXPECT_OK(reader->Read(&actual_output_table));
> auto actual_array =
>
> std::static_pointer_cast<ListArray>(actual_output_table->column(0)->chunk(0));
> auto expected_array =
> std::static_pointer_cast<ListArray>(table->column(0)->chunk(0));
> AssertArraysEqual(*(actual_array->offsets()), *(expected_array->offsets()));
> AssertArraysEqual(*(actual_array->values()), *(expected_array->values()));
> AssertBufferEqual(*(actual_array->null_bitmap()),
> *(expected_array->null_bitmap()));
> RecordProperty("array_equality", actual_array->Equals(*expected_array));
> }
>
> <testcase name="writeList" status="run" result="completed" time="0.028"
> timestamp="2021-02-10T11:58:23" classname="TestAdapterWriteNested">
> <properties>
> <property name="array_equality" value="0"/>
> </properties>
> </testcase>
>
>> On Feb 10, 2021, at 3:52 AM, Antoine Pitrou <[email protected]> wrote:
>>
>>
>> Hi Ying,
>>
>> Hmm, yes, this may be related to the null bitmaps, or the offsets.
>> Can you try to inspect or pretty-print the offsets arrays for the two
>> list arrays?
>>
>> Regards
>>
>> Antoine.
>>
>>
>> Le 10/02/2021 à 03:26, Ying Zhou a écrit :
>>> Hi,
>>>
>>> This is an extremely weird phenomenon. Two 2*1 tables are reported as
>>> different, yet the confusing error message I got prints them identically:
>>>
>>> [ RUN ] TestAdapterWriteNested.writeList
>>> /Users/karlkatzen/Documents/code/arrow-dev/arrow/cpp/src/arrow/testing/gtest_util.cc:459:
>>> Failure
>>> Failed
>>> Unequal at absolute position 2
>>> Expected:
>>> [
>>> [
>>> null,
>>> 1074834796,
>>> null,
>>> null
>>> ],
>>> null
>>> ]
>>> Actual:
>>> [
>>> [
>>> null,
>>> 1074834796,
>>> null,
>>> null
>>> ],
>>> null
>>> ]
>>> [ FAILED ] TestAdapterWriteNested.writeList (2 ms)
>>>
>>> Here is the code that causes the issue:
>>>
>>> TEST(TestAdapterWriteNested, writeList) {
>>> std::shared_ptr<Schema> table_schema = schema({field("list",
>>> list(int32()))});
>>> int64_t num_rows = 2;
>>> arrow::random::RandomArrayGenerator rand(kRandomSeed);
>>> auto value_array = rand.ArrayOf(int32(), 2 * num_rows, 0.6);
>>> std::shared_ptr<Array> array = rand.List(*value_array, num_rows + 1, 1);
>>> std::shared_ptr<ChunkedArray> chunked_array =
>>> std::make_shared<ChunkedArray>(array);
>>> std::shared_ptr<Table> table = Table::Make(table_schema, {chunked_array});
>>> AssertTableWriteReadEqual(table, table, kDefaultSmallMemStreamSize * 5);
>>> }
>>>
>>> Here AssertTableWriteReadEqual is a function I use to test that
>>> from_orc(to_orc(table_in)) == expected_table_out. The function has not had
>>> issues before.
>>>
>>> void AssertTableWriteReadEqual(const std::shared_ptr<Table>& input_table,
>>> const std::shared_ptr<Table>&
>>> expected_output_table,
>>> const int64_t max_size =
>>> kDefaultSmallMemStreamSize) {
>>> std::shared_ptr<io::BufferOutputStream> buffer_output_stream =
>>> io::BufferOutputStream::Create(max_size).ValueOrDie();
>>> std::unique_ptr<adapters::orc::ORCFileWriter> writer =
>>> adapters::orc::ORCFileWriter::Open(*buffer_output_stream).ValueOrDie();
>>> ARROW_EXPECT_OK(writer->Write(*input_table));
>>> ARROW_EXPECT_OK(writer->Close());
>>> std::shared_ptr<Buffer> buffer =
>>> buffer_output_stream->Finish().ValueOrDie();
>>> std::shared_ptr<io::RandomAccessFile> in_stream(new
>>> io::BufferReader(buffer));
>>> std::unique_ptr<adapters::orc::ORCFileReader> reader;
>>> ARROW_EXPECT_OK(
>>> adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(),
>>> &reader));
>>> std::shared_ptr<Table> actual_output_table;
>>> ARROW_EXPECT_OK(reader->Read(&actual_output_table));
>>> AssertTablesEqual(*actual_output_table, *expected_output_table, false,
>>> false);
>>> }
>>>
>>> I strongly suspect that this is related to the null bitmaps. What do you
>>> guys think?
>>>
>>> Ying
>>>
>
>