Thanks! Now we have an even weirder phenomenon: even the null bitmaps and
offsets of the two list arrays compare equal, yet the arrays themselves do
not! Does anyone know why? The test and the resulting XML test report follow.
// Writes a randomly generated list<int32> column to an in-memory ORC file,
// reads it back, and compares the pieces of the resulting ListArray (offsets,
// values, null bitmap) one by one. The result of the whole-array comparison is
// recorded as a test property rather than asserted.
TEST(TestAdapterWriteNested, writeList) {
std::shared_ptr<Schema> table_schema = schema({field("list", list(int32()))});
int64_t num_rows = 10000;
arrow::random::RandomArrayGenerator rand(kRandomSeed);
// Child values for the lists: 5 * num_rows int32 entries.
// NOTE(review): 0.6 is presumably the null probability -- confirm against
// RandomArrayGenerator::ArrayOf.
auto value_array = rand.ArrayOf(int32(), 5 * num_rows, 0.6);
// NOTE(review): the size argument is num_rows + 1, presumably because it
// counts offsets rather than lists, and 0.8 is presumably the null
// probability -- confirm against RandomArrayGenerator::List.
std::shared_ptr<Array> array = rand.List(*value_array, num_rows + 1, 0.8);
// Wrap the single array as a one-chunk, one-column table.
std::shared_ptr<ChunkedArray> chunked_array =
std::make_shared<ChunkedArray>(array);
std::shared_ptr<Table> table = Table::Make(table_schema, {chunked_array});
// In-memory sink for the ORC writer, created with
// kDefaultSmallMemStreamSize * 15 as its size argument.
std::shared_ptr<io::BufferOutputStream> buffer_output_stream =
io::BufferOutputStream::Create(kDefaultSmallMemStreamSize *
15).ValueOrDie();
// Write the table as ORC and close the writer before taking the buffer.
std::unique_ptr<adapters::orc::ORCFileWriter> writer =
adapters::orc::ORCFileWriter::Open(*buffer_output_stream).ValueOrDie();
ARROW_EXPECT_OK(writer->Write(*table));
ARROW_EXPECT_OK(writer->Close());
// Read the written bytes back through the ORC reader.
std::shared_ptr<Buffer> buffer = buffer_output_stream->Finish().ValueOrDie();
std::shared_ptr<io::RandomAccessFile> in_stream(new io::BufferReader(buffer));
std::unique_ptr<adapters::orc::ORCFileReader> reader;
ARROW_EXPECT_OK(
adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(),
&reader));
std::shared_ptr<Table> actual_output_table;
ARROW_EXPECT_OK(reader->Read(&actual_output_table));
// Only chunk 0 of column 0 is inspected on each side; the casts to ListArray
// are unchecked (static_pointer_cast).
auto actual_array =
std::static_pointer_cast<ListArray>(actual_output_table->column(0)->chunk(0));
auto expected_array =
std::static_pointer_cast<ListArray>(table->column(0)->chunk(0));
// Compare the components separately to narrow down where the arrays differ.
AssertArraysEqual(*(actual_array->offsets()), *(expected_array->offsets()));
AssertArraysEqual(*(actual_array->values()), *(expected_array->values()));
// NOTE(review): both null_bitmap() results are dereferenced without a null
// check.
AssertBufferEqual(*(actual_array->null_bitmap()),
*(expected_array->null_bitmap()));
// Record the whole-array equality result in the test report instead of
// asserting on it.
RecordProperty("array_equality", actual_array->Equals(*expected_array));
}
<testcase name="writeList" status="run" result="completed" time="0.028"
timestamp="2021-02-10T11:58:23" classname="TestAdapterWriteNested">
<properties>
<property name="array_equality" value="0"/>
</properties>
</testcase>
> On Feb 10, 2021, at 3:52 AM, Antoine Pitrou <[email protected]> wrote:
>
>
> Hi Ying,
>
> Hmm, yes, this may be related to the null bitmaps, or the offsets.
> Can you try to inspect or pretty-print the offsets arrays for the two
> list arrays?
>
> Regards
>
> Antoine.
>
>
> Le 10/02/2021 à 03:26, Ying Zhou a écrit :
>> Hi,
>>
>> This is an extremely weird phenomenon. Two 2*1 tables are reported as
>> different, but the error message I get is confusing because the expected
>> and actual contents it prints look identical:
>>
>> [ RUN ] TestAdapterWriteNested.writeList
>> /Users/karlkatzen/Documents/code/arrow-dev/arrow/cpp/src/arrow/testing/gtest_util.cc:459:
>> Failure
>> Failed
>> Unequal at absolute position 2
>> Expected:
>> [
>> [
>> null,
>> 1074834796,
>> null,
>> null
>> ],
>> null
>> ]
>> Actual:
>> [
>> [
>> null,
>> 1074834796,
>> null,
>> null
>> ],
>> null
>> ]
>> [ FAILED ] TestAdapterWriteNested.writeList (2 ms)
>>
>> Here is the code that causes the issue:
>>
>> TEST(TestAdapterWriteNested, writeList) {
>> std::shared_ptr<Schema> table_schema = schema({field("list",
>> list(int32()))});
>> int64_t num_rows = 2;
>> arrow::random::RandomArrayGenerator rand(kRandomSeed);
>> auto value_array = rand.ArrayOf(int32(), 2 * num_rows, 0.6);
>> std::shared_ptr<Array> array = rand.List(*value_array, num_rows + 1, 1);
>> std::shared_ptr<ChunkedArray> chunked_array =
>> std::make_shared<ChunkedArray>(array);
>> std::shared_ptr<Table> table = Table::Make(table_schema, {chunked_array});
>> AssertTableWriteReadEqual(table, table, kDefaultSmallMemStreamSize * 5);
>> }
>>
>> Here AssertTableWriteReadEqual is a function I use to test that
>> from_orc(to_orc(table_in)) == expected_table_out. The function did not have
>> issues before.
>>
>> void AssertTableWriteReadEqual(const std::shared_ptr<Table>& input_table,
>> const std::shared_ptr<Table>&
>> expected_output_table,
>> const int64_t max_size =
>> kDefaultSmallMemStreamSize) {
>> std::shared_ptr<io::BufferOutputStream> buffer_output_stream =
>> io::BufferOutputStream::Create(max_size).ValueOrDie();
>> std::unique_ptr<adapters::orc::ORCFileWriter> writer =
>> adapters::orc::ORCFileWriter::Open(*buffer_output_stream).ValueOrDie();
>> ARROW_EXPECT_OK(writer->Write(*input_table));
>> ARROW_EXPECT_OK(writer->Close());
>> std::shared_ptr<Buffer> buffer =
>> buffer_output_stream->Finish().ValueOrDie();
>> std::shared_ptr<io::RandomAccessFile> in_stream(new
>> io::BufferReader(buffer));
>> std::unique_ptr<adapters::orc::ORCFileReader> reader;
>> ARROW_EXPECT_OK(
>> adapters::orc::ORCFileReader::Open(in_stream, default_memory_pool(),
>> &reader));
>> std::shared_ptr<Table> actual_output_table;
>> ARROW_EXPECT_OK(reader->Read(&actual_output_table));
>> AssertTablesEqual(*actual_output_table, *expected_output_table, false,
>> false);
>> }
>>
>> I strongly suspect that this is related to the null bitmaps. What do you
>> guys think?
>>
>> Ying
>>