[
https://issues.apache.org/jira/browse/ARROW-11548?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
ASF GitHub Bot updated ARROW-11548:
-----------------------------------
Labels: pull-request-available (was: )
> [C++] RandomArrayGenerator::List size mismatch
> -----------------------------------------------
>
> Key: ARROW-11548
> URL: https://issues.apache.org/jira/browse/ARROW-11548
> Project: Apache Arrow
> Issue Type: Bug
> Components: C++
> Affects Versions: 3.0.0
> Reporter: Ying Zhou
> Assignee: Antoine Pitrou
> Priority: Major
> Labels: pull-request-available
> Fix For: 4.0.0
>
> Time Spent: 10m
> Remaining Estimate: 0h
>
> RandomArrayGenerator::List consistently produces ListArrays whose length is
> 1 less than the requested size described in its documentation. Moreover, the
> null bitmaps it produces look wrong (see the notes at the end).
>
> Here is a simple test:
>
> {color:#dcdcaa}TEST{color}(TestAdapterWriteNested, ListTest) {
> {color:#569cd6}int64_t{color} num_rows = {color:#b5cea8}2{color};
> {color:#569cd6}static{color} {color:#569cd6}constexpr{color}
> {color:#4ec9b0}random{color}::SeedType kRandomSeed2 =
> {color:#b5cea8}0x0ff1ce{color};
> {color:#4ec9b0}arrow{color}::{color:#4ec9b0}random{color}::RandomArrayGenerator
> {color:#dcdcaa}rand{color}(kRandomSeed2);
> {color:#4ec9b0}std{color}::shared_ptr<Array> value_array =
> {color:#9cdcfe}rand{color}.{color:#dcdcaa}ArrayOf{color}({color:#dcdcaa}int32{color}(),
> {color:#b5cea8}2{color} * num_rows, {color:#b5cea8}0.2{color});
> {color:#4ec9b0}std{color}::shared_ptr<Array> array =
> {color:#9cdcfe}rand{color}.{color:#dcdcaa}List{color}(*value_array, num_rows,
> {color:#b5cea8}1{color});
> {color:#dcdcaa}RecordProperty{color}({color:#ce9178}"bitmap"{color},*({color:#9cdcfe}array{color}->{color:#dcdcaa}null_bitmap_data{color}()));
> {color:#dcdcaa}RecordProperty{color}({color:#ce9178}"length"{color},{color:#9cdcfe}array{color}->{color:#dcdcaa}length{color}());
> {color:#dcdcaa}RecordProperty{color}({color:#ce9178}"array"{color},{color:#9cdcfe}array{color}->{color:#dcdcaa}ToString{color}());
> }
>
> Here are the results:
>
> {color:#808080}<{color}{color:#569cd6}testcase{color}
> {color:#9cdcfe}name{color}={color:#ce9178}"ListTest"{color}
> {color:#9cdcfe}status{color}={color:#ce9178}"run"{color}
> {color:#9cdcfe}result{color}={color:#ce9178}"completed"{color}
> {color:#9cdcfe}time{color}={color:#ce9178}"0"{color}
> {color:#9cdcfe}timestamp{color}={color:#ce9178}"2021-02-07T15:23:16"{color}
> {color:#9cdcfe}classname{color}={color:#ce9178}"TestAdapterWriteNested"{color}{color:#808080}>{color}
> {color:#808080}<{color}{color:#569cd6}properties{color}{color:#808080}>{color}
> {color:#808080}<{color}{color:#569cd6}property{color}
> {color:#9cdcfe}name{color}={color:#ce9178}"bitmap"{color}
> {color:#9cdcfe}value{color}={color:#ce9178}"3"{color}{color:#808080}/>{color}
> {color:#808080}<{color}{color:#569cd6}property{color}
> {color:#9cdcfe}name{color}={color:#ce9178}"length"{color}
> {color:#9cdcfe}value{color}={color:#ce9178}"1"{color}{color:#808080}/>{color}
> {color:#808080}<{color}{color:#569cd6}property{color}
> {color:#9cdcfe}name{color}={color:#ce9178}"array"{color}
> {color:#9cdcfe}value{color}={color:#ce9178}"[{color}{color:#569cd6}
{color}{color:#ce9178}
> [{color}{color:#569cd6}
{color}{color:#ce9178}
> null,{color}{color:#569cd6}
{color}{color:#ce9178}
> 1074834796,{color}{color:#569cd6}
{color}{color:#ce9178}
> 551076274,{color}{color:#569cd6}
{color}{color:#ce9178}
> 1184187771{color}{color:#569cd6}
{color}{color:#ce9178}
> ]{color}{color:#569cd6}
{color}{color:#ce9178}]"{color}{color:#808080}/>{color}
> {color:#808080}</{color}{color:#569cd6}properties{color}{color:#808080}>{color}
> {color:#808080}</{color}{color:#569cd6}testcase{color}{color:#808080}>{color}
>
> Here is what RandomArrayGenerator::List should do:
>
> {color:#6a9955} /// {color}{color:#569cd6}\brief{color}{color:#6a9955}
> Generate a random ListArray{color}
> {color:#6a9955} ///{color}
> {color:#6a9955} ///
> {color}{color:#569cd6}\param{color}{color:#6a9955}[{color}{color:#569cd6}in{color}{color:#6a9955}]
> {color}{color:#9cdcfe}values{color}{color:#6a9955} The underlying values
> array{color}
> {color:#6a9955} ///
> {color}{color:#569cd6}\param{color}{color:#6a9955}[{color}{color:#569cd6}in{color}{color:#6a9955}]
> {color}{color:#9cdcfe}size{color}{color:#6a9955} The size of the generated
> list array{color}
> {color:#6a9955} ///
> {color}{color:#569cd6}\param{color}{color:#6a9955}[{color}{color:#569cd6}in{color}{color:#6a9955}]
> {color}{color:#9cdcfe}null_probability{color}{color:#6a9955} the probability
> of a list value being null{color}
> {color:#6a9955} ///
> {color}{color:#569cd6}\param{color}{color:#6a9955}[{color}{color:#569cd6}in{color}{color:#6a9955}]
> {color}{color:#9cdcfe}force_empty_nulls{color}{color:#6a9955} if true, null
> list entries must have 0 length{color}
> {color:#6a9955} ///{color}
> {color:#6a9955} /// {color}{color:#569cd6}\return{color}{color:#6a9955} a
> generated Array{color}
> {color:#4ec9b0}std{color}::{color:#4ec9b0}shared_ptr{color}<{color:#4ec9b0}Array{color}>
> {color:#dcdcaa}List{color}({color:#569cd6}const{color}
> {color:#4ec9b0}Array{color}{color:#569cd6}&{color}
> {color:#9cdcfe}values{color}, {color:#4ec9b0}int64_t{color}
> {color:#9cdcfe}size{color}, {color:#569cd6}double{color}
> {color:#9cdcfe}null_probability{color},
> {color:#569cd6}bool{color} {color:#9cdcfe}force_empty_nulls{color} =
> {color:#569cd6}false{color});
>
> Note that the generator failed in at least three aspects:
> 1. The length of the generated array is too low.
> 2. Even when null_probability is set to 1 there are still 1s in the bitmap.
> 3. The size of the bitmap is larger than the size of the Array.
--
This message was sent by Atlassian Jira
(v8.3.4#803005)