cocoa-xu commented on issue #482:
URL: 
https://github.com/apache/arrow-nanoarrow/issues/482#issuecomment-2135945894

   Hi @paleolimbot, many thanks for your previous help, and sorry to ping you 
again. May I ask one more question about constructing a nested list? I've 
been trying to achieve this for the past few days but still have no idea how 
to get it right. 
   
   Let's say now each row is a `list<list<int32>>`, and the goal is to 
construct the following query results:
   
   | Row ID | data                       |
   |--------|----------------------------|
   | 0      | [[1,2,3], [4,5,6]]         |
   | 1      | [[2,3,4], [5,6,7]]         |
   
   Following the approach using `ArrowArrayAppendXX()` functions, I wrote the 
following function:
   
   <details>
   <summary>Minimal code</summary>
   
   ```cpp
   // | Row ID | data                       |
   // |--------|----------------------------|
   // | 0      | [[1,2,3], [4,5,6]]         |
   // | 1      | [[2,3,4], [5,6,7]]         |
   #define NESTING_LEVEL 2
   /**
    * Attempts to build a nested list array (list<list<int32>>) with the
    * ArrowArrayAppend*() style API by calling itself once per nesting level.
    *
    * @param schema      Schema to initialise; ArrowSchemaInit() is called on it
    *                    in both branches.
    * @param array       Array initialised from `schema` and then appended to.
    * @param arrow_error Passed to ArrowArrayInitFromSchema() and
    *                    ArrowArrayFinishBuildingDefault().
    * @param level       Current nesting depth. level == NESTING_LEVEL builds a
    *                    list of int32; any other value takes the outer branch.
    * @param row_id      Offset added to the integers appended in the innermost
    *                    branch (0 -> 1..6, 1 -> 2..7). Ignored by the outer
    *                    branch, which declares its own `row_id`.
    * @return 0 when every call succeeds; otherwise presumably the error code
    *         of the first failing call, via NANOARROW_RETURN_NOT_OK.
    */
   int make_nested_list(
       struct ArrowSchema* schema,
       struct ArrowArray* array, 
       struct ArrowError* arrow_error, 
       int level, 
       int row_id)
   {
       printf("make_nested_list:level: %d\n", level);
       if (level == NESTING_LEVEL) {
           // level == 2
           //   [
           //     [            <- schema
           //       int32      <- schema->children[0]
           //     ]
           //   ]
           //
           // Make the schema
           ArrowSchemaInit(schema);
           NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, 
NANOARROW_TYPE_LIST));
           NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema->children[0], 
NANOARROW_TYPE_INT32));
   
           // Build the array
           NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromSchema(array, schema, 
arrow_error));
           NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array));
   
           // First element
           // [1,2,3] if row_id == 0
           // [2,3,4] if row_id == 1
           NANOARROW_RETURN_NOT_OK(ArrowArrayAppendInt(array->children[0], 1 + 
row_id));
           NANOARROW_RETURN_NOT_OK(ArrowArrayAppendInt(array->children[0], 2 + 
row_id));
           NANOARROW_RETURN_NOT_OK(ArrowArrayAppendInt(array->children[0], 3 + 
row_id));
           NANOARROW_RETURN_NOT_OK(ArrowArrayFinishElement(array));
   
           // Second element
           // [4,5,6] if row_id == 0
           // [5,6,7] if row_id == 1
           NANOARROW_RETURN_NOT_OK(ArrowArrayAppendInt(array->children[0], 4 + 
row_id));
           NANOARROW_RETURN_NOT_OK(ArrowArrayAppendInt(array->children[0], 5 + 
row_id));
           NANOARROW_RETURN_NOT_OK(ArrowArrayAppendInt(array->children[0], 6 + 
row_id));
           NANOARROW_RETURN_NOT_OK(ArrowArrayFinishElement(array));
   
           // Finish the outer array
           // [[1,2,3], [4,5,6]] if row_id == 0
           // [[2,3,4], [5,6,7]] if row_id == 1
           NANOARROW_RETURN_NOT_OK(ArrowArrayFinishBuildingDefault(array, 
arrow_error));
       } else {
           // level == 1
           //   [              <- schema
           //     [            <- schema->children[0]
           //       int32
           //     ]
           //   ]
           // Make the schema for the outer and middle array
           ArrowSchemaInit(schema);
           NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, 
NANOARROW_TYPE_LIST));
           // NOTE(review): this types children[0] as a list, but nothing in
           // this branch sets a type on children[0]->children[0] before
           // ArrowArrayInitFromSchema() below. The reported "schema->format
           // ... found NULL" error comes from that call, presumably because
           // of the still-untyped grandchild -- confirm.
           NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema->children[0], 
NANOARROW_TYPE_LIST));
   
           // Build the outer and middle array
           NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromSchema(array, schema, 
arrow_error));
           NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array));
   
           // First row
           // This declaration shadows the `row_id` parameter for the rest of
           // this block.
           int row_id = 0;
           // NOTE(review): the recursive call runs ArrowSchemaInit() and
           // ArrowArrayInitFromSchema() on schema->children[0] and
           // array->children[0]. The child schema was already typed above,
           // and the child array was presumably already initialised by the
           // ArrowArrayInitFromSchema(array, ...) call above -- confirm
           // whether re-initialising them is valid.
           NANOARROW_RETURN_NOT_OK(make_nested_list(schema->children[0], 
array->children[0], arrow_error, level + 1, row_id));
           NANOARROW_RETURN_NOT_OK(ArrowArrayFinishElement(array));
   
           // Second row
           row_id = 1;
           NANOARROW_RETURN_NOT_OK(make_nested_list(schema->children[0], 
array->children[0], arrow_error, level + 1, row_id));
           // NOTE(review): unlike the first row, no
           // ArrowArrayFinishElement(array) follows this call.
   
           // Finish the outer array
           // [                       <- outer array
           //    [[1,2,3], [4,5,6]],  <- first row in the query result
           //    [[2,3,4], [5,6,7]]   <- second row in the query result
           // ]
           NANOARROW_RETURN_NOT_OK(ArrowArrayFinishBuildingDefault(array, 
arrow_error));
       }
       return 0;
   }
   
   
   // Driver: value-initialise the three structs, then start building at the
   // outermost level (1) with row_id 0.
   // NOTE(review): the int returned by make_nested_list() is discarded here,
   // so a failure is only visible through `error`.
   struct ArrowSchema schema{};
   struct ArrowArray array{};
   struct ArrowError error{};
   int level = 1;
   make_nested_list(&schema, &array, &error, level, 0);
   ```
   
   </details>
   
   My understanding is that we can first make a schema for the outer and 
middle array:
   
   ```c
   // Make the schema for the outer and middle array
   NANOARROW_RETURN_NOT_OK(ArrowSchemaInit(&schema));
   NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(&schema, NANOARROW_TYPE_LIST));
   NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema.children[0], 
NANOARROW_TYPE_LIST);
   ```
   
   Then we initialise the outer and middle arrays using `ArrowArrayInitFromSchema`, 
   
   ```c
   // Build the outer and middle array
   NANOARROW_RETURN_NOT_OK(ArrowArrayInitFromSchema(array, schema, 
arrow_error));
   NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array));
   ```
   
   and pass `schema->children[0]` and `array->children[0]` to 
`make_nested_list` with `level + 1` for row 0 and row 1 respectively. In 
theory, this should construct the inner arrays in `array->children[0]`.
   
   ```c
   // First row
   int row_id = 0;
   NANOARROW_RETURN_NOT_OK(make_nested_list(schema->children[0], 
array->children[0], arrow_error, level + 1, row_id));
   NANOARROW_RETURN_NOT_OK(ArrowArrayFinishElement(array));
   
   // Second row
   row_id = 1;
   NANOARROW_RETURN_NOT_OK(make_nested_list(schema->children[0], 
array->children[0], arrow_error, level + 1, row_id));
   ```
   
   But I got an error message saying that `Error parsing schema->format: 
Expected a null-terminated string but found NULL` when constructing the outer 
and middle array (i.e., when `level == 1`) from 
`ArrowArrayInitFromSchema(array, schema, arrow_error)`.
   
   I wonder if you could please shed some light on this and let me know which 
functions I should use? (Or am I fundamentally wrong about how one should 
construct a nested Arrow array?)


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to