carols10cents commented on a change in pull request #8402: URL: https://github.com/apache/arrow/pull/8402#discussion_r506562885
########## File path: rust/parquet/src/arrow/arrow_writer.rs ########## @@ -175,15 +175,61 @@ fn write_leaves( } Ok(()) } + ArrowDataType::Dictionary(k, v) => { + // Materialize the packed dictionary and let the writer repack it + let any_array = array.as_any(); + let (k2, v2) = match &**k { + ArrowDataType::Int32 => { + let typed_array = any_array + .downcast_ref::<arrow_array::Int32DictionaryArray>() + .expect("Unable to get dictionary array"); + + (typed_array.keys(), typed_array.values()) + } + o => unimplemented!("Unknown key type {:?}", o), + }; + + let k3 = k2; + let v3 = v2 + .as_any() + .downcast_ref::<arrow_array::StringArray>() + .unwrap(); + + // TODO: This removes NULL values; what _should_ be done? + // FIXME: Don't use `as` + let materialized: Vec<_> = k3 + .flatten() + .map(|k| v3.value(k as usize)) + .map(ByteArray::from) + .collect(); + // Review comment: @vertexclique I [updated the roundtrip dictionary test to include some `None` values](https://github.com/apache/arrow/pull/8402/commits/0dcf149392bcff328ddc393356c9977e135a2799), and it passes, so I think this code is fine-- it seems that the `None` values are handled by the `definition` levels, so we don't need to handle them here. Do I have that right or am I still missing something? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org