mapleFU commented on code in PR #35825:
URL: https://github.com/apache/arrow/pull/35825#discussion_r1232947694
##########
cpp/src/parquet/encoding.cc:
##########
@@ -1559,11 +1631,19 @@ class DictDecoderImpl : public DecoderImpl, virtual
public DictDecoder<Type> {
valid_bits, valid_bits_offset, num_values, null_count,
[&]() { valid_bytes[i++] = 1; }, [&]() { ++i; });
- auto binary_builder =
checked_cast<::arrow::BinaryDictionary32Builder*>(builder);
- PARQUET_THROW_NOT_OK(
- binary_builder->AppendIndices(indices_buffer, num_values,
valid_bytes.data()));
- num_values_ -= num_values - null_count;
- return num_values - null_count;
+ // It looks like this method is only called by ByteArray types. Previously,
+ // there was an unconditional cast to
::arrow::Dictionary32Builder<::arrow::BinaryType>.
+ // This won't work for LargeByteArrayType and the Type template argument
can't be used
+ // unconditionally because it is not defined for several other types.
+ if constexpr (std::is_same_v<ByteArrayType, Type> ||
std::is_same_v<LargeByteArrayType, Type>) {
+ auto binary_builder = checked_cast<typename
EncodingTraits<Type>::DictAccumulator*>(builder);
+ PARQUET_THROW_NOT_OK(
+ binary_builder->AppendIndices(indices_buffer, num_values,
valid_bytes.data()));
+ num_values_ -= num_values - null_count;
+ return num_values - null_count;
+ }
+
+ ParquetException::NYI("DecodeIndicesSpaced not implemented for this type");
Review Comment:
https://arrow.apache.org/docs/format/Columnar.html#dictionary-encoded-layout
@arthurpassos
Even for a large binary type, the "dictionary index" is still 32 bits wide. That is
separate from the 64-bit offsets.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]