hhhizzz commented on code in PR #8607:
URL: https://github.com/apache/arrow-rs/pull/8607#discussion_r2443006618
##########
parquet/src/column/reader/decoder.rs:
##########
@@ -136,14 +134,75 @@ pub trait ColumnValueDecoder {
fn skip_values(&mut self, num_values: usize) -> Result<usize>;
}
+/// Bucket-based storage for decoder instances keyed by `Encoding`.
+///
+/// This replaces `HashMap` lookups with direct indexing to avoid hashing
overhead in the
+/// hot decoding paths.
+const ENCODING_SLOTS: usize = 10; // covers the encodings handled in `enc_slot`
+
+#[inline]
+fn enc_slot(e: Encoding) -> usize {
+ match e {
+ Encoding::PLAIN => 0,
+ Encoding::PLAIN_DICTIONARY => 2,
+ Encoding::RLE => 3,
+ #[allow(deprecated)]
+ Encoding::BIT_PACKED => 4,
+ Encoding::DELTA_BINARY_PACKED => 5,
+ Encoding::DELTA_LENGTH_BYTE_ARRAY => 6,
+ Encoding::DELTA_BYTE_ARRAY => 7,
+ Encoding::RLE_DICTIONARY => 8,
+ Encoding::BYTE_STREAM_SPLIT => 9,
+ }
+}
+
+/// Fixed-capacity storage for decoder instances keyed by Parquet encoding.
+struct DecoderBuckets<V> {
+ inner: [Option<V>; ENCODING_SLOTS],
+}
+
+impl<V> DecoderBuckets<V> {
+ #[inline]
+ fn new() -> Self {
+ Self {
+ inner: std::array::from_fn(|_| None),
+ }
+ }
+
+ #[inline]
+ fn contains_key(&self, e: Encoding) -> bool {
+ self.inner[enc_slot(e)].is_some()
+ }
+
+ #[inline]
+ fn get_mut(&mut self, e: Encoding) -> Option<&mut V> {
+ self.inner[enc_slot(e)].as_mut()
+ }
+
+ #[inline]
+ fn insert_and_get_mut(&mut self, e: Encoding, v: V) -> &mut V {
+ let slot = &mut self.inner[enc_slot(e)];
+ debug_assert!(slot.is_none());
+ *slot = Some(v);
+ slot.as_mut().unwrap()
+ }
+}
+
+impl<V> Default for DecoderBuckets<V> {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
/// An implementation of [`ColumnValueDecoder`] for `[T::T]`
pub struct ColumnValueDecoderImpl<T: DataType> {
descr: ColumnDescPtr,
current_encoding: Option<Encoding>,
- // Cache of decoders for existing encodings
- decoders: HashMap<Encoding, Box<dyn Decoder<T>>>,
+ /// Cache of decoders for existing encodings.
Review Comment:
Thank you!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]