Copilot commented on code in PR #490:
URL: https://github.com/apache/hudi-rs/pull/490#discussion_r2596506920


##########
crates/core/src/hfile/reader.rs:
##########
@@ -0,0 +1,1317 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+//! HFile reader implementation.
+
+use std::collections::BTreeMap;
+
+use crate::hfile::block::{
+    read_var_long, var_long_size_on_disk, BlockIndexEntry, DataBlock, 
HFileBlock, BLOCK_HEADER_SIZE,
+};
+use crate::hfile::block_type::HFileBlockType;
+use crate::hfile::compression::CompressionCodec;
+use crate::hfile::error::{HFileError, Result};
+use crate::hfile::key::{compare_keys, Key, KeyValue, Utf8Key};
+use crate::hfile::proto::InfoProto;
+use crate::hfile::record::HFileRecord;
+use crate::hfile::trailer::{HFileTrailer, TRAILER_SIZE};
+use prost::Message;
+
+/// Magic bytes indicating protobuf format in file info block
+const PBUF_MAGIC: &[u8; 4] = b"PBUF";
+
+/// Seek result codes (matching Java implementation)
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum SeekResult {
+    /// Lookup key is before the fake first key of a block but >= actual first 
key
+    BeforeBlockFirstKey = -2,
+    /// Lookup key is before the first key of the file
+    BeforeFileFirstKey = -1,
+    /// Exact match found
+    Found = 0,
+    /// Key not found but within range; cursor points to greatest key < lookup
+    InRange = 1,
+    /// Key is greater than the last key; EOF reached
+    Eof = 2,
+}
+
+/// File info key for last key in the file
+const FILE_INFO_LAST_KEY: &str = "hfile.LASTKEY";
+/// File info key for key-value version
+const FILE_INFO_KEY_VALUE_VERSION: &str = "KEY_VALUE_VERSION";
+/// File info key for max MVCC timestamp
+const FILE_INFO_MAX_MVCC_TS: &str = "MAX_MEMSTORE_TS_KEY";
+
+/// Key-value version indicating MVCC timestamp support
+const KEY_VALUE_VERSION_WITH_MVCC_TS: i32 = 1;
+
+/// HFile reader that supports sequential reads and seeks.
+pub struct HFileReader {
+    /// Raw file bytes
+    bytes: Vec<u8>,
+    /// Parsed trailer
+    trailer: HFileTrailer,
+    /// Compression codec from trailer
+    codec: CompressionCodec,
+    /// Data block index (first key -> entry)
+    data_block_index: BTreeMap<Key, BlockIndexEntry>,
+    /// Meta block index (name -> entry)
+    meta_block_index: BTreeMap<String, BlockIndexEntry>,
+    /// File info map
+    file_info: BTreeMap<String, Vec<u8>>,
+    /// Last key in the file
+    last_key: Option<Key>,
+    /// Current cursor position
+    cursor: Cursor,
+    /// Currently loaded data block
+    current_block: Option<DataBlock>,
+    /// Current block's index entry
+    current_block_entry: Option<BlockIndexEntry>,
+}
+
+/// Cursor tracking current position in the file.
+#[derive(Debug, Clone, Default)]
+struct Cursor {
+    /// Absolute offset in file
+    offset: usize,
+    /// Cached key-value at current position
+    cached_kv: Option<KeyValue>,
+    /// Whether we've reached EOF
+    eof: bool,
+    /// Whether seek has been called
+    seeked: bool,
+}
+
+impl HFileReader {
+    /// Create a new HFile reader from raw bytes.
+    pub fn new(bytes: Vec<u8>) -> Result<Self> {
+        let trailer = HFileTrailer::read(&bytes)?;
+        let codec = trailer.compression_codec;
+
+        let mut reader = Self {
+            bytes,
+            trailer,
+            codec,
+            data_block_index: BTreeMap::new(),
+            meta_block_index: BTreeMap::new(),
+            file_info: BTreeMap::new(),
+            last_key: None,
+            cursor: Cursor::default(),
+            current_block: None,
+            current_block_entry: None,
+        };
+
+        reader.initialize_metadata()?;
+        Ok(reader)
+    }
+
+    /// Initialize metadata by reading index blocks and file info.
+    fn initialize_metadata(&mut self) -> Result<()> {
+        // Read the "load-on-open" section starting from 
load_on_open_data_offset
+        let start = self.trailer.load_on_open_data_offset as usize;
+        let end = self.bytes.len() - TRAILER_SIZE;

Review Comment:
   The variable `end` is assigned on line 127 but never used in the function. 
It appears to be intended for bounds checking or validation but is currently 
unused. Consider removing it or adding validation logic to ensure `start < end`.
   ```suggestion
   
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to