albertlockett commented on code in PR #9196:
URL: https://github.com/apache/arrow-rs/pull/9196#discussion_r2694420911
##########
arrow-ipc/src/writer.rs:
##########
@@ -4174,4 +4186,73 @@ mod tests {
let all_passed = (0..20).all(|_| create_hash() == expected);
assert!(all_passed);
}
+
+ #[test]
+ fn test_dictionary_tracker_reset() {
+ let data_gen = IpcDataGenerator::default();
+ let mut dictionary_tracker = DictionaryTracker::new(false);
+ let writer_options = IpcWriteOptions::default();
+ let mut compression_ctx = CompressionContext::default();
+
+ let schema = Arc::new(Schema::new(vec![Field::new(
+ "a",
+ DataType::Dictionary(Box::new(DataType::UInt8),
Box::new(DataType::Utf8)),
+ false,
+ )]));
+
+ let mut write_single_batch_stream =
+ |batch: RecordBatch, dict_tracker: &mut DictionaryTracker| ->
Vec<u8> {
+ let mut buffer = Vec::new();
+
+ // create a new IPC stream:
+ let stream_header =
data_gen.schema_to_bytes_with_dictionary_tracker(
+ &schema,
+ dict_tracker,
+ &writer_options,
+ );
+ _ = write_message(&mut buffer, stream_header,
&writer_options).unwrap();
+
+ let (encoded_dicts, encoded_batch) = data_gen
+ .encode(&batch, dict_tracker, &writer_options, &mut
compression_ctx)
+ .unwrap();
+ for encoded_dict in encoded_dicts {
+ _ = write_message(&mut buffer, encoded_dict,
&writer_options).unwrap();
+ }
+ _ = write_message(&mut buffer, encoded_batch,
&writer_options).unwrap();
+
+ buffer
+ };
+
+ let batch1 = RecordBatch::try_new(
+ schema.clone(),
+ vec![Arc::new(DictionaryArray::new(
+ UInt8Array::from_iter_values([0]),
+ Arc::new(StringArray::from_iter_values(["a"])),
+ ))],
+ )
+ .unwrap();
+ let buffer = write_single_batch_stream(batch1.clone(), &mut
dictionary_tracker);
+
+ // ensure we can read the stream back
+ let mut reader = StreamReader::try_new(Cursor::new(buffer),
None).unwrap();
+ let read_batch = reader.next().unwrap().unwrap();
+ assert_eq!(read_batch, batch1);
+
+ // reset the dictionary tracker so it can be used for next stream
+ dictionary_tracker.reset();
Review Comment:
If `dictionary_tracker.reset()` weren't invoked here, the test would fail with:
```
thread 'writer::tests::test_dictionary_tracker_reset' (19464958) panicked at arrow-ipc/src/writer.rs:4255:49:
called `Result::unwrap()` on an `Err` value: ParseError("Cannot find a dictionary batch with dict id: 1")
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]