alamb commented on a change in pull request #709:
URL: https://github.com/apache/arrow-rs/pull/709#discussion_r694308857
##########
File path: parquet/src/arrow/arrow_array_reader.rs
##########
@@ -1559,4 +1605,120 @@ mod tests {
array_reader.get_rep_levels()
);
}
+
+ /// Allows to write parquet into memory. Intended only for use in tests.
+ #[derive(Clone)]
+ struct VecWriter {
+ data: Arc<Mutex<Cursor<Vec<u8>>>>,
+ }
+
+ impl VecWriter {
+ pub fn new() -> VecWriter {
+ VecWriter {
+ data: Arc::new(Mutex::new(Cursor::new(Vec::new()))),
+ }
+ }
+
+ pub fn consume(self) -> Vec<u8> {
+ Arc::try_unwrap(self.data)
+ .unwrap()
+ .into_inner()
+ .unwrap()
+ .into_inner()
+ }
+ }
+
+ impl TryClone for VecWriter {
+ fn try_clone(&self) -> std::io::Result<Self> {
+ Ok(self.clone())
+ }
+ }
+
+ impl Seek for VecWriter {
+ fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
+ self.data.lock().unwrap().seek(pos)
+ }
+
+ fn stream_position(&mut self) -> std::io::Result<u64> {
+ self.data.lock().unwrap().stream_position()
+ }
+ }
+
+ impl Write for VecWriter {
+ fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
+ self.data.lock().unwrap().write(buf)
+ }
+
+ fn flush(&mut self) -> std::io::Result<()> {
+ self.data.lock().unwrap().flush()
+ }
+ }
+
+ #[test]
+ fn test_string_delta_byte_array() {
+ use crate::basic;
+ use crate::schema::types::Type;
+
+ let data = VecWriter::new();
+ let schema = Arc::new(
+ Type::group_type_builder("string_test")
+ .with_fields(&mut vec![Arc::new(
+ Type::primitive_type_builder("c", basic::Type::BYTE_ARRAY)
+ .with_converted_type(ConvertedType::UTF8)
+ .build()
+ .unwrap(),
+ )])
+ .build()
+ .unwrap(),
+ );
+ // Disable dictionary and use the fallback encoding.
+ let p = Arc::new(
+ WriterProperties::builder()
+ .set_dictionary_enabled(false)
+ .set_encoding(Encoding::DELTA_BYTE_ARRAY)
+ .build(),
+ );
+ // Write a few strings.
+ let mut w = SerializedFileWriter::new(data.clone(), schema, p).unwrap();
+ let mut rg = w.next_row_group().unwrap();
+ let mut c = rg.next_column().unwrap().unwrap();
+ match &mut c {
+ ColumnWriter::ByteArrayColumnWriter(c) => {
+ c.write_batch(
+ &[ByteArray::from("foo"), ByteArray::from("bar")],
Review comment:
I recommend also adding a test case here that covers `Null` / `None` values
(i.e., definition level 0).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]