scovich commented on code in PR #7653:
URL: https://github.com/apache/arrow-rs/pull/7653#discussion_r2147015818


##########
parquet-variant/src/builder.rs:
##########
@@ -0,0 +1,757 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use crate::decoder::{VariantBasicType, VariantPrimitiveType};
+use crate::Variant;
+use std::collections::HashMap;
+
+const BASIC_TYPE_BITS: u8 = 2;
+const MAX_SHORT_STRING_SIZE: usize = 0x3F;
+
+fn primitive_header(primitive_type: VariantPrimitiveType) -> u8 {
+    (primitive_type as u8) << 2 | VariantBasicType::Primitive as u8
+}
+
+fn short_string_header(len: usize) -> u8 {
+    (len as u8) << 2 | VariantBasicType::ShortString as u8
+}
+
+fn array_header(large: bool, offset_size: u8) -> u8 {
+    let large_bit = if large { 1 } else { 0 };
+    (large_bit << (BASIC_TYPE_BITS + 2))
+        | ((offset_size - 1) << BASIC_TYPE_BITS)
+        | VariantBasicType::Array as u8
+}
+
+fn object_header(large: bool, id_size: u8, offset_size: u8) -> u8 {
+    let large_bit = if large { 1 } else { 0 };
+    (large_bit << (BASIC_TYPE_BITS + 4))
+        | ((id_size - 1) << (BASIC_TYPE_BITS + 2))
+        | ((offset_size - 1) << BASIC_TYPE_BITS)
+        | VariantBasicType::Object as u8
+}
+
+fn int_size(v: usize) -> u8 {
+    match v {
+        0..=0xFF => 1,
+        0x100..=0xFFFF => 2,
+        0x10000..=0xFFFFFF => 3,
+        _ => 4,
+    }
+}
+
+/// Write little-endian integer to buffer
+fn write_offset(buf: &mut [u8], value: usize, nbytes: u8) {
+    for i in 0..nbytes {
+        buf[i as usize] = ((value >> (i * 8)) & 0xFF) as u8;
+    }
+}
+
+/// Helper to make room for header by moving data
+fn make_room_for_header(buffer: &mut Vec<u8>, start_pos: usize, header_size: 
usize) {
+    let current_len = buffer.len();
+    buffer.resize(current_len + header_size, 0);
+
+    let src_start = start_pos;
+    let src_end = current_len;
+    let dst_start = start_pos + header_size;
+
+    buffer.copy_within(src_start..src_end, dst_start);
+}
+
+/// Builder for [`Variant`] values
+///
+/// # Example: create a Primitive Int8
+/// ```
+/// # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+/// let mut builder = VariantBuilder::new();
+/// builder.append_value(Variant::Int8(42));
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// assert_eq!(variant, Variant::Int8(42));
+/// ```
+///
+/// # Example: Create an Object
+/// This example shows how to create an object with two fields:
+/// ```json
+/// {
+///  "first_name": "Jiaying",
+///  "last_name": "Li"
+/// }
+/// ```
+///
+/// ```
+/// # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+/// let mut builder = VariantBuilder::new();
+/// // Create an object builder that will write fields to the object
+/// let mut object_builder = builder.new_object();
+/// object_builder.append_value("first_name", "Jiaying");
+/// object_builder.append_value("last_name", "Li");
+/// object_builder.finish();
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();

Review Comment:
   In order to deal with nested data efficiently, we will need a separate 
metadata builder that multiple object builders can make use of. Otherwise, we 
would need to merge multiple metadata dictionaries, which would also require 
rewriting the corresponding field ids. Very complex and error-prone.
   
   We should also give some thought to how to build sorted metadata 
dictionaries. It would probably require making two passes -- one to identify 
the string values and sort them into a variant metadata dictionary, and another 
to build the variant value that references the string ids?



##########
parquet-variant/src/builder.rs:
##########
@@ -0,0 +1,757 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use crate::decoder::{VariantBasicType, VariantPrimitiveType};
+use crate::Variant;
+use std::collections::HashMap;
+
+const BASIC_TYPE_BITS: u8 = 2;
+const MAX_SHORT_STRING_SIZE: usize = 0x3F;
+
+fn primitive_header(primitive_type: VariantPrimitiveType) -> u8 {
+    (primitive_type as u8) << 2 | VariantBasicType::Primitive as u8
+}
+
+fn short_string_header(len: usize) -> u8 {
+    (len as u8) << 2 | VariantBasicType::ShortString as u8
+}
+
+fn array_header(large: bool, offset_size: u8) -> u8 {
+    let large_bit = if large { 1 } else { 0 };
+    (large_bit << (BASIC_TYPE_BITS + 2))
+        | ((offset_size - 1) << BASIC_TYPE_BITS)
+        | VariantBasicType::Array as u8
+}
+
+fn object_header(large: bool, id_size: u8, offset_size: u8) -> u8 {
+    let large_bit = if large { 1 } else { 0 };
+    (large_bit << (BASIC_TYPE_BITS + 4))
+        | ((id_size - 1) << (BASIC_TYPE_BITS + 2))
+        | ((offset_size - 1) << BASIC_TYPE_BITS)
+        | VariantBasicType::Object as u8
+}
+
+fn int_size(v: usize) -> u8 {
+    match v {
+        0..=0xFF => 1,
+        0x100..=0xFFFF => 2,
+        0x10000..=0xFFFFFF => 3,
+        _ => 4,
+    }
+}
+
+/// Write little-endian integer to buffer
+fn write_offset(buf: &mut [u8], value: usize, nbytes: u8) {
+    for i in 0..nbytes {
+        buf[i as usize] = ((value >> (i * 8)) & 0xFF) as u8;
+    }
+}
+
+/// Helper to make room for header by moving data
+fn make_room_for_header(buffer: &mut Vec<u8>, start_pos: usize, header_size: 
usize) {
+    let current_len = buffer.len();
+    buffer.resize(current_len + header_size, 0);
+
+    let src_start = start_pos;
+    let src_end = current_len;
+    let dst_start = start_pos + header_size;
+
+    buffer.copy_within(src_start..src_end, dst_start);
+}
+
+/// Builder for [`Variant`] values
+///
+/// # Example: create a Primitive Int8
+/// ```
+/// # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+/// let mut builder = VariantBuilder::new();
+/// builder.append_value(Variant::Int8(42));
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// assert_eq!(variant, Variant::Int8(42));
+/// ```
+///
+/// # Example: Create an Object
+/// This example shows how to create an object with two fields:
+/// ```json
+/// {
+///  "first_name": "Jiaying",
+///  "last_name": "Li"
+/// }
+/// ```
+///
+/// ```
+/// # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+/// let mut builder = VariantBuilder::new();
+/// // Create an object builder that will write fields to the object
+/// let mut object_builder = builder.new_object();
+/// object_builder.append_value("first_name", "Jiaying");
+/// object_builder.append_value("last_name", "Li");
+/// object_builder.finish();
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// let Variant::Object(variant_object) = variant else {
+///   panic!("unexpected variant type")
+/// };
+/// /* TODO: uncomment this, but now VariantObject:field is not implemented
+/// assert_eq!(
+///   variant_object.field("first_name").unwrap(),
+///   Variant::String("Jiaying")
+/// );
+/// assert_eq!(
+///   variant_object.field("last_name").unwrap(),
+///   Variant::String("Li")
+/// );
+/// */
+/// ```
+///
+/// # Example: Create an Array
+///
+/// This example shows how to create an array of integers: `[1, 2, 3]`.
+/// ```
+///  # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+///  let mut builder = VariantBuilder::new();
+///  // Create an array builder that will write elements to the array
+///  let mut array_builder = builder.new_array();
+///  array_builder.append_value(1i8);
+///  array_builder.append_value(2i8);
+///  array_builder.append_value(3i8);
+///  array_builder.finish();
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// let Variant::Array(variant_array) = variant else {
+///   panic!("unexpected variant type")
+/// };
+/// // Verify the array contents
+/// assert_eq!(variant_array.get(0).unwrap(), Variant::Int8(1));
+/// assert_eq!(variant_array.get(1).unwrap(), Variant::Int8(2));
+/// assert_eq!(variant_array.get(2).unwrap(), Variant::Int8(3));
+/// ```
+///
+/// # Example: Array of objects
+///
+/// THis example shows how to create an array of objects:
+/// ```json
+/// [
+///  {
+///   "first_name": "Jiaying",
+///  "last_name": "Li"
+/// },
+///   {
+///    "first_name": "Malthe",
+///    "last_name": "Karbo"
+/// }
+/// ```
+///
+/// TODO
+///
+pub struct VariantBuilder {
+    buffer: Vec<u8>,
+    dict: HashMap<String, u32>,
+    dict_keys: Vec<String>,
+}
+
+impl VariantBuilder {
+    pub fn new() -> Self {
+        Self {
+            buffer: Vec::new(),
+            dict: HashMap::new(),
+            dict_keys: Vec::new(),
+        }
+    }
+
+    fn append_null(&mut self) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Null));
+    }
+
+    fn append_bool(&mut self, value: bool) {
+        let primitive_type = if value {
+            VariantPrimitiveType::BooleanTrue
+        } else {
+            VariantPrimitiveType::BooleanFalse
+        };
+        self.buffer.push(primitive_header(primitive_type));
+    }
+
+    fn append_int8(&mut self, value: i8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int8));
+        self.buffer.push(value as u8);
+    }
+
+    fn append_int16(&mut self, value: i16) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int16));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_int32(&mut self, value: i32) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int32));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_int64(&mut self, value: i64) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int64));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_float(&mut self, value: f32) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Float));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_double(&mut self, value: f64) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Double));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_date(&mut self, value: chrono::NaiveDate) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Date));
+        let days_since_epoch = value
+            .signed_duration_since(chrono::NaiveDate::from_ymd_opt(1970, 1, 
1).unwrap())

Review Comment:
   Is this `unwrap` somehow infallible? Otherwise we have a lurking panic here.



##########
parquet-variant/src/builder.rs:
##########
@@ -0,0 +1,757 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use crate::decoder::{VariantBasicType, VariantPrimitiveType};
+use crate::Variant;
+use std::collections::HashMap;
+
+const BASIC_TYPE_BITS: u8 = 2;
+const MAX_SHORT_STRING_SIZE: usize = 0x3F;
+
+fn primitive_header(primitive_type: VariantPrimitiveType) -> u8 {
+    (primitive_type as u8) << 2 | VariantBasicType::Primitive as u8
+}
+
+fn short_string_header(len: usize) -> u8 {
+    (len as u8) << 2 | VariantBasicType::ShortString as u8
+}
+
+fn array_header(large: bool, offset_size: u8) -> u8 {
+    let large_bit = if large { 1 } else { 0 };
+    (large_bit << (BASIC_TYPE_BITS + 2))
+        | ((offset_size - 1) << BASIC_TYPE_BITS)
+        | VariantBasicType::Array as u8
+}
+
+fn object_header(large: bool, id_size: u8, offset_size: u8) -> u8 {
+    let large_bit = if large { 1 } else { 0 };
+    (large_bit << (BASIC_TYPE_BITS + 4))
+        | ((id_size - 1) << (BASIC_TYPE_BITS + 2))
+        | ((offset_size - 1) << BASIC_TYPE_BITS)
+        | VariantBasicType::Object as u8
+}
+
+fn int_size(v: usize) -> u8 {
+    match v {
+        0..=0xFF => 1,
+        0x100..=0xFFFF => 2,
+        0x10000..=0xFFFFFF => 3,
+        _ => 4,
+    }
+}
+
+/// Write little-endian integer to buffer
+fn write_offset(buf: &mut [u8], value: usize, nbytes: u8) {
+    for i in 0..nbytes {
+        buf[i as usize] = ((value >> (i * 8)) & 0xFF) as u8;
+    }
+}
+
+/// Helper to make room for header by moving data
+fn make_room_for_header(buffer: &mut Vec<u8>, start_pos: usize, header_size: 
usize) {
+    let current_len = buffer.len();
+    buffer.resize(current_len + header_size, 0);
+
+    let src_start = start_pos;
+    let src_end = current_len;
+    let dst_start = start_pos + header_size;
+
+    buffer.copy_within(src_start..src_end, dst_start);
+}
+
+/// Builder for [`Variant`] values
+///
+/// # Example: create a Primitive Int8
+/// ```
+/// # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+/// let mut builder = VariantBuilder::new();
+/// builder.append_value(Variant::Int8(42));
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// assert_eq!(variant, Variant::Int8(42));
+/// ```
+///
+/// # Example: Create an Object
+/// This example shows how to create an object with two fields:
+/// ```json
+/// {
+///  "first_name": "Jiaying",
+///  "last_name": "Li"
+/// }
+/// ```
+///
+/// ```
+/// # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+/// let mut builder = VariantBuilder::new();
+/// // Create an object builder that will write fields to the object
+/// let mut object_builder = builder.new_object();
+/// object_builder.append_value("first_name", "Jiaying");
+/// object_builder.append_value("last_name", "Li");
+/// object_builder.finish();
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// let Variant::Object(variant_object) = variant else {
+///   panic!("unexpected variant type")
+/// };
+/// /* TODO: uncomment this, but now VariantObject:field is not implemented
+/// assert_eq!(
+///   variant_object.field("first_name").unwrap(),
+///   Variant::String("Jiaying")
+/// );
+/// assert_eq!(
+///   variant_object.field("last_name").unwrap(),
+///   Variant::String("Li")
+/// );
+/// */
+/// ```
+///
+/// # Example: Create an Array
+///
+/// This example shows how to create an array of integers: `[1, 2, 3]`.
+/// ```
+///  # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+///  let mut builder = VariantBuilder::new();
+///  // Create an array builder that will write elements to the array
+///  let mut array_builder = builder.new_array();
+///  array_builder.append_value(1i8);
+///  array_builder.append_value(2i8);
+///  array_builder.append_value(3i8);
+///  array_builder.finish();
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// let Variant::Array(variant_array) = variant else {
+///   panic!("unexpected variant type")
+/// };
+/// // Verify the array contents
+/// assert_eq!(variant_array.get(0).unwrap(), Variant::Int8(1));
+/// assert_eq!(variant_array.get(1).unwrap(), Variant::Int8(2));
+/// assert_eq!(variant_array.get(2).unwrap(), Variant::Int8(3));
+/// ```
+///
+/// # Example: Array of objects
+///
+/// THis example shows how to create an array of objects:
+/// ```json
+/// [
+///  {
+///   "first_name": "Jiaying",
+///  "last_name": "Li"
+/// },
+///   {
+///    "first_name": "Malthe",
+///    "last_name": "Karbo"
+/// }
+/// ```
+///
+/// TODO
+///
+pub struct VariantBuilder {
+    buffer: Vec<u8>,
+    dict: HashMap<String, u32>,
+    dict_keys: Vec<String>,
+}
+
+impl VariantBuilder {
+    pub fn new() -> Self {
+        Self {
+            buffer: Vec::new(),
+            dict: HashMap::new(),
+            dict_keys: Vec::new(),
+        }
+    }
+
+    fn append_null(&mut self) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Null));
+    }
+
+    fn append_bool(&mut self, value: bool) {
+        let primitive_type = if value {
+            VariantPrimitiveType::BooleanTrue
+        } else {
+            VariantPrimitiveType::BooleanFalse
+        };
+        self.buffer.push(primitive_header(primitive_type));
+    }
+
+    fn append_int8(&mut self, value: i8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int8));
+        self.buffer.push(value as u8);
+    }
+
+    fn append_int16(&mut self, value: i16) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int16));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_int32(&mut self, value: i32) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int32));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_int64(&mut self, value: i64) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int64));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_float(&mut self, value: f32) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Float));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_double(&mut self, value: f64) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Double));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_date(&mut self, value: chrono::NaiveDate) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Date));
+        let days_since_epoch = value
+            .signed_duration_since(chrono::NaiveDate::from_ymd_opt(1970, 1, 
1).unwrap())
+            .num_days() as i32;
+        self.buffer
+            .extend_from_slice(&days_since_epoch.to_le_bytes());
+    }
+
+    fn append_timestamp_micros(&mut self, value: 
chrono::DateTime<chrono::Utc>) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::TimestampMicros));
+        let micros = value.timestamp_micros();
+        self.buffer.extend_from_slice(&micros.to_le_bytes());
+    }
+
+    fn append_timestamp_ntz_micros(&mut self, value: chrono::NaiveDateTime) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::TimestampNtzMicros));
+        let micros = value.and_utc().timestamp_micros();
+        self.buffer.extend_from_slice(&micros.to_le_bytes());
+    }
+
+    fn append_decimal4(&mut self, integer: i32, scale: u8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Decimal4));
+        self.buffer.push(scale);
+        self.buffer.extend_from_slice(&integer.to_le_bytes());
+    }
+
+    fn append_decimal8(&mut self, integer: i64, scale: u8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Decimal8));
+        self.buffer.push(scale);
+        self.buffer.extend_from_slice(&integer.to_le_bytes());
+    }
+
+    fn append_decimal16(&mut self, integer: i128, scale: u8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Decimal16));
+        self.buffer.push(scale);
+        self.buffer.extend_from_slice(&integer.to_le_bytes());
+    }
+
+    fn append_binary(&mut self, value: &[u8]) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Binary));
+        self.buffer
+            .extend_from_slice(&(value.len() as u32).to_le_bytes());
+        self.buffer.extend_from_slice(value);
+    }
+
+    fn append_string(&mut self, value: &str) {
+        if value.len() <= MAX_SHORT_STRING_SIZE {
+            self.buffer.push(short_string_header(value.len()));
+            self.buffer.extend_from_slice(value.as_bytes());
+        } else {
+            self.buffer
+                .push(primitive_header(VariantPrimitiveType::String));
+            self.buffer
+                .extend_from_slice(&(value.len() as u32).to_le_bytes());
+            self.buffer.extend_from_slice(value.as_bytes());
+        }
+    }
+
+    /// Add key to dictionary, return its ID
+    fn add_key(&mut self, key: &str) -> u32 {
+        use std::collections::hash_map::Entry;
+        match self.dict.entry(key.to_string()) {
+            Entry::Occupied(entry) => *entry.get(),
+            Entry::Vacant(entry) => {
+                let id = self.dict_keys.len() as u32;
+                entry.insert(id);
+                self.dict_keys.push(key.to_string());
+                id
+            }
+        }
+    }
+
+    fn offset(&self) -> usize {
+        self.buffer.len()
+    }
+
+    /// Create an [`ArrayBuilder`] for creating [`Variant::Array`] values.
+    ///
+    /// See the examples on [`VariantBuilder`] for usage.
+    pub fn new_array(&mut self) -> ArrayBuilder {
+        ArrayBuilder::new(self)
+    }
+
+    /// Create an [`ObjectBuilder`] for creating [`Variant::Object`] values.
+    ///
+    /// See the examples on [`VariantBuilder`] for usage.
+    pub fn new_object(&mut self) -> ObjectBuilder {
+        ObjectBuilder::new(self)
+    }
+
+    pub fn finish(self) -> (Vec<u8>, Vec<u8>) {
+        let nkeys = self.dict_keys.len();
+
+        // Calculate metadata size
+        let total_dict_size: usize = self.dict_keys.iter().map(|k| 
k.len()).sum();
+
+        // Determine appropriate offset size based on the larger of dict size 
or total string size
+        let max_offset = std::cmp::max(total_dict_size, nkeys);
+        let offset_size = int_size(max_offset);
+
+        let offset_start = 1 + offset_size as usize;
+        let string_start = offset_start + (nkeys + 1) * offset_size as usize;
+        let metadata_size = string_start + total_dict_size;
+
+        // Pre-allocate exact size to avoid reallocations
+        let mut metadata = vec![0u8; metadata_size];
+
+        // Write header: version=1, not sorted, with calculated offset_size
+        metadata[0] = 0x01 | ((offset_size - 1) << 6);
+
+        // Write dictionary size
+        write_offset(&mut metadata[1..], nkeys, offset_size);
+
+        // Write offsets and string data
+        let mut cur_offset = 0;
+        for (i, key) in self.dict_keys.iter().enumerate() {
+            write_offset(
+                &mut metadata[offset_start + i * offset_size as usize..],
+                cur_offset,
+                offset_size,
+            );
+            let start = string_start + cur_offset;
+            metadata[start..start + key.len()].copy_from_slice(key.as_bytes());
+            cur_offset += key.len();
+        }
+        // Write final offset
+        write_offset(
+            &mut metadata[offset_start + nkeys * offset_size as usize..],
+            cur_offset,
+            offset_size,
+        );
+
+        (metadata, self.buffer)
+    }
+
+    pub fn append_value<T: Into<Variant<'static, 'static>>>(&mut self, value: 
T) {

Review Comment:
   Can we adjust the lifetimes so that borrowed (non-`'static`) strings can be 
appended, and not just string constants?
   ```suggestion
       pub fn append_value<'v, T: Into<Variant<'_, 'v>>>(&mut self, value: T) {
   ```



##########
parquet-variant/src/builder.rs:
##########
@@ -0,0 +1,757 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use crate::decoder::{VariantBasicType, VariantPrimitiveType};
+use crate::Variant;
+use std::collections::HashMap;
+
+const BASIC_TYPE_BITS: u8 = 2;
+const MAX_SHORT_STRING_SIZE: usize = 0x3F;
+
+fn primitive_header(primitive_type: VariantPrimitiveType) -> u8 {
+    (primitive_type as u8) << 2 | VariantBasicType::Primitive as u8
+}
+
+fn short_string_header(len: usize) -> u8 {
+    (len as u8) << 2 | VariantBasicType::ShortString as u8
+}
+
+fn array_header(large: bool, offset_size: u8) -> u8 {
+    let large_bit = if large { 1 } else { 0 };
+    (large_bit << (BASIC_TYPE_BITS + 2))
+        | ((offset_size - 1) << BASIC_TYPE_BITS)
+        | VariantBasicType::Array as u8
+}
+
+fn object_header(large: bool, id_size: u8, offset_size: u8) -> u8 {
+    let large_bit = if large { 1 } else { 0 };
+    (large_bit << (BASIC_TYPE_BITS + 4))
+        | ((id_size - 1) << (BASIC_TYPE_BITS + 2))
+        | ((offset_size - 1) << BASIC_TYPE_BITS)
+        | VariantBasicType::Object as u8
+}
+
+fn int_size(v: usize) -> u8 {
+    match v {
+        0..=0xFF => 1,
+        0x100..=0xFFFF => 2,
+        0x10000..=0xFFFFFF => 3,
+        _ => 4,
+    }
+}
+
+/// Write little-endian integer to buffer
+fn write_offset(buf: &mut [u8], value: usize, nbytes: u8) {
+    for i in 0..nbytes {
+        buf[i as usize] = ((value >> (i * 8)) & 0xFF) as u8;
+    }
+}
+
+/// Helper to make room for header by moving data
+fn make_room_for_header(buffer: &mut Vec<u8>, start_pos: usize, header_size: 
usize) {
+    let current_len = buffer.len();
+    buffer.resize(current_len + header_size, 0);
+
+    let src_start = start_pos;
+    let src_end = current_len;
+    let dst_start = start_pos + header_size;
+
+    buffer.copy_within(src_start..src_end, dst_start);
+}
+
+/// Builder for [`Variant`] values
+///
+/// # Example: create a Primitive Int8
+/// ```
+/// # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+/// let mut builder = VariantBuilder::new();
+/// builder.append_value(Variant::Int8(42));
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// assert_eq!(variant, Variant::Int8(42));
+/// ```
+///
+/// # Example: Create an Object
+/// This example shows how to create an object with two fields:
+/// ```json
+/// {
+///  "first_name": "Jiaying",
+///  "last_name": "Li"
+/// }
+/// ```
+///
+/// ```
+/// # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+/// let mut builder = VariantBuilder::new();
+/// // Create an object builder that will write fields to the object
+/// let mut object_builder = builder.new_object();
+/// object_builder.append_value("first_name", "Jiaying");
+/// object_builder.append_value("last_name", "Li");
+/// object_builder.finish();
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// let Variant::Object(variant_object) = variant else {
+///   panic!("unexpected variant type")
+/// };
+/// /* TODO: uncomment this, but now VariantObject:field is not implemented
+/// assert_eq!(
+///   variant_object.field("first_name").unwrap(),
+///   Variant::String("Jiaying")
+/// );
+/// assert_eq!(
+///   variant_object.field("last_name").unwrap(),
+///   Variant::String("Li")
+/// );
+/// */
+/// ```
+///
+/// # Example: Create an Array
+///
+/// This example shows how to create an array of integers: `[1, 2, 3]`.
+/// ```
+///  # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+///  let mut builder = VariantBuilder::new();
+///  // Create an array builder that will write elements to the array
+///  let mut array_builder = builder.new_array();
+///  array_builder.append_value(1i8);
+///  array_builder.append_value(2i8);
+///  array_builder.append_value(3i8);
+///  array_builder.finish();
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// let Variant::Array(variant_array) = variant else {
+///   panic!("unexpected variant type")
+/// };
+/// // Verify the array contents
+/// assert_eq!(variant_array.get(0).unwrap(), Variant::Int8(1));
+/// assert_eq!(variant_array.get(1).unwrap(), Variant::Int8(2));
+/// assert_eq!(variant_array.get(2).unwrap(), Variant::Int8(3));
+/// ```
+///
+/// # Example: Array of objects
+///
+/// THis example shows how to create an array of objects:
+/// ```json
+/// [
+///  {
+///   "first_name": "Jiaying",
+///  "last_name": "Li"
+/// },
+///   {
+///    "first_name": "Malthe",
+///    "last_name": "Karbo"
+/// }
+/// ```
+///
+/// TODO
+///
+pub struct VariantBuilder {
+    buffer: Vec<u8>,
+    dict: HashMap<String, u32>,
+    dict_keys: Vec<String>,
+}
+
+impl VariantBuilder {
+    pub fn new() -> Self {
+        Self {
+            buffer: Vec::new(),
+            dict: HashMap::new(),
+            dict_keys: Vec::new(),
+        }
+    }
+
+    fn append_null(&mut self) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Null));
+    }
+
+    fn append_bool(&mut self, value: bool) {
+        let primitive_type = if value {
+            VariantPrimitiveType::BooleanTrue
+        } else {
+            VariantPrimitiveType::BooleanFalse
+        };
+        self.buffer.push(primitive_header(primitive_type));
+    }
+
+    fn append_int8(&mut self, value: i8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int8));
+        self.buffer.push(value as u8);
+    }
+
+    fn append_int16(&mut self, value: i16) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int16));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_int32(&mut self, value: i32) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int32));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_int64(&mut self, value: i64) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int64));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_float(&mut self, value: f32) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Float));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_double(&mut self, value: f64) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Double));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_date(&mut self, value: chrono::NaiveDate) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Date));
+        let days_since_epoch = value
+            .signed_duration_since(chrono::NaiveDate::from_ymd_opt(1970, 1, 
1).unwrap())
+            .num_days() as i32;
+        self.buffer
+            .extend_from_slice(&days_since_epoch.to_le_bytes());
+    }
+
+    fn append_timestamp_micros(&mut self, value: 
chrono::DateTime<chrono::Utc>) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::TimestampMicros));
+        let micros = value.timestamp_micros();
+        self.buffer.extend_from_slice(&micros.to_le_bytes());
+    }
+
+    fn append_timestamp_ntz_micros(&mut self, value: chrono::NaiveDateTime) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::TimestampNtzMicros));
+        let micros = value.and_utc().timestamp_micros();
+        self.buffer.extend_from_slice(&micros.to_le_bytes());
+    }
+
+    fn append_decimal4(&mut self, integer: i32, scale: u8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Decimal4));
+        self.buffer.push(scale);
+        self.buffer.extend_from_slice(&integer.to_le_bytes());
+    }
+
+    fn append_decimal8(&mut self, integer: i64, scale: u8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Decimal8));
+        self.buffer.push(scale);
+        self.buffer.extend_from_slice(&integer.to_le_bytes());
+    }
+
+    fn append_decimal16(&mut self, integer: i128, scale: u8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Decimal16));
+        self.buffer.push(scale);
+        self.buffer.extend_from_slice(&integer.to_le_bytes());
+    }
+
+    fn append_binary(&mut self, value: &[u8]) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Binary));
+        self.buffer
+            .extend_from_slice(&(value.len() as u32).to_le_bytes());
+        self.buffer.extend_from_slice(value);
+    }
+
+    fn append_string(&mut self, value: &str) {
+        if value.len() <= MAX_SHORT_STRING_SIZE {
+            self.buffer.push(short_string_header(value.len()));
+            self.buffer.extend_from_slice(value.as_bytes());
+        } else {
+            self.buffer
+                .push(primitive_header(VariantPrimitiveType::String));
+            self.buffer
+                .extend_from_slice(&(value.len() as u32).to_le_bytes());
+            self.buffer.extend_from_slice(value.as_bytes());
+        }
+    }
+
+    /// Add key to dictionary, return its ID
+    fn add_key(&mut self, key: &str) -> u32 {
+        use std::collections::hash_map::Entry;
+        match self.dict.entry(key.to_string()) {
+            Entry::Occupied(entry) => *entry.get(),
+            Entry::Vacant(entry) => {
+                let id = self.dict_keys.len() as u32;
+                entry.insert(id);
+                self.dict_keys.push(key.to_string());
+                id
+            }
+        }
+    }
+
+    fn offset(&self) -> usize {
+        self.buffer.len()
+    }
+
+    /// Create an [`ArrayBuilder`] for creating [`Variant::Array`] values.
+    ///
+    /// See the examples on [`VariantBuilder`] for usage.
+    pub fn new_array(&mut self) -> ArrayBuilder {
+        ArrayBuilder::new(self)
+    }
+
+    /// Create an [`ObjectBuilder`] for creating [`Variant::Object`] values.
+    ///
+    /// See the examples on [`VariantBuilder`] for usage.
+    pub fn new_object(&mut self) -> ObjectBuilder {
+        ObjectBuilder::new(self)
+    }
+
+    pub fn finish(self) -> (Vec<u8>, Vec<u8>) {

Review Comment:
   Update: Never mind -- I missed the fact that `VariantBuilder` is the truly 
top-level builder, and the array/object builders both nest around it. There's 
still the problem that `Variant` can't implement `From` for arrays/objects, but 
I guess we could add a method that creates a new object/array builder from an 
existing one, in order to achieve nesting?



##########
parquet-variant/src/builder.rs:
##########
@@ -0,0 +1,757 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use crate::decoder::{VariantBasicType, VariantPrimitiveType};
+use crate::Variant;
+use std::collections::HashMap;
+
+const BASIC_TYPE_BITS: u8 = 2;
+const MAX_SHORT_STRING_SIZE: usize = 0x3F;
+
+fn primitive_header(primitive_type: VariantPrimitiveType) -> u8 {
+    (primitive_type as u8) << 2 | VariantBasicType::Primitive as u8
+}
+
+fn short_string_header(len: usize) -> u8 {
+    (len as u8) << 2 | VariantBasicType::ShortString as u8
+}
+
+fn array_header(large: bool, offset_size: u8) -> u8 {
+    let large_bit = if large { 1 } else { 0 };
+    (large_bit << (BASIC_TYPE_BITS + 2))
+        | ((offset_size - 1) << BASIC_TYPE_BITS)
+        | VariantBasicType::Array as u8
+}
+
+fn object_header(large: bool, id_size: u8, offset_size: u8) -> u8 {
+    let large_bit = if large { 1 } else { 0 };
+    (large_bit << (BASIC_TYPE_BITS + 4))
+        | ((id_size - 1) << (BASIC_TYPE_BITS + 2))
+        | ((offset_size - 1) << BASIC_TYPE_BITS)
+        | VariantBasicType::Object as u8
+}
+
+fn int_size(v: usize) -> u8 {
+    match v {
+        0..=0xFF => 1,
+        0x100..=0xFFFF => 2,
+        0x10000..=0xFFFFFF => 3,
+        _ => 4,
+    }
+}
+
+/// Write little-endian integer to buffer
+fn write_offset(buf: &mut [u8], value: usize, nbytes: u8) {
+    for i in 0..nbytes {
+        buf[i as usize] = ((value >> (i * 8)) & 0xFF) as u8;
+    }
+}
+
+/// Helper to make room for header by moving data
+fn make_room_for_header(buffer: &mut Vec<u8>, start_pos: usize, header_size: 
usize) {
+    let current_len = buffer.len();
+    buffer.resize(current_len + header_size, 0);
+
+    let src_start = start_pos;
+    let src_end = current_len;
+    let dst_start = start_pos + header_size;
+
+    buffer.copy_within(src_start..src_end, dst_start);
+}
+
+/// Builder for [`Variant`] values
+///
+/// # Example: create a Primitive Int8
+/// ```
+/// # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+/// let mut builder = VariantBuilder::new();
+/// builder.append_value(Variant::Int8(42));
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// assert_eq!(variant, Variant::Int8(42));
+/// ```
+///
+/// # Example: Create an Object
+/// This example shows how to create an object with two fields:
+/// ```json
+/// {
+///  "first_name": "Jiaying",
+///  "last_name": "Li"
+/// }
+/// ```
+///
+/// ```
+/// # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+/// let mut builder = VariantBuilder::new();
+/// // Create an object builder that will write fields to the object
+/// let mut object_builder = builder.new_object();
+/// object_builder.append_value("first_name", "Jiaying");
+/// object_builder.append_value("last_name", "Li");
+/// object_builder.finish();
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// let Variant::Object(variant_object) = variant else {
+///   panic!("unexpected variant type")
+/// };
+/// /* TODO: uncomment this, but now VariantObject:field is not implemented
+/// assert_eq!(
+///   variant_object.field("first_name").unwrap(),
+///   Variant::String("Jiaying")
+/// );
+/// assert_eq!(
+///   variant_object.field("last_name").unwrap(),
+///   Variant::String("Li")
+/// );
+/// */
+/// ```
+///
+/// # Example: Create an Array
+///
+/// This example shows how to create an array of integers: `[1, 2, 3]`.
+/// ```
+///  # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+///  let mut builder = VariantBuilder::new();
+///  // Create an array builder that will write elements to the array
+///  let mut array_builder = builder.new_array();
+///  array_builder.append_value(1i8);
+///  array_builder.append_value(2i8);
+///  array_builder.append_value(3i8);
+///  array_builder.finish();
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// let Variant::Array(variant_array) = variant else {
+///   panic!("unexpected variant type")
+/// };
+/// // Verify the array contents
+/// assert_eq!(variant_array.get(0).unwrap(), Variant::Int8(1));
+/// assert_eq!(variant_array.get(1).unwrap(), Variant::Int8(2));
+/// assert_eq!(variant_array.get(2).unwrap(), Variant::Int8(3));
+/// ```
+///
+/// # Example: Array of objects
+///
+/// THis example shows how to create an array of objects:
+/// ```json
+/// [
+///  {
+///   "first_name": "Jiaying",
+///  "last_name": "Li"
+/// },
+///   {
+///    "first_name": "Malthe",
+///    "last_name": "Karbo"
+/// }
+/// ```
+///
+/// TODO
+///
+pub struct VariantBuilder {
+    buffer: Vec<u8>,
+    dict: HashMap<String, u32>,
+    dict_keys: Vec<String>,
+}
+
+impl VariantBuilder {
+    pub fn new() -> Self {
+        Self {
+            buffer: Vec::new(),
+            dict: HashMap::new(),
+            dict_keys: Vec::new(),
+        }
+    }
+
+    fn append_null(&mut self) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Null));
+    }
+
+    fn append_bool(&mut self, value: bool) {
+        let primitive_type = if value {
+            VariantPrimitiveType::BooleanTrue
+        } else {
+            VariantPrimitiveType::BooleanFalse
+        };
+        self.buffer.push(primitive_header(primitive_type));
+    }
+
+    fn append_int8(&mut self, value: i8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int8));
+        self.buffer.push(value as u8);
+    }
+
+    fn append_int16(&mut self, value: i16) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int16));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_int32(&mut self, value: i32) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int32));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_int64(&mut self, value: i64) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int64));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_float(&mut self, value: f32) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Float));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_double(&mut self, value: f64) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Double));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_date(&mut self, value: chrono::NaiveDate) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Date));
+        let days_since_epoch = value
+            .signed_duration_since(chrono::NaiveDate::from_ymd_opt(1970, 1, 
1).unwrap())
+            .num_days() as i32;
+        self.buffer
+            .extend_from_slice(&days_since_epoch.to_le_bytes());
+    }
+
+    fn append_timestamp_micros(&mut self, value: 
chrono::DateTime<chrono::Utc>) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::TimestampMicros));
+        let micros = value.timestamp_micros();
+        self.buffer.extend_from_slice(&micros.to_le_bytes());
+    }
+
+    fn append_timestamp_ntz_micros(&mut self, value: chrono::NaiveDateTime) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::TimestampNtzMicros));
+        let micros = value.and_utc().timestamp_micros();
+        self.buffer.extend_from_slice(&micros.to_le_bytes());
+    }
+
+    fn append_decimal4(&mut self, integer: i32, scale: u8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Decimal4));
+        self.buffer.push(scale);

Review Comment:
   Should we validate the scale before pushing it?



##########
parquet-variant/src/builder.rs:
##########
@@ -0,0 +1,757 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use crate::decoder::{VariantBasicType, VariantPrimitiveType};
+use crate::Variant;
+use std::collections::HashMap;
+
+const BASIC_TYPE_BITS: u8 = 2;
+const MAX_SHORT_STRING_SIZE: usize = 0x3F;
+
+fn primitive_header(primitive_type: VariantPrimitiveType) -> u8 {
+    (primitive_type as u8) << 2 | VariantBasicType::Primitive as u8
+}
+
+fn short_string_header(len: usize) -> u8 {
+    (len as u8) << 2 | VariantBasicType::ShortString as u8
+}
+
+fn array_header(large: bool, offset_size: u8) -> u8 {
+    let large_bit = if large { 1 } else { 0 };
+    (large_bit << (BASIC_TYPE_BITS + 2))
+        | ((offset_size - 1) << BASIC_TYPE_BITS)
+        | VariantBasicType::Array as u8
+}
+
+fn object_header(large: bool, id_size: u8, offset_size: u8) -> u8 {
+    let large_bit = if large { 1 } else { 0 };
+    (large_bit << (BASIC_TYPE_BITS + 4))
+        | ((id_size - 1) << (BASIC_TYPE_BITS + 2))
+        | ((offset_size - 1) << BASIC_TYPE_BITS)
+        | VariantBasicType::Object as u8
+}
+
+fn int_size(v: usize) -> u8 {
+    match v {
+        0..=0xFF => 1,
+        0x100..=0xFFFF => 2,
+        0x10000..=0xFFFFFF => 3,
+        _ => 4,
+    }
+}
+
+/// Write little-endian integer to buffer
+fn write_offset(buf: &mut [u8], value: usize, nbytes: u8) {
+    for i in 0..nbytes {
+        buf[i as usize] = ((value >> (i * 8)) & 0xFF) as u8;
+    }
+}
+
+/// Helper to make room for header by moving data
+fn make_room_for_header(buffer: &mut Vec<u8>, start_pos: usize, header_size: 
usize) {
+    let current_len = buffer.len();
+    buffer.resize(current_len + header_size, 0);
+
+    let src_start = start_pos;
+    let src_end = current_len;
+    let dst_start = start_pos + header_size;
+
+    buffer.copy_within(src_start..src_end, dst_start);
+}
+
+/// Builder for [`Variant`] values
+///
+/// # Example: create a Primitive Int8
+/// ```
+/// # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+/// let mut builder = VariantBuilder::new();
+/// builder.append_value(Variant::Int8(42));
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// assert_eq!(variant, Variant::Int8(42));
+/// ```
+///
+/// # Example: Create an Object
+/// This example shows how to create an object with two fields:
+/// ```json
+/// {
+///  "first_name": "Jiaying",
+///  "last_name": "Li"
+/// }
+/// ```
+///
+/// ```
+/// # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+/// let mut builder = VariantBuilder::new();
+/// // Create an object builder that will write fields to the object
+/// let mut object_builder = builder.new_object();
+/// object_builder.append_value("first_name", "Jiaying");
+/// object_builder.append_value("last_name", "Li");
+/// object_builder.finish();
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// let Variant::Object(variant_object) = variant else {
+///   panic!("unexpected variant type")
+/// };
+/// /* TODO: uncomment this, but now VariantObject:field is not implemented
+/// assert_eq!(
+///   variant_object.field("first_name").unwrap(),
+///   Variant::String("Jiaying")
+/// );
+/// assert_eq!(
+///   variant_object.field("last_name").unwrap(),
+///   Variant::String("Li")
+/// );
+/// */
+/// ```
+///
+/// # Example: Create an Array
+///
+/// This example shows how to create an array of integers: `[1, 2, 3]`.
+/// ```
+///  # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+///  let mut builder = VariantBuilder::new();
+///  // Create an array builder that will write elements to the array
+///  let mut array_builder = builder.new_array();
+///  array_builder.append_value(1i8);
+///  array_builder.append_value(2i8);
+///  array_builder.append_value(3i8);
+///  array_builder.finish();
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// let Variant::Array(variant_array) = variant else {
+///   panic!("unexpected variant type")
+/// };
+/// // Verify the array contents
+/// assert_eq!(variant_array.get(0).unwrap(), Variant::Int8(1));
+/// assert_eq!(variant_array.get(1).unwrap(), Variant::Int8(2));
+/// assert_eq!(variant_array.get(2).unwrap(), Variant::Int8(3));
+/// ```
+///
+/// # Example: Array of objects
+///
+/// THis example shows how to create an array of objects:
+/// ```json
+/// [
+///  {
+///   "first_name": "Jiaying",
+///  "last_name": "Li"
+/// },
+///   {
+///    "first_name": "Malthe",
+///    "last_name": "Karbo"
+/// }
+/// ```
+///
+/// TODO
+///
+pub struct VariantBuilder {
+    buffer: Vec<u8>,
+    dict: HashMap<String, u32>,
+    dict_keys: Vec<String>,
+}
+
+impl VariantBuilder {
+    pub fn new() -> Self {
+        Self {
+            buffer: Vec::new(),
+            dict: HashMap::new(),
+            dict_keys: Vec::new(),
+        }
+    }
+
+    fn append_null(&mut self) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Null));
+    }
+
+    fn append_bool(&mut self, value: bool) {
+        let primitive_type = if value {
+            VariantPrimitiveType::BooleanTrue
+        } else {
+            VariantPrimitiveType::BooleanFalse
+        };
+        self.buffer.push(primitive_header(primitive_type));
+    }
+
+    fn append_int8(&mut self, value: i8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int8));
+        self.buffer.push(value as u8);
+    }
+
+    fn append_int16(&mut self, value: i16) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int16));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_int32(&mut self, value: i32) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int32));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_int64(&mut self, value: i64) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int64));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_float(&mut self, value: f32) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Float));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_double(&mut self, value: f64) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Double));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_date(&mut self, value: chrono::NaiveDate) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Date));
+        let days_since_epoch = value
+            .signed_duration_since(chrono::NaiveDate::from_ymd_opt(1970, 1, 
1).unwrap())
+            .num_days() as i32;
+        self.buffer
+            .extend_from_slice(&days_since_epoch.to_le_bytes());
+    }
+
+    fn append_timestamp_micros(&mut self, value: 
chrono::DateTime<chrono::Utc>) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::TimestampMicros));
+        let micros = value.timestamp_micros();
+        self.buffer.extend_from_slice(&micros.to_le_bytes());
+    }
+
+    fn append_timestamp_ntz_micros(&mut self, value: chrono::NaiveDateTime) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::TimestampNtzMicros));
+        let micros = value.and_utc().timestamp_micros();
+        self.buffer.extend_from_slice(&micros.to_le_bytes());
+    }
+
+    fn append_decimal4(&mut self, integer: i32, scale: u8) {

Review Comment:
   nit, to match the variant spec:
   ```suggestion
       fn append_decimal4(&mut self, unscaled_value: i32, scale: u8) {
   ```
   (more below)



##########
parquet-variant/src/builder.rs:
##########
@@ -0,0 +1,757 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use crate::decoder::{VariantBasicType, VariantPrimitiveType};
+use crate::Variant;
+use std::collections::HashMap;
+
+const BASIC_TYPE_BITS: u8 = 2;
+const MAX_SHORT_STRING_SIZE: usize = 0x3F;
+
+fn primitive_header(primitive_type: VariantPrimitiveType) -> u8 {
+    (primitive_type as u8) << 2 | VariantBasicType::Primitive as u8
+}
+
+fn short_string_header(len: usize) -> u8 {
+    (len as u8) << 2 | VariantBasicType::ShortString as u8
+}
+
+fn array_header(large: bool, offset_size: u8) -> u8 {
+    let large_bit = if large { 1 } else { 0 };
+    (large_bit << (BASIC_TYPE_BITS + 2))
+        | ((offset_size - 1) << BASIC_TYPE_BITS)
+        | VariantBasicType::Array as u8
+}
+
+fn object_header(large: bool, id_size: u8, offset_size: u8) -> u8 {
+    let large_bit = if large { 1 } else { 0 };
+    (large_bit << (BASIC_TYPE_BITS + 4))
+        | ((id_size - 1) << (BASIC_TYPE_BITS + 2))
+        | ((offset_size - 1) << BASIC_TYPE_BITS)
+        | VariantBasicType::Object as u8
+}
+
+fn int_size(v: usize) -> u8 {
+    match v {
+        0..=0xFF => 1,
+        0x100..=0xFFFF => 2,
+        0x10000..=0xFFFFFF => 3,
+        _ => 4,
+    }
+}
+
+/// Write little-endian integer to buffer
+fn write_offset(buf: &mut [u8], value: usize, nbytes: u8) {
+    for i in 0..nbytes {
+        buf[i as usize] = ((value >> (i * 8)) & 0xFF) as u8;

Review Comment:
   We probably don't need the `& 0xFF` mask, since the `as u8` cast already truncates to the low byte?



##########
parquet-variant/src/builder.rs:
##########
@@ -0,0 +1,757 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use crate::decoder::{VariantBasicType, VariantPrimitiveType};
+use crate::Variant;
+use std::collections::HashMap;
+
+const BASIC_TYPE_BITS: u8 = 2;
+const MAX_SHORT_STRING_SIZE: usize = 0x3F;
+
+fn primitive_header(primitive_type: VariantPrimitiveType) -> u8 {
+    (primitive_type as u8) << 2 | VariantBasicType::Primitive as u8
+}
+
+fn short_string_header(len: usize) -> u8 {
+    (len as u8) << 2 | VariantBasicType::ShortString as u8
+}
+
+fn array_header(large: bool, offset_size: u8) -> u8 {
+    let large_bit = if large { 1 } else { 0 };
+    (large_bit << (BASIC_TYPE_BITS + 2))
+        | ((offset_size - 1) << BASIC_TYPE_BITS)
+        | VariantBasicType::Array as u8
+}
+
+fn object_header(large: bool, id_size: u8, offset_size: u8) -> u8 {
+    let large_bit = if large { 1 } else { 0 };
+    (large_bit << (BASIC_TYPE_BITS + 4))
+        | ((id_size - 1) << (BASIC_TYPE_BITS + 2))
+        | ((offset_size - 1) << BASIC_TYPE_BITS)
+        | VariantBasicType::Object as u8
+}
+
+fn int_size(v: usize) -> u8 {
+    match v {
+        0..=0xFF => 1,
+        0x100..=0xFFFF => 2,
+        0x10000..=0xFFFFFF => 3,
+        _ => 4,
+    }
+}
+
+/// Write little-endian integer to buffer
+fn write_offset(buf: &mut [u8], value: usize, nbytes: u8) {
+    for i in 0..nbytes {
+        buf[i as usize] = ((value >> (i * 8)) & 0xFF) as u8;
+    }
+}
+
+/// Helper to make room for header by moving data
+fn make_room_for_header(buffer: &mut Vec<u8>, start_pos: usize, header_size: 
usize) {
+    let current_len = buffer.len();
+    buffer.resize(current_len + header_size, 0);
+
+    let src_start = start_pos;
+    let src_end = current_len;
+    let dst_start = start_pos + header_size;
+
+    buffer.copy_within(src_start..src_end, dst_start);
+}
+
+/// Builder for [`Variant`] values
+///
+/// # Example: create a Primitive Int8
+/// ```
+/// # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+/// let mut builder = VariantBuilder::new();
+/// builder.append_value(Variant::Int8(42));
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// assert_eq!(variant, Variant::Int8(42));
+/// ```
+///
+/// # Example: Create an Object
+/// This example shows how to create an object with two fields:
+/// ```json
+/// {
+///  "first_name": "Jiaying",
+///  "last_name": "Li"
+/// }
+/// ```
+///
+/// ```
+/// # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+/// let mut builder = VariantBuilder::new();
+/// // Create an object builder that will write fields to the object
+/// let mut object_builder = builder.new_object();
+/// object_builder.append_value("first_name", "Jiaying");
+/// object_builder.append_value("last_name", "Li");
+/// object_builder.finish();
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// let Variant::Object(variant_object) = variant else {
+///   panic!("unexpected variant type")
+/// };
+/// /* TODO: uncomment this, but now VariantObject:field is not implemented
+/// assert_eq!(
+///   variant_object.field("first_name").unwrap(),
+///   Variant::String("Jiaying")
+/// );
+/// assert_eq!(
+///   variant_object.field("last_name").unwrap(),
+///   Variant::String("Li")
+/// );
+/// */
+/// ```
+///
+/// # Example: Create an Array
+///
+/// This example shows how to create an array of integers: `[1, 2, 3]`.
+/// ```
+///  # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+///  let mut builder = VariantBuilder::new();
+///  // Create an array builder that will write elements to the array
+///  let mut array_builder = builder.new_array();
+///  array_builder.append_value(1i8);
+///  array_builder.append_value(2i8);
+///  array_builder.append_value(3i8);
+///  array_builder.finish();
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// let Variant::Array(variant_array) = variant else {
+///   panic!("unexpected variant type")
+/// };
+/// // Verify the array contents
+/// assert_eq!(variant_array.get(0).unwrap(), Variant::Int8(1));
+/// assert_eq!(variant_array.get(1).unwrap(), Variant::Int8(2));
+/// assert_eq!(variant_array.get(2).unwrap(), Variant::Int8(3));
+/// ```
+///
+/// # Example: Array of objects
+///
+/// THis example shows how to create an array of objects:
+/// ```json
+/// [
+///  {
+///   "first_name": "Jiaying",
+///  "last_name": "Li"
+/// },
+///   {
+///    "first_name": "Malthe",
+///    "last_name": "Karbo"
+/// }
+/// ```
+///
+/// TODO
+///
+pub struct VariantBuilder {
+    buffer: Vec<u8>,
+    dict: HashMap<String, u32>,
+    dict_keys: Vec<String>,
+}
+
+impl VariantBuilder {
+    pub fn new() -> Self {
+        Self {
+            buffer: Vec::new(),
+            dict: HashMap::new(),
+            dict_keys: Vec::new(),
+        }
+    }
+
+    fn append_null(&mut self) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Null));
+    }
+
+    fn append_bool(&mut self, value: bool) {
+        let primitive_type = if value {
+            VariantPrimitiveType::BooleanTrue
+        } else {
+            VariantPrimitiveType::BooleanFalse
+        };
+        self.buffer.push(primitive_header(primitive_type));
+    }
+
+    fn append_int8(&mut self, value: i8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int8));
+        self.buffer.push(value as u8);
+    }
+
+    fn append_int16(&mut self, value: i16) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int16));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_int32(&mut self, value: i32) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int32));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_int64(&mut self, value: i64) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int64));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_float(&mut self, value: f32) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Float));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_double(&mut self, value: f64) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Double));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_date(&mut self, value: chrono::NaiveDate) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Date));
+        let days_since_epoch = value
+            .signed_duration_since(chrono::NaiveDate::from_ymd_opt(1970, 1, 
1).unwrap())
+            .num_days() as i32;
+        self.buffer
+            .extend_from_slice(&days_since_epoch.to_le_bytes());
+    }
+
+    fn append_timestamp_micros(&mut self, value: 
chrono::DateTime<chrono::Utc>) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::TimestampMicros));
+        let micros = value.timestamp_micros();
+        self.buffer.extend_from_slice(&micros.to_le_bytes());
+    }
+
+    fn append_timestamp_ntz_micros(&mut self, value: chrono::NaiveDateTime) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::TimestampNtzMicros));
+        let micros = value.and_utc().timestamp_micros();
+        self.buffer.extend_from_slice(&micros.to_le_bytes());
+    }
+
+    fn append_decimal4(&mut self, integer: i32, scale: u8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Decimal4));
+        self.buffer.push(scale);
+        self.buffer.extend_from_slice(&integer.to_le_bytes());
+    }
+
+    fn append_decimal8(&mut self, integer: i64, scale: u8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Decimal8));
+        self.buffer.push(scale);
+        self.buffer.extend_from_slice(&integer.to_le_bytes());
+    }
+
+    fn append_decimal16(&mut self, integer: i128, scale: u8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Decimal16));
+        self.buffer.push(scale);
+        self.buffer.extend_from_slice(&integer.to_le_bytes());
+    }
+
+    fn append_binary(&mut self, value: &[u8]) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Binary));
+        self.buffer
+            .extend_from_slice(&(value.len() as u32).to_le_bytes());
+        self.buffer.extend_from_slice(value);
+    }
+
+    fn append_string(&mut self, value: &str) {
+        if value.len() <= MAX_SHORT_STRING_SIZE {
+            self.buffer.push(short_string_header(value.len()));
+            self.buffer.extend_from_slice(value.as_bytes());
+        } else {
+            self.buffer
+                .push(primitive_header(VariantPrimitiveType::String));
+            self.buffer
+                .extend_from_slice(&(value.len() as u32).to_le_bytes());
+            self.buffer.extend_from_slice(value.as_bytes());
+        }
+    }
+
+    /// Add key to dictionary, return its ID
+    fn add_key(&mut self, key: &str) -> u32 {
+        use std::collections::hash_map::Entry;
+        match self.dict.entry(key.to_string()) {
+            Entry::Occupied(entry) => *entry.get(),
+            Entry::Vacant(entry) => {
+                let id = self.dict_keys.len() as u32;
+                entry.insert(id);
+                self.dict_keys.push(key.to_string());

Review Comment:
   It's a bit unfortunate we have to double-allocate strings. Ideally the 
dictionary could track `&str` backed by `dict_keys`, but I don't know how to 
manage mutability and lifetimes to achieve that...



##########
parquet-variant/src/builder.rs:
##########
@@ -0,0 +1,757 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use crate::decoder::{VariantBasicType, VariantPrimitiveType};
+use crate::Variant;
+use std::collections::HashMap;
+
+const BASIC_TYPE_BITS: u8 = 2;
+const MAX_SHORT_STRING_SIZE: usize = 0x3F;
+
+fn primitive_header(primitive_type: VariantPrimitiveType) -> u8 {
+    (primitive_type as u8) << 2 | VariantBasicType::Primitive as u8
+}
+
+fn short_string_header(len: usize) -> u8 {
+    (len as u8) << 2 | VariantBasicType::ShortString as u8
+}
+
+fn array_header(large: bool, offset_size: u8) -> u8 {
+    let large_bit = if large { 1 } else { 0 };
+    (large_bit << (BASIC_TYPE_BITS + 2))
+        | ((offset_size - 1) << BASIC_TYPE_BITS)
+        | VariantBasicType::Array as u8
+}
+
+fn object_header(large: bool, id_size: u8, offset_size: u8) -> u8 {
+    let large_bit = if large { 1 } else { 0 };
+    (large_bit << (BASIC_TYPE_BITS + 4))
+        | ((id_size - 1) << (BASIC_TYPE_BITS + 2))
+        | ((offset_size - 1) << BASIC_TYPE_BITS)
+        | VariantBasicType::Object as u8
+}
+
+fn int_size(v: usize) -> u8 {
+    match v {
+        0..=0xFF => 1,
+        0x100..=0xFFFF => 2,
+        0x10000..=0xFFFFFF => 3,
+        _ => 4,
+    }
+}
+
+/// Write little-endian integer to buffer
+fn write_offset(buf: &mut [u8], value: usize, nbytes: u8) {
+    for i in 0..nbytes {
+        buf[i as usize] = ((value >> (i * 8)) & 0xFF) as u8;
+    }
+}
+
+/// Helper to make room for header by moving data
+fn make_room_for_header(buffer: &mut Vec<u8>, start_pos: usize, header_size: 
usize) {
+    let current_len = buffer.len();
+    buffer.resize(current_len + header_size, 0);
+
+    let src_start = start_pos;
+    let src_end = current_len;
+    let dst_start = start_pos + header_size;
+
+    buffer.copy_within(src_start..src_end, dst_start);
+}
+
+/// Builder for [`Variant`] values
+///
+/// # Example: create a Primitive Int8
+/// ```
+/// # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+/// let mut builder = VariantBuilder::new();
+/// builder.append_value(Variant::Int8(42));
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// assert_eq!(variant, Variant::Int8(42));
+/// ```
+///
+/// # Example: Create an Object
+/// This example shows how to create an object with two fields:
+/// ```json
+/// {
+///  "first_name": "Jiaying",
+///  "last_name": "Li"
+/// }
+/// ```
+///
+/// ```
+/// # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+/// let mut builder = VariantBuilder::new();
+/// // Create an object builder that will write fields to the object
+/// let mut object_builder = builder.new_object();
+/// object_builder.append_value("first_name", "Jiaying");
+/// object_builder.append_value("last_name", "Li");
+/// object_builder.finish();
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// let Variant::Object(variant_object) = variant else {
+///   panic!("unexpected variant type")
+/// };
+/// /* TODO: uncomment this, but now VariantObject:field is not implemented
+/// assert_eq!(
+///   variant_object.field("first_name").unwrap(),
+///   Variant::String("Jiaying")
+/// );
+/// assert_eq!(
+///   variant_object.field("last_name").unwrap(),
+///   Variant::String("Li")
+/// );
+/// */
+/// ```
+///
+/// # Example: Create an Array
+///
+/// This example shows how to create an array of integers: `[1, 2, 3]`.
+/// ```
+///  # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+///  let mut builder = VariantBuilder::new();
+///  // Create an array builder that will write elements to the array
+///  let mut array_builder = builder.new_array();
+///  array_builder.append_value(1i8);
+///  array_builder.append_value(2i8);
+///  array_builder.append_value(3i8);
+///  array_builder.finish();
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// let Variant::Array(variant_array) = variant else {
+///   panic!("unexpected variant type")
+/// };
+/// // Verify the array contents
+/// assert_eq!(variant_array.get(0).unwrap(), Variant::Int8(1));
+/// assert_eq!(variant_array.get(1).unwrap(), Variant::Int8(2));
+/// assert_eq!(variant_array.get(2).unwrap(), Variant::Int8(3));
+/// ```
+///
+/// # Example: Array of objects
+///
+/// THis example shows how to create an array of objects:
+/// ```json
+/// [
+///  {
+///   "first_name": "Jiaying",
+///  "last_name": "Li"
+/// },
+///   {
+///    "first_name": "Malthe",
+///    "last_name": "Karbo"
+/// }
+/// ```
+///
+/// TODO
+///
+pub struct VariantBuilder {
+    buffer: Vec<u8>,
+    dict: HashMap<String, u32>,
+    dict_keys: Vec<String>,
+}
+
+impl VariantBuilder {
+    pub fn new() -> Self {
+        Self {
+            buffer: Vec::new(),
+            dict: HashMap::new(),
+            dict_keys: Vec::new(),
+        }
+    }
+
+    fn append_null(&mut self) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Null));
+    }
+
+    fn append_bool(&mut self, value: bool) {
+        let primitive_type = if value {
+            VariantPrimitiveType::BooleanTrue
+        } else {
+            VariantPrimitiveType::BooleanFalse
+        };
+        self.buffer.push(primitive_header(primitive_type));
+    }
+
+    fn append_int8(&mut self, value: i8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int8));
+        self.buffer.push(value as u8);
+    }
+
+    fn append_int16(&mut self, value: i16) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int16));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_int32(&mut self, value: i32) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int32));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_int64(&mut self, value: i64) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int64));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_float(&mut self, value: f32) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Float));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_double(&mut self, value: f64) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Double));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_date(&mut self, value: chrono::NaiveDate) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Date));
+        let days_since_epoch = value
+            .signed_duration_since(chrono::NaiveDate::from_ymd_opt(1970, 1, 
1).unwrap())
+            .num_days() as i32;
+        self.buffer
+            .extend_from_slice(&days_since_epoch.to_le_bytes());
+    }
+
+    fn append_timestamp_micros(&mut self, value: 
chrono::DateTime<chrono::Utc>) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::TimestampMicros));
+        let micros = value.timestamp_micros();
+        self.buffer.extend_from_slice(&micros.to_le_bytes());
+    }
+
+    fn append_timestamp_ntz_micros(&mut self, value: chrono::NaiveDateTime) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::TimestampNtzMicros));
+        let micros = value.and_utc().timestamp_micros();
+        self.buffer.extend_from_slice(&micros.to_le_bytes());
+    }
+
+    fn append_decimal4(&mut self, integer: i32, scale: u8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Decimal4));
+        self.buffer.push(scale);
+        self.buffer.extend_from_slice(&integer.to_le_bytes());
+    }
+
+    fn append_decimal8(&mut self, integer: i64, scale: u8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Decimal8));
+        self.buffer.push(scale);
+        self.buffer.extend_from_slice(&integer.to_le_bytes());
+    }
+
+    fn append_decimal16(&mut self, integer: i128, scale: u8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Decimal16));
+        self.buffer.push(scale);
+        self.buffer.extend_from_slice(&integer.to_le_bytes());
+    }
+
+    fn append_binary(&mut self, value: &[u8]) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Binary));
+        self.buffer
+            .extend_from_slice(&(value.len() as u32).to_le_bytes());
+        self.buffer.extend_from_slice(value);
+    }
+
+    fn append_string(&mut self, value: &str) {
+        if value.len() <= MAX_SHORT_STRING_SIZE {
+            self.buffer.push(short_string_header(value.len()));
+            self.buffer.extend_from_slice(value.as_bytes());
+        } else {
+            self.buffer
+                .push(primitive_header(VariantPrimitiveType::String));
+            self.buffer
+                .extend_from_slice(&(value.len() as u32).to_le_bytes());
+            self.buffer.extend_from_slice(value.as_bytes());
+        }
+    }
+
+    /// Add key to dictionary, return its ID
+    fn add_key(&mut self, key: &str) -> u32 {
+        use std::collections::hash_map::Entry;
+        match self.dict.entry(key.to_string()) {
+            Entry::Occupied(entry) => *entry.get(),
+            Entry::Vacant(entry) => {
+                let id = self.dict_keys.len() as u32;
+                entry.insert(id);
+                self.dict_keys.push(key.to_string());
+                id
+            }
+        }
+    }
+
+    fn offset(&self) -> usize {
+        self.buffer.len()
+    }
+
+    /// Create an [`ArrayBuilder`] for creating [`Variant::Array`] values.
+    ///
+    /// See the examples on [`VariantBuilder`] for usage.
+    pub fn new_array(&mut self) -> ArrayBuilder {
+        ArrayBuilder::new(self)
+    }
+
+    /// Create an [`ObjectBuilder`] for creating [`Variant::Object`] values.
+    ///
+    /// See the examples on [`VariantBuilder`] for usage.
+    pub fn new_object(&mut self) -> ObjectBuilder {
+        ObjectBuilder::new(self)
+    }
+
+    pub fn finish(self) -> (Vec<u8>, Vec<u8>) {

Review Comment:
   I don't think this approach can be easily extended to allow nested variants.



##########
parquet-variant/src/builder.rs:
##########
@@ -0,0 +1,757 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use crate::decoder::{VariantBasicType, VariantPrimitiveType};
+use crate::Variant;
+use std::collections::HashMap;
+
+const BASIC_TYPE_BITS: u8 = 2;
+const MAX_SHORT_STRING_SIZE: usize = 0x3F;
+
+fn primitive_header(primitive_type: VariantPrimitiveType) -> u8 {
+    (primitive_type as u8) << 2 | VariantBasicType::Primitive as u8
+}
+
+fn short_string_header(len: usize) -> u8 {
+    (len as u8) << 2 | VariantBasicType::ShortString as u8
+}
+
+fn array_header(large: bool, offset_size: u8) -> u8 {
+    let large_bit = if large { 1 } else { 0 };
+    (large_bit << (BASIC_TYPE_BITS + 2))
+        | ((offset_size - 1) << BASIC_TYPE_BITS)
+        | VariantBasicType::Array as u8
+}
+
+fn object_header(large: bool, id_size: u8, offset_size: u8) -> u8 {
+    let large_bit = if large { 1 } else { 0 };
+    (large_bit << (BASIC_TYPE_BITS + 4))
+        | ((id_size - 1) << (BASIC_TYPE_BITS + 2))
+        | ((offset_size - 1) << BASIC_TYPE_BITS)
+        | VariantBasicType::Object as u8
+}
+
+fn int_size(v: usize) -> u8 {
+    match v {
+        0..=0xFF => 1,
+        0x100..=0xFFFF => 2,
+        0x10000..=0xFFFFFF => 3,
+        _ => 4,
+    }
+}
+
+/// Write little-endian integer to buffer
+fn write_offset(buf: &mut [u8], value: usize, nbytes: u8) {
+    for i in 0..nbytes {
+        buf[i as usize] = ((value >> (i * 8)) & 0xFF) as u8;
+    }
+}
+
+/// Helper to make room for header by moving data
+fn make_room_for_header(buffer: &mut Vec<u8>, start_pos: usize, header_size: 
usize) {
+    let current_len = buffer.len();
+    buffer.resize(current_len + header_size, 0);
+
+    let src_start = start_pos;
+    let src_end = current_len;
+    let dst_start = start_pos + header_size;
+
+    buffer.copy_within(src_start..src_end, dst_start);
+}
+
+/// Builder for [`Variant`] values
+///
+/// # Example: create a Primitive Int8
+/// ```
+/// # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+/// let mut builder = VariantBuilder::new();
+/// builder.append_value(Variant::Int8(42));
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// assert_eq!(variant, Variant::Int8(42));
+/// ```
+///
+/// # Example: Create an Object
+/// This example shows how to create an object with two fields:
+/// ```json
+/// {
+///  "first_name": "Jiaying",
+///  "last_name": "Li"
+/// }
+/// ```
+///
+/// ```
+/// # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+/// let mut builder = VariantBuilder::new();
+/// // Create an object builder that will write fields to the object
+/// let mut object_builder = builder.new_object();
+/// object_builder.append_value("first_name", "Jiaying");
+/// object_builder.append_value("last_name", "Li");
+/// object_builder.finish();
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// let Variant::Object(variant_object) = variant else {
+///   panic!("unexpected variant type")
+/// };
+/// /* TODO: uncomment this, but now VariantObject:field is not implemented
+/// assert_eq!(
+///   variant_object.field("first_name").unwrap(),
+///   Variant::String("Jiaying")
+/// );
+/// assert_eq!(
+///   variant_object.field("last_name").unwrap(),
+///   Variant::String("Li")
+/// );
+/// */
+/// ```
+///
+/// # Example: Create an Array
+///
+/// This example shows how to create an array of integers: `[1, 2, 3]`.
+/// ```
+///  # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+///  let mut builder = VariantBuilder::new();
+///  // Create an array builder that will write elements to the array
+///  let mut array_builder = builder.new_array();
+///  array_builder.append_value(1i8);
+///  array_builder.append_value(2i8);
+///  array_builder.append_value(3i8);
+///  array_builder.finish();
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// let Variant::Array(variant_array) = variant else {
+///   panic!("unexpected variant type")
+/// };
+/// // Verify the array contents
+/// assert_eq!(variant_array.get(0).unwrap(), Variant::Int8(1));
+/// assert_eq!(variant_array.get(1).unwrap(), Variant::Int8(2));
+/// assert_eq!(variant_array.get(2).unwrap(), Variant::Int8(3));
+/// ```
+///
+/// # Example: Array of objects
+///
+/// THis example shows how to create an array of objects:
+/// ```json
+/// [
+///  {
+///   "first_name": "Jiaying",
+///  "last_name": "Li"
+/// },
+///   {
+///    "first_name": "Malthe",
+///    "last_name": "Karbo"
+/// }
+/// ```
+///
+/// TODO
+///
+pub struct VariantBuilder {
+    buffer: Vec<u8>,
+    dict: HashMap<String, u32>,
+    dict_keys: Vec<String>,
+}
+
+impl VariantBuilder {
+    pub fn new() -> Self {
+        Self {
+            buffer: Vec::new(),
+            dict: HashMap::new(),
+            dict_keys: Vec::new(),
+        }
+    }
+
+    fn append_null(&mut self) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Null));
+    }
+
+    fn append_bool(&mut self, value: bool) {
+        let primitive_type = if value {
+            VariantPrimitiveType::BooleanTrue
+        } else {
+            VariantPrimitiveType::BooleanFalse
+        };
+        self.buffer.push(primitive_header(primitive_type));
+    }
+
+    fn append_int8(&mut self, value: i8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int8));
+        self.buffer.push(value as u8);
+    }
+
+    fn append_int16(&mut self, value: i16) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int16));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_int32(&mut self, value: i32) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int32));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_int64(&mut self, value: i64) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int64));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_float(&mut self, value: f32) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Float));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_double(&mut self, value: f64) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Double));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_date(&mut self, value: chrono::NaiveDate) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Date));
+        let days_since_epoch = value
+            .signed_duration_since(chrono::NaiveDate::from_ymd_opt(1970, 1, 
1).unwrap())

Review Comment:
   Update: Looks like 
[from_ymd_opt](https://docs.rs/chrono/latest/chrono/naive/struct.NaiveDate.html#method.from_ymd_opt)
 is `const`; we could define and use a constant so a failed `unwrap` produces a 
compilation failure instead of a runtime panic.



##########
parquet-variant/src/builder.rs:
##########
@@ -0,0 +1,757 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use crate::decoder::{VariantBasicType, VariantPrimitiveType};
+use crate::Variant;
+use std::collections::HashMap;
+
+const BASIC_TYPE_BITS: u8 = 2;
+const MAX_SHORT_STRING_SIZE: usize = 0x3F;
+
+fn primitive_header(primitive_type: VariantPrimitiveType) -> u8 {
+    (primitive_type as u8) << 2 | VariantBasicType::Primitive as u8
+}
+
+fn short_string_header(len: usize) -> u8 {
+    (len as u8) << 2 | VariantBasicType::ShortString as u8
+}
+
+fn array_header(large: bool, offset_size: u8) -> u8 {
+    let large_bit = if large { 1 } else { 0 };
+    (large_bit << (BASIC_TYPE_BITS + 2))
+        | ((offset_size - 1) << BASIC_TYPE_BITS)
+        | VariantBasicType::Array as u8
+}
+
+fn object_header(large: bool, id_size: u8, offset_size: u8) -> u8 {
+    let large_bit = if large { 1 } else { 0 };
+    (large_bit << (BASIC_TYPE_BITS + 4))
+        | ((id_size - 1) << (BASIC_TYPE_BITS + 2))
+        | ((offset_size - 1) << BASIC_TYPE_BITS)
+        | VariantBasicType::Object as u8
+}
+
+fn int_size(v: usize) -> u8 {
+    match v {
+        0..=0xFF => 1,
+        0x100..=0xFFFF => 2,
+        0x10000..=0xFFFFFF => 3,
+        _ => 4,
+    }
+}
+
+/// Write little-endian integer to buffer
+fn write_offset(buf: &mut [u8], value: usize, nbytes: u8) {
+    for i in 0..nbytes {
+        buf[i as usize] = ((value >> (i * 8)) & 0xFF) as u8;
+    }
+}
+
+/// Helper to make room for header by moving data
+fn make_room_for_header(buffer: &mut Vec<u8>, start_pos: usize, header_size: 
usize) {
+    let current_len = buffer.len();
+    buffer.resize(current_len + header_size, 0);
+
+    let src_start = start_pos;
+    let src_end = current_len;
+    let dst_start = start_pos + header_size;
+
+    buffer.copy_within(src_start..src_end, dst_start);
+}
+
+/// Builder for [`Variant`] values
+///
+/// # Example: create a Primitive Int8
+/// ```
+/// # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+/// let mut builder = VariantBuilder::new();
+/// builder.append_value(Variant::Int8(42));
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// assert_eq!(variant, Variant::Int8(42));
+/// ```
+///
+/// # Example: Create an Object
+/// This example shows how to create an object with two fields:
+/// ```json
+/// {
+///  "first_name": "Jiaying",
+///  "last_name": "Li"
+/// }
+/// ```
+///
+/// ```
+/// # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+/// let mut builder = VariantBuilder::new();
+/// // Create an object builder that will write fields to the object
+/// let mut object_builder = builder.new_object();
+/// object_builder.append_value("first_name", "Jiaying");
+/// object_builder.append_value("last_name", "Li");
+/// object_builder.finish();
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// let Variant::Object(variant_object) = variant else {
+///   panic!("unexpected variant type")
+/// };
+/// /* TODO: uncomment this, but now VariantObject:field is not implemented
+/// assert_eq!(
+///   variant_object.field("first_name").unwrap(),
+///   Variant::String("Jiaying")
+/// );
+/// assert_eq!(
+///   variant_object.field("last_name").unwrap(),
+///   Variant::String("Li")
+/// );
+/// */
+/// ```
+///
+/// # Example: Create an Array
+///
+/// This example shows how to create an array of integers: `[1, 2, 3]`.
+/// ```
+///  # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+///  let mut builder = VariantBuilder::new();
+///  // Create an array builder that will write elements to the array
+///  let mut array_builder = builder.new_array();
+///  array_builder.append_value(1i8);
+///  array_builder.append_value(2i8);
+///  array_builder.append_value(3i8);
+///  array_builder.finish();
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// let Variant::Array(variant_array) = variant else {
+///   panic!("unexpected variant type")
+/// };
+/// // Verify the array contents
+/// assert_eq!(variant_array.get(0).unwrap(), Variant::Int8(1));
+/// assert_eq!(variant_array.get(1).unwrap(), Variant::Int8(2));
+/// assert_eq!(variant_array.get(2).unwrap(), Variant::Int8(3));
+/// ```
+///
+/// # Example: Array of objects
+///
+/// THis example shows how to create an array of objects:
+/// ```json
+/// [
+///  {
+///   "first_name": "Jiaying",
+///  "last_name": "Li"
+/// },
+///   {
+///    "first_name": "Malthe",
+///    "last_name": "Karbo"
+/// }
+/// ```
+///
+/// TODO
+///
+pub struct VariantBuilder {
+    buffer: Vec<u8>,
+    dict: HashMap<String, u32>,
+    dict_keys: Vec<String>,
+}
+
+impl VariantBuilder {
+    pub fn new() -> Self {
+        Self {
+            buffer: Vec::new(),
+            dict: HashMap::new(),
+            dict_keys: Vec::new(),
+        }
+    }
+
+    fn append_null(&mut self) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Null));
+    }
+
+    fn append_bool(&mut self, value: bool) {
+        let primitive_type = if value {
+            VariantPrimitiveType::BooleanTrue
+        } else {
+            VariantPrimitiveType::BooleanFalse
+        };
+        self.buffer.push(primitive_header(primitive_type));
+    }
+
+    fn append_int8(&mut self, value: i8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int8));
+        self.buffer.push(value as u8);
+    }
+
+    fn append_int16(&mut self, value: i16) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int16));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_int32(&mut self, value: i32) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int32));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_int64(&mut self, value: i64) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int64));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_float(&mut self, value: f32) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Float));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_double(&mut self, value: f64) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Double));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_date(&mut self, value: chrono::NaiveDate) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Date));
+        let days_since_epoch = value
+            .signed_duration_since(chrono::NaiveDate::from_ymd_opt(1970, 1, 
1).unwrap())
+            .num_days() as i32;
+        self.buffer
+            .extend_from_slice(&days_since_epoch.to_le_bytes());
+    }
+
+    fn append_timestamp_micros(&mut self, value: 
chrono::DateTime<chrono::Utc>) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::TimestampMicros));
+        let micros = value.timestamp_micros();
+        self.buffer.extend_from_slice(&micros.to_le_bytes());
+    }
+
+    fn append_timestamp_ntz_micros(&mut self, value: chrono::NaiveDateTime) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::TimestampNtzMicros));
+        let micros = value.and_utc().timestamp_micros();
+        self.buffer.extend_from_slice(&micros.to_le_bytes());
+    }
+
+    fn append_decimal4(&mut self, integer: i32, scale: u8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Decimal4));
+        self.buffer.push(scale);
+        self.buffer.extend_from_slice(&integer.to_le_bytes());
+    }
+
+    fn append_decimal8(&mut self, integer: i64, scale: u8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Decimal8));
+        self.buffer.push(scale);
+        self.buffer.extend_from_slice(&integer.to_le_bytes());
+    }
+
+    fn append_decimal16(&mut self, integer: i128, scale: u8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Decimal16));
+        self.buffer.push(scale);
+        self.buffer.extend_from_slice(&integer.to_le_bytes());
+    }
+
+    fn append_binary(&mut self, value: &[u8]) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Binary));
+        self.buffer
+            .extend_from_slice(&(value.len() as u32).to_le_bytes());
+        self.buffer.extend_from_slice(value);
+    }
+
+    fn append_string(&mut self, value: &str) {
+        if value.len() <= MAX_SHORT_STRING_SIZE {
+            self.buffer.push(short_string_header(value.len()));
+            self.buffer.extend_from_slice(value.as_bytes());
+        } else {
+            self.buffer
+                .push(primitive_header(VariantPrimitiveType::String));
+            self.buffer
+                .extend_from_slice(&(value.len() as u32).to_le_bytes());
+            self.buffer.extend_from_slice(value.as_bytes());
+        }
+    }
+
+    /// Add key to dictionary, return its ID
+    fn add_key(&mut self, key: &str) -> u32 {
+        use std::collections::hash_map::Entry;
+        match self.dict.entry(key.to_string()) {
+            Entry::Occupied(entry) => *entry.get(),
+            Entry::Vacant(entry) => {
+                let id = self.dict_keys.len() as u32;
+                entry.insert(id);
+                self.dict_keys.push(key.to_string());
+                id
+            }
+        }
+    }
+
+    fn offset(&self) -> usize {
+        self.buffer.len()
+    }
+
+    /// Create an [`ArrayBuilder`] for creating [`Variant::Array`] values.
+    ///
+    /// See the examples on [`VariantBuilder`] for usage.
+    pub fn new_array(&mut self) -> ArrayBuilder {
+        ArrayBuilder::new(self)
+    }
+
+    /// Create an [`ObjectBuilder`] for creating [`Variant::Object`] values.
+    ///
+    /// See the examples on [`VariantBuilder`] for usage.
+    pub fn new_object(&mut self) -> ObjectBuilder {

Review Comment:
   Arrays and objects are tricky to build incrementally. The value offset and 
field id arrays require either knowing the number of elements/fields to be 
created in advance (and then worrying about what happens if the caller builds 
too many/few entries afterward), or building the arrays in separate storage and 
then moving an arbitrarily large number of buffered bytes to make room for 
them after the fact. 
   
   I guess this PR takes the latter approach, but for deeply nested data that 
will lead to something that looks like quadratic cost -- O(num_bytes * depth) 
-- as the deepest fields get moved repeatedly while finishing each of the 
parent builders in turn.
   
   Not sure what the best solution is here...



##########
parquet-variant/src/builder.rs:
##########
@@ -0,0 +1,757 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use crate::decoder::{VariantBasicType, VariantPrimitiveType};
+use crate::Variant;
+use std::collections::HashMap;
+
+const BASIC_TYPE_BITS: u8 = 2;
+const MAX_SHORT_STRING_SIZE: usize = 0x3F;
+
+fn primitive_header(primitive_type: VariantPrimitiveType) -> u8 {
+    (primitive_type as u8) << 2 | VariantBasicType::Primitive as u8
+}
+
+fn short_string_header(len: usize) -> u8 {
+    (len as u8) << 2 | VariantBasicType::ShortString as u8
+}
+
+fn array_header(large: bool, offset_size: u8) -> u8 {
+    let large_bit = if large { 1 } else { 0 };
+    (large_bit << (BASIC_TYPE_BITS + 2))
+        | ((offset_size - 1) << BASIC_TYPE_BITS)
+        | VariantBasicType::Array as u8
+}
+
+fn object_header(large: bool, id_size: u8, offset_size: u8) -> u8 {
+    let large_bit = if large { 1 } else { 0 };
+    (large_bit << (BASIC_TYPE_BITS + 4))
+        | ((id_size - 1) << (BASIC_TYPE_BITS + 2))
+        | ((offset_size - 1) << BASIC_TYPE_BITS)
+        | VariantBasicType::Object as u8
+}
+
+fn int_size(v: usize) -> u8 {
+    match v {
+        0..=0xFF => 1,
+        0x100..=0xFFFF => 2,
+        0x10000..=0xFFFFFF => 3,
+        _ => 4,
+    }
+}
+
+/// Write little-endian integer to buffer
+fn write_offset(buf: &mut [u8], value: usize, nbytes: u8) {
+    for i in 0..nbytes {
+        buf[i as usize] = ((value >> (i * 8)) & 0xFF) as u8;
+    }
+}
+
+/// Helper to make room for header by moving data
+fn make_room_for_header(buffer: &mut Vec<u8>, start_pos: usize, header_size: 
usize) {
+    let current_len = buffer.len();
+    buffer.resize(current_len + header_size, 0);
+
+    let src_start = start_pos;
+    let src_end = current_len;
+    let dst_start = start_pos + header_size;
+
+    buffer.copy_within(src_start..src_end, dst_start);
+}
+
+/// Builder for [`Variant`] values
+///
+/// # Example: create a Primitive Int8
+/// ```
+/// # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+/// let mut builder = VariantBuilder::new();
+/// builder.append_value(Variant::Int8(42));
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// assert_eq!(variant, Variant::Int8(42));
+/// ```
+///
+/// # Example: Create an Object
+/// This example shows how to create an object with two fields:
+/// ```json
+/// {
+///  "first_name": "Jiaying",
+///  "last_name": "Li"
+/// }
+/// ```
+///
+/// ```
+/// # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+/// let mut builder = VariantBuilder::new();
+/// // Create an object builder that will write fields to the object
+/// let mut object_builder = builder.new_object();
+/// object_builder.append_value("first_name", "Jiaying");
+/// object_builder.append_value("last_name", "Li");
+/// object_builder.finish();
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// let Variant::Object(variant_object) = variant else {
+///   panic!("unexpected variant type")
+/// };
+/// /* TODO: uncomment this, but now VariantObject:field is not implemented
+/// assert_eq!(
+///   variant_object.field("first_name").unwrap(),
+///   Variant::String("Jiaying")
+/// );
+/// assert_eq!(
+///   variant_object.field("last_name").unwrap(),
+///   Variant::String("Li")
+/// );
+/// */
+/// ```
+///
+/// # Example: Create an Array
+///
+/// This example shows how to create an array of integers: `[1, 2, 3]`.
+/// ```
+///  # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+///  let mut builder = VariantBuilder::new();
+///  // Create an array builder that will write elements to the array
+///  let mut array_builder = builder.new_array();
+///  array_builder.append_value(1i8);
+///  array_builder.append_value(2i8);
+///  array_builder.append_value(3i8);
+///  array_builder.finish();
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// let Variant::Array(variant_array) = variant else {
+///   panic!("unexpected variant type")
+/// };
+/// // Verify the array contents
+/// assert_eq!(variant_array.get(0).unwrap(), Variant::Int8(1));
+/// assert_eq!(variant_array.get(1).unwrap(), Variant::Int8(2));
+/// assert_eq!(variant_array.get(2).unwrap(), Variant::Int8(3));
+/// ```
+///
+/// # Example: Array of objects
+///
+/// THis example shows how to create an array of objects:
+/// ```json
+/// [
+///  {
+///   "first_name": "Jiaying",
+///  "last_name": "Li"
+/// },
+///   {
+///    "first_name": "Malthe",
+///    "last_name": "Karbo"
+/// }
+/// ```
+///
+/// TODO
+///
+pub struct VariantBuilder {
+    buffer: Vec<u8>,
+    dict: HashMap<String, u32>,
+    dict_keys: Vec<String>,
+}
+
+impl VariantBuilder {
+    pub fn new() -> Self {
+        Self {
+            buffer: Vec::new(),
+            dict: HashMap::new(),
+            dict_keys: Vec::new(),
+        }
+    }
+
+    fn append_null(&mut self) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Null));
+    }
+
+    fn append_bool(&mut self, value: bool) {
+        let primitive_type = if value {
+            VariantPrimitiveType::BooleanTrue
+        } else {
+            VariantPrimitiveType::BooleanFalse
+        };
+        self.buffer.push(primitive_header(primitive_type));
+    }
+
+    fn append_int8(&mut self, value: i8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int8));
+        self.buffer.push(value as u8);
+    }
+
+    fn append_int16(&mut self, value: i16) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int16));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_int32(&mut self, value: i32) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int32));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_int64(&mut self, value: i64) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int64));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_float(&mut self, value: f32) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Float));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_double(&mut self, value: f64) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Double));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_date(&mut self, value: chrono::NaiveDate) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Date));
+        let days_since_epoch = value
+            .signed_duration_since(chrono::NaiveDate::from_ymd_opt(1970, 1, 
1).unwrap())
+            .num_days() as i32;
+        self.buffer
+            .extend_from_slice(&days_since_epoch.to_le_bytes());
+    }
+
+    fn append_timestamp_micros(&mut self, value: 
chrono::DateTime<chrono::Utc>) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::TimestampMicros));
+        let micros = value.timestamp_micros();
+        self.buffer.extend_from_slice(&micros.to_le_bytes());
+    }
+
+    fn append_timestamp_ntz_micros(&mut self, value: chrono::NaiveDateTime) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::TimestampNtzMicros));
+        let micros = value.and_utc().timestamp_micros();
+        self.buffer.extend_from_slice(&micros.to_le_bytes());
+    }
+
+    fn append_decimal4(&mut self, integer: i32, scale: u8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Decimal4));
+        self.buffer.push(scale);
+        self.buffer.extend_from_slice(&integer.to_le_bytes());
+    }
+
+    fn append_decimal8(&mut self, integer: i64, scale: u8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Decimal8));
+        self.buffer.push(scale);
+        self.buffer.extend_from_slice(&integer.to_le_bytes());
+    }
+
+    fn append_decimal16(&mut self, integer: i128, scale: u8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Decimal16));
+        self.buffer.push(scale);
+        self.buffer.extend_from_slice(&integer.to_le_bytes());
+    }
+
+    fn append_binary(&mut self, value: &[u8]) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Binary));
+        self.buffer
+            .extend_from_slice(&(value.len() as u32).to_le_bytes());
+        self.buffer.extend_from_slice(value);
+    }
+
+    fn append_string(&mut self, value: &str) {
+        if value.len() <= MAX_SHORT_STRING_SIZE {
+            self.buffer.push(short_string_header(value.len()));
+            self.buffer.extend_from_slice(value.as_bytes());
+        } else {
+            self.buffer
+                .push(primitive_header(VariantPrimitiveType::String));
+            self.buffer
+                .extend_from_slice(&(value.len() as u32).to_le_bytes());
+            self.buffer.extend_from_slice(value.as_bytes());
+        }
+    }
+
+    /// Add key to dictionary, return its ID
+    fn add_key(&mut self, key: &str) -> u32 {
+        use std::collections::hash_map::Entry;
+        match self.dict.entry(key.to_string()) {
+            Entry::Occupied(entry) => *entry.get(),
+            Entry::Vacant(entry) => {
+                let id = self.dict_keys.len() as u32;
+                entry.insert(id);
+                self.dict_keys.push(key.to_string());
+                id
+            }
+        }
+    }
+
+    fn offset(&self) -> usize {
+        self.buffer.len()
+    }
+
+    /// Create an [`ArrayBuilder`] for creating [`Variant::Array`] values.
+    ///
+    /// See the examples on [`VariantBuilder`] for usage.
+    pub fn new_array(&mut self) -> ArrayBuilder {
+        ArrayBuilder::new(self)
+    }
+
+    /// Create an [`ObjectBuilder`] for creating [`Variant::Object`] values.
+    ///
+    /// See the examples on [`VariantBuilder`] for usage.
+    pub fn new_object(&mut self) -> ObjectBuilder {
+        ObjectBuilder::new(self)
+    }
+
+    pub fn finish(self) -> (Vec<u8>, Vec<u8>) {
+        let nkeys = self.dict_keys.len();
+
+        // Calculate metadata size
+        let total_dict_size: usize = self.dict_keys.iter().map(|k| 
k.len()).sum();
+
+        // Determine appropriate offset size based on the larger of dict size 
or total string size
+        let max_offset = std::cmp::max(total_dict_size, nkeys);
+        let offset_size = int_size(max_offset);
+
+        let offset_start = 1 + offset_size as usize;
+        let string_start = offset_start + (nkeys + 1) * offset_size as usize;
+        let metadata_size = string_start + total_dict_size;
+
+        // Pre-allocate exact size to avoid reallocations
+        let mut metadata = vec![0u8; metadata_size];
+
+        // Write header: version=1, not sorted, with calculated offset_size
+        metadata[0] = 0x01 | ((offset_size - 1) << 6);
+
+        // Write dictionary size
+        write_offset(&mut metadata[1..], nkeys, offset_size);
+
+        // Write offsets and string data
+        let mut cur_offset = 0;
+        for (i, key) in self.dict_keys.iter().enumerate() {
+            write_offset(
+                &mut metadata[offset_start + i * offset_size as usize..],
+                cur_offset,
+                offset_size,
+            );
+            let start = string_start + cur_offset;
+            metadata[start..start + key.len()].copy_from_slice(key.as_bytes());
+            cur_offset += key.len();
+        }
+        // Write final offset
+        write_offset(
+            &mut metadata[offset_start + nkeys * offset_size as usize..],
+            cur_offset,
+            offset_size,
+        );
+
+        (metadata, self.buffer)
+    }
+
+    pub fn append_value<T: Into<Variant<'static, 'static>>>(&mut self, value: 
T) {
+        let variant = value.into();
+        match variant {
+            Variant::Null => self.append_null(),
+            Variant::BooleanTrue => self.append_bool(true),
+            Variant::BooleanFalse => self.append_bool(false),
+            Variant::Int8(v) => self.append_int8(v),
+            Variant::Int16(v) => self.append_int16(v),
+            Variant::Int32(v) => self.append_int32(v),
+            Variant::Int64(v) => self.append_int64(v),
+            Variant::Date(v) => self.append_date(v),
+            Variant::TimestampMicros(v) => self.append_timestamp_micros(v),
+            Variant::TimestampNtzMicros(v) => 
self.append_timestamp_ntz_micros(v),
+            Variant::Decimal4 { integer, scale } => 
self.append_decimal4(integer, scale),
+            Variant::Decimal8 { integer, scale } => 
self.append_decimal8(integer, scale),
+            Variant::Decimal16 { integer, scale } => 
self.append_decimal16(integer, scale),
+            Variant::Float(v) => self.append_float(v),
+            Variant::Double(v) => self.append_double(v),
+            Variant::Binary(v) => self.append_binary(v),
+            Variant::String(s) | Variant::ShortString(s) => 
self.append_string(s),

Review Comment:
   Doesn't this end up deciding between short vs. long string three times -- 
once to make the input, again here, and a third time to consume it? Any way we 
could make this more direct/efficient?



##########
parquet-variant/src/builder.rs:
##########
@@ -0,0 +1,757 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use crate::decoder::{VariantBasicType, VariantPrimitiveType};
+use crate::Variant;
+use std::collections::HashMap;
+
+const BASIC_TYPE_BITS: u8 = 2;
+const MAX_SHORT_STRING_SIZE: usize = 0x3F;
+
+fn primitive_header(primitive_type: VariantPrimitiveType) -> u8 {
+    (primitive_type as u8) << 2 | VariantBasicType::Primitive as u8
+}
+
+fn short_string_header(len: usize) -> u8 {
+    (len as u8) << 2 | VariantBasicType::ShortString as u8
+}
+
+fn array_header(large: bool, offset_size: u8) -> u8 {
+    let large_bit = if large { 1 } else { 0 };
+    (large_bit << (BASIC_TYPE_BITS + 2))
+        | ((offset_size - 1) << BASIC_TYPE_BITS)
+        | VariantBasicType::Array as u8
+}
+
+fn object_header(large: bool, id_size: u8, offset_size: u8) -> u8 {
+    let large_bit = if large { 1 } else { 0 };
+    (large_bit << (BASIC_TYPE_BITS + 4))
+        | ((id_size - 1) << (BASIC_TYPE_BITS + 2))
+        | ((offset_size - 1) << BASIC_TYPE_BITS)
+        | VariantBasicType::Object as u8
+}
+
+fn int_size(v: usize) -> u8 {
+    match v {
+        0..=0xFF => 1,
+        0x100..=0xFFFF => 2,
+        0x10000..=0xFFFFFF => 3,
+        _ => 4,
+    }
+}
+
+/// Write little-endian integer to buffer
+fn write_offset(buf: &mut [u8], value: usize, nbytes: u8) {
+    for i in 0..nbytes {
+        buf[i as usize] = ((value >> (i * 8)) & 0xFF) as u8;
+    }
+}
+
+/// Helper to make room for header by moving data
+fn make_room_for_header(buffer: &mut Vec<u8>, start_pos: usize, header_size: 
usize) {
+    let current_len = buffer.len();
+    buffer.resize(current_len + header_size, 0);
+
+    let src_start = start_pos;
+    let src_end = current_len;
+    let dst_start = start_pos + header_size;
+
+    buffer.copy_within(src_start..src_end, dst_start);
+}
+
+/// Builder for [`Variant`] values
+///
+/// # Example: create a Primitive Int8
+/// ```
+/// # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+/// let mut builder = VariantBuilder::new();
+/// builder.append_value(Variant::Int8(42));
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// assert_eq!(variant, Variant::Int8(42));
+/// ```
+///
+/// # Example: Create an Object
+/// This example shows how to create an object with two fields:
+/// ```json
+/// {
+///  "first_name": "Jiaying",
+///  "last_name": "Li"
+/// }
+/// ```
+///
+/// ```
+/// # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+/// let mut builder = VariantBuilder::new();
+/// // Create an object builder that will write fields to the object
+/// let mut object_builder = builder.new_object();
+/// object_builder.append_value("first_name", "Jiaying");
+/// object_builder.append_value("last_name", "Li");
+/// object_builder.finish();
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// let Variant::Object(variant_object) = variant else {
+///   panic!("unexpected variant type")
+/// };
+/// /* TODO: uncomment this, but now VariantObject:field is not implemented
+/// assert_eq!(
+///   variant_object.field("first_name").unwrap(),
+///   Variant::String("Jiaying")
+/// );
+/// assert_eq!(
+///   variant_object.field("last_name").unwrap(),
+///   Variant::String("Li")
+/// );
+/// */
+/// ```
+///
+/// # Example: Create an Array
+///
+/// This example shows how to create an array of integers: `[1, 2, 3]`.
+/// ```
+///  # use parquet_variant::{Variant, VariantBuilder, VariantMetadata};
+///  let mut builder = VariantBuilder::new();
+///  // Create an array builder that will write elements to the array
+///  let mut array_builder = builder.new_array();
+///  array_builder.append_value(1i8);
+///  array_builder.append_value(2i8);
+///  array_builder.append_value(3i8);
+///  array_builder.finish();
+/// // Finish the builder to get the metadata and value
+/// let (metadata, value) = builder.finish();
+/// // use the Variant API to verify the result
+/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
+/// let variant = Variant::try_new(&metadata, &value).unwrap();
+/// let Variant::Array(variant_array) = variant else {
+///   panic!("unexpected variant type")
+/// };
+/// // Verify the array contents
+/// assert_eq!(variant_array.get(0).unwrap(), Variant::Int8(1));
+/// assert_eq!(variant_array.get(1).unwrap(), Variant::Int8(2));
+/// assert_eq!(variant_array.get(2).unwrap(), Variant::Int8(3));
+/// ```
+///
+/// # Example: Array of objects
+///
+/// This example shows how to create an array of objects:
+/// ```json
+/// [
+///   {
+///     "first_name": "Jiaying",
+///     "last_name": "Li"
+///   },
+///   {
+///     "first_name": "Malthe",
+///     "last_name": "Karbo"
+///   }
+/// ]
+/// ```
+///
+/// TODO
+///
+pub struct VariantBuilder {
+    buffer: Vec<u8>,
+    dict: HashMap<String, u32>,
+    dict_keys: Vec<String>,
+}
+
+impl VariantBuilder {
+    pub fn new() -> Self {
+        Self {
+            buffer: Vec::new(),
+            dict: HashMap::new(),
+            dict_keys: Vec::new(),
+        }
+    }
+
+    fn append_null(&mut self) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Null));
+    }
+
+    fn append_bool(&mut self, value: bool) {
+        let primitive_type = if value {
+            VariantPrimitiveType::BooleanTrue
+        } else {
+            VariantPrimitiveType::BooleanFalse
+        };
+        self.buffer.push(primitive_header(primitive_type));
+    }
+
+    fn append_int8(&mut self, value: i8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int8));
+        self.buffer.push(value as u8);
+    }
+
+    fn append_int16(&mut self, value: i16) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int16));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_int32(&mut self, value: i32) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int32));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_int64(&mut self, value: i64) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Int64));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_float(&mut self, value: f32) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Float));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_double(&mut self, value: f64) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Double));
+        self.buffer.extend_from_slice(&value.to_le_bytes());
+    }
+
+    fn append_date(&mut self, value: chrono::NaiveDate) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Date));
+        let days_since_epoch = value
+            .signed_duration_since(chrono::NaiveDate::from_ymd_opt(1970, 1, 
1).unwrap())
+            .num_days() as i32;
+        self.buffer
+            .extend_from_slice(&days_since_epoch.to_le_bytes());
+    }
+
+    fn append_timestamp_micros(&mut self, value: 
chrono::DateTime<chrono::Utc>) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::TimestampMicros));
+        let micros = value.timestamp_micros();
+        self.buffer.extend_from_slice(&micros.to_le_bytes());
+    }
+
+    fn append_timestamp_ntz_micros(&mut self, value: chrono::NaiveDateTime) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::TimestampNtzMicros));
+        let micros = value.and_utc().timestamp_micros();
+        self.buffer.extend_from_slice(&micros.to_le_bytes());
+    }
+
+    fn append_decimal4(&mut self, integer: i32, scale: u8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Decimal4));
+        self.buffer.push(scale);
+        self.buffer.extend_from_slice(&integer.to_le_bytes());
+    }
+
+    fn append_decimal8(&mut self, integer: i64, scale: u8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Decimal8));
+        self.buffer.push(scale);
+        self.buffer.extend_from_slice(&integer.to_le_bytes());
+    }
+
+    fn append_decimal16(&mut self, integer: i128, scale: u8) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Decimal16));
+        self.buffer.push(scale);
+        self.buffer.extend_from_slice(&integer.to_le_bytes());
+    }
+
+    fn append_binary(&mut self, value: &[u8]) {
+        self.buffer
+            .push(primitive_header(VariantPrimitiveType::Binary));
+        self.buffer
+            .extend_from_slice(&(value.len() as u32).to_le_bytes());
+        self.buffer.extend_from_slice(value);

Review Comment:
   If we're truncating the reported length to u32, should we also truncate the 
slice we extend from? That would produce an incorrect but "safe" result.
   
   Actually, though... the buffer's max allocation size is isize::MAX, so we 
probably need a length check to avoid a panic?
   
   Allocation panics in general would seem to be a problem... how can code that 
aims to be panic-free defend itself?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to