tustvold commented on code in PR #3553:
URL: https://github.com/apache/arrow-rs/pull/3553#discussion_r1083460059


##########
arrow-array/src/array/run_end_encoded_array.rs:
##########
@@ -0,0 +1,519 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::any::Any;
+
+use arrow_data::{ArrayData, ArrayDataBuilder};
+use arrow_schema::{ArrowError, DataType, Field};
+
+use crate::{
+    builder::StringREEArrayBuilder,
+    make_array,
+    types::{ArrowRunEndIndexType, Int16Type, Int32Type, Int64Type},
+    Array, ArrayRef, PrimitiveArray,
+};
+
+///
+/// A run-end encoding (REE) is a variation of [run-length encoding 
(RLE)](https://en.wikipedia.org/wiki/Run-length_encoding).
+/// This encoding is good for representing data containing same values 
repeated consecutively
+/// called runs. Each run is represented by the value of data and the index at 
which the run ends.
+///
+/// [`RunEndEncodedArray`] has `run_ends` array and `values` array of same 
length.
+/// The `run_ends` array stores the indexes at which the run ends. The 
`values` array
+/// stores the value of the run. Below example illustrates how a logical array 
is represented in
+/// [`RunEndEncodedArray`]
+///
+///
+/// ```text
+/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─┐
+///   ┌─────────────────┐  ┌─────────┐       ┌─────────────────┐
+/// │ │        A        │  │    2    │ │     │        A        │     
+///   ├─────────────────┤  ├─────────┤       ├─────────────────┤
+/// │ │        D        │  │    3    │ │     │        A        │    run length 
of 'A' = runs_ends[0] - 0 = 2
+///   ├─────────────────┤  ├─────────┤       ├─────────────────┤
+/// │ │        B        │  │    6    │ │     │        D        │    run length 
of 'D' = run_ends[1] - run_ends[0] = 1
+///   └─────────────────┘  └─────────┘       ├─────────────────┤
+/// │        values          run_ends  │     │        B        │     
+///                                          ├─────────────────┤
+/// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─┘     │        B        │     
+///                                          ├─────────────────┤
+///           RunEndEncodedArray             │        B        │    run length 
of 'B' = run_ends[2] - run_ends[1] = 3
+///               length = 3                 └─────────────────┘
+///  
+///                                             Logical array
+///                                                Contents
+/// ```
+
+pub struct RunEndEncodedArray<R: ArrowRunEndIndexType> {
+    data: ArrayData,
+    run_ends: PrimitiveArray<R>,
+    values: ArrayRef,
+}
+
+impl<R: ArrowRunEndIndexType> RunEndEncodedArray<R> {
+    /// Attempts to create RunEndEncodedArray using given run_ends (index 
where a run ends)
+    /// and the values (value of the run). Returns an error if the given data 
is not compatible
+    /// with RunEndEncoded specification.
+    pub fn try_new(
+        run_ends: &PrimitiveArray<R>,
+        values: &dyn Array,
+    ) -> Result<Self, ArrowError> {
+        let run_ends_type = run_ends.data_type().clone();
+        let values_type = values.data_type().clone();
+        let ree_array_type = DataType::RunEndEncoded(
+            Box::new(Field::new("run_ends", run_ends_type, false)),
+            Box::new(Field::new("values", values_type, true)),
+        );
+        let builder = ArrayDataBuilder::new(ree_array_type)
+            .add_child_data(run_ends.data().clone())
+            .add_child_data(values.data().clone());
+
+        // `build_unchecked` is used to avoid recursive validation of child 
arrays.
+        let array_data = unsafe { builder.build_unchecked() };
+
+        // Safety: `validate_data` checks below
+        //    1. run_ends array does not have null values
+        //    2. run_ends array has non-zero and strictly increasing values.
+        //    3. The length of run_ends array and values array are the same.
+        array_data.validate_data()?;
+
+        Ok(array_data.into())
+    }
+    /// Returns a reference to run_ends array
+    pub fn run_ends(&self) -> &PrimitiveArray<R> {
+        &self.run_ends
+    }
+
+    /// Returns a reference to values array
+    pub fn values(&self) -> &ArrayRef {
+        &self.values
+    }
+}
+
+impl<R: ArrowRunEndIndexType> From<ArrayData> for RunEndEncodedArray<R> {
+    fn from(data: ArrayData) -> Self {
+        match data.data_type() {
+            DataType::RunEndEncoded(run_ends_data_type, _) => {
+                assert_eq!(
+                    &R::DATA_TYPE,
+                    run_ends_data_type.data_type(),
+                    "Data type mismatch for run_ends array, expected {} got 
{}",
+                    R::DATA_TYPE,
+                    run_ends_data_type.data_type()
+                );
+            }
+            _ => {
+                panic!("Invalid data type for RunEndEncodedArray. The data 
type should be DataType::RunEndEncoded");
+            }
+        }
+
+        // Safety: `validate_data` checks below
+        //    1. The given array data has exactly two child arrays.
+        //    2. The first child array (run_ends) has valid data type.
+        //    3. run_ends array does not have null values
+        //    4. run_ends array has non-zero and strictly increasing values.
+        //    5. The length of run_ends array and values array are the same.
+        data.validate_data().unwrap();

Review Comment:
   ```suggestion
   ```
   
   Validation should be performed when constructing the `ArrayData`, not when 
creating an array from it.



##########
arrow-array/src/array/run_end_encoded_array.rs:
##########
@@ -0,0 +1,519 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::any::Any;
+
+use arrow_data::{ArrayData, ArrayDataBuilder};
+use arrow_schema::{ArrowError, DataType, Field};
+
+use crate::{
+    builder::StringREEArrayBuilder,
+    make_array,
+    types::{ArrowRunEndIndexType, Int16Type, Int32Type, Int64Type},
+    Array, ArrayRef, PrimitiveArray,
+};
+
+///
+/// A run-end encoding (REE) is a variation of [run-length encoding 
(RLE)](https://en.wikipedia.org/wiki/Run-length_encoding).
+/// This encoding is good for representing data containing same values 
repeated consecutively
+/// called runs. Each run is represented by the value of data and the index at 
which the run ends.
+///
+/// [`RunEndEncodedArray`] has `run_ends` array and `values` array of same 
length.
+/// The `run_ends` array stores the indexes at which the run ends. The 
`values` array
+/// stores the value of the run. Below example illustrates how a logical array 
is represented in
+/// [`RunEndEncodedArray`]
+///
+///
+/// ```text
+/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─┐
+///   ┌─────────────────┐  ┌─────────┐       ┌─────────────────┐
+/// │ │        A        │  │    2    │ │     │        A        │     
+///   ├─────────────────┤  ├─────────┤       ├─────────────────┤
+/// │ │        D        │  │    3    │ │     │        A        │    run length 
of 'A' = runs_ends[0] - 0 = 2
+///   ├─────────────────┤  ├─────────┤       ├─────────────────┤
+/// │ │        B        │  │    6    │ │     │        D        │    run length 
of 'D' = run_ends[1] - run_ends[0] = 1
+///   └─────────────────┘  └─────────┘       ├─────────────────┤
+/// │        values          run_ends  │     │        B        │     
+///                                          ├─────────────────┤
+/// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─┘     │        B        │     
+///                                          ├─────────────────┤
+///           RunEndEncodedArray             │        B        │    run length 
of 'B' = run_ends[2] - run_ends[1] = 3
+///               length = 3                 └─────────────────┘
+///  
+///                                             Logical array
+///                                                Contents
+/// ```
+
+pub struct RunEndEncodedArray<R: ArrowRunEndIndexType> {
+    data: ArrayData,
+    run_ends: PrimitiveArray<R>,
+    values: ArrayRef,
+}
+
+impl<R: ArrowRunEndIndexType> RunEndEncodedArray<R> {
+    /// Attempts to create RunEndEncodedArray using given run_ends (index 
where a run ends)
+    /// and the values (value of the run). Returns an error if the given data 
is not compatible
+    /// with RunEndEncoded specification.
+    pub fn try_new(
+        run_ends: &PrimitiveArray<R>,
+        values: &dyn Array,
+    ) -> Result<Self, ArrowError> {
+        let run_ends_type = run_ends.data_type().clone();
+        let values_type = values.data_type().clone();
+        let ree_array_type = DataType::RunEndEncoded(
+            Box::new(Field::new("run_ends", run_ends_type, false)),
+            Box::new(Field::new("values", values_type, true)),
+        );
+        let builder = ArrayDataBuilder::new(ree_array_type)
+            .add_child_data(run_ends.data().clone())
+            .add_child_data(values.data().clone());
+
+        // `build_unchecked` is used to avoid recursive validation of child 
arrays.
+        let array_data = unsafe { builder.build_unchecked() };
+
+        // Safety: `validate_data` checks below
+        //    1. run_ends array does not have null values
+        //    2. run_ends array has non-zero and strictly increasing values.
+        //    3. The length of run_ends array and values array are the same.
+        array_data.validate_data()?;
+
+        Ok(array_data.into())
+    }
+    /// Returns a reference to run_ends array
+    pub fn run_ends(&self) -> &PrimitiveArray<R> {
+        &self.run_ends
+    }
+
+    /// Returns a reference to values array
+    pub fn values(&self) -> &ArrayRef {
+        &self.values
+    }
+}
+
+impl<R: ArrowRunEndIndexType> From<ArrayData> for RunEndEncodedArray<R> {
+    fn from(data: ArrayData) -> Self {
+        match data.data_type() {
+            DataType::RunEndEncoded(run_ends_data_type, _) => {
+                assert_eq!(
+                    &R::DATA_TYPE,
+                    run_ends_data_type.data_type(),
+                    "Data type mismatch for run_ends array, expected {} got 
{}",
+                    R::DATA_TYPE,
+                    run_ends_data_type.data_type()
+                );
+            }
+            _ => {
+                panic!("Invalid data type for RunEndEncodedArray. The data 
type should be DataType::RunEndEncoded");
+            }
+        }
+
+        // Safety: `validate_data` checks below
+        //    1. The given array data has exactly two child arrays.
+        //    2. The first child array (run_ends) has valid data type.
+        //    3. run_ends array does not have null values
+        //    4. run_ends array has non-zero and strictly increasing values.
+        //    5. The length of run_ends array and values array are the same.
+        data.validate_data().unwrap();
+
+        let run_ends = PrimitiveArray::<R>::from(data.child_data()[0].clone());
+        let values = make_array(data.child_data()[1].clone());
+        Self {
+            data,
+            run_ends,
+            values,
+        }
+    }
+}
+
+impl<R: ArrowRunEndIndexType> From<RunEndEncodedArray<R>> for ArrayData {
+    fn from(array: RunEndEncodedArray<R>) -> Self {
+        array.data
+    }
+}
+
+impl<T: ArrowRunEndIndexType> Array for RunEndEncodedArray<T> {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn data(&self) -> &ArrayData {
+        &self.data
+    }
+
+    fn into_data(self) -> ArrayData {
+        self.into()
+    }
+}
+
+impl<R: ArrowRunEndIndexType> std::fmt::Debug for RunEndEncodedArray<R> {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        writeln!(
+            f,
+            "RunEndEncodedArray {{run_ends: {:?}, values: {:?}}}",
+            self.run_ends, self.values
+        )
+    }
+}
+
+/// Constructs a `RunEndEncodedArray` from an iterator of optional strings.
+///
+/// # Example:
+/// ```
+/// use arrow_array::{RunEndEncodedArray, PrimitiveArray, StringArray, 
types::Int16Type};
+///
+/// let test = vec!["a", "a", "b", "c", "c"];
+/// let array: RunEndEncodedArray<Int16Type> = test
+///     .iter()
+///     .map(|&x| if x == "b" { None } else { Some(x) })
+///     .collect();
+/// assert_eq!(
+///     "RunEndEncodedArray {run_ends: PrimitiveArray<Int16>\n[\n  2,\n  3,\n  
5,\n], values: StringArray\n[\n  \"a\",\n  null,\n  \"c\",\n]}\n",
+///     format!("{:?}", array)
+/// );
+/// ```
+impl<'a, T: ArrowRunEndIndexType> FromIterator<Option<&'a str>>
+    for RunEndEncodedArray<T>
+{
+    fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
+        let it = iter.into_iter();
+        let (lower, _) = it.size_hint();
+        let mut builder = StringREEArrayBuilder::with_capacity(lower, 256);
+        it.for_each(|i| {
+            if let Some(i) = i {
+                builder
+                    .append_value(i)
+                    .expect("Unable to append a value to a run end encoded 
array.");
+            } else {
+                builder
+                    .append_null()
+                    .expect("Unable to append null value to run end encoded 
array.");
+            }
+        });
+
+        builder.finish()
+    }
+}
+
+/// Constructs a `RunEndEncodedArray` from an iterator of strings.
+///
+/// # Example:
+///
+/// ```
+/// use arrow_array::{RunEndEncodedArray, PrimitiveArray, StringArray, 
types::Int16Type};
+///
+/// let test = vec!["a", "a", "b", "c"];
+/// let array: RunEndEncodedArray<Int16Type> = test.into_iter().collect();
+/// assert_eq!(
+///     "RunEndEncodedArray {run_ends: PrimitiveArray<Int16>\n[\n  2,\n  3,\n  
4,\n], values: StringArray\n[\n  \"a\",\n  \"b\",\n  \"c\",\n]}\n",
+///     format!("{:?}", array)
+/// );
+/// ```
+impl<'a, T: ArrowRunEndIndexType> FromIterator<&'a str> for 
RunEndEncodedArray<T> {
+    fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
+        let it = iter.into_iter();
+        let (lower, _) = it.size_hint();
+        let mut builder = StringREEArrayBuilder::with_capacity(lower, 256);
+        it.for_each(|i| {
+            builder
+                .append_value(i)
+                .expect("Unable to append a value to a dictionary array.");
+        });
+
+        builder.finish()
+    }
+}
+
+///
+/// A [`RunEndEncodedArray`] array where run ends are stored using `i16` data 
type.
+///
+/// # Example: Using `collect`
+/// ```
+/// # use arrow_array::{Array, Int16RunEndEncodedArray, Int16Array, 
StringArray};
+/// # use std::sync::Arc;
+///
+/// let array: Int16RunEndEncodedArray = vec!["a", "a", "b", "c", 
"c"].into_iter().collect();
+/// let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", 
"c"]));
+/// assert_eq!(array.run_ends(), &Int16Array::from(vec![2, 3, 5]));
+/// assert_eq!(array.values(), &values);
+/// ```
+pub type Int16RunEndEncodedArray = RunEndEncodedArray<Int16Type>;
+
+///
+/// A [`RunEndEncodedArray`] array where run ends are stored using `i32` data 
type.
+///
+/// # Example: Using `collect`
+/// ```
+/// # use arrow_array::{Array, Int32RunEndEncodedArray, Int32Array, 
StringArray};
+/// # use std::sync::Arc;
+///
+/// let array: Int32RunEndEncodedArray = vec!["a", "a", "b", "c", 
"c"].into_iter().collect();
+/// let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", 
"c"]));
+/// assert_eq!(array.run_ends(), &Int32Array::from(vec![2, 3, 5]));
+/// assert_eq!(array.values(), &values);
+/// ```
+pub type Int32RunEndEncodedArray = RunEndEncodedArray<Int32Type>;
+
+///
+/// A [`RunEndEncodedArray`] array where run ends are stored using `i64` data 
type.
+///
+/// # Example: Using `collect`
+/// ```
+/// # use arrow_array::{Array, Int64RunEndEncodedArray, Int64Array, 
StringArray};
+/// # use std::sync::Arc;
+///
+/// let array: Int64RunEndEncodedArray = vec!["a", "a", "b", "c", 
"c"].into_iter().collect();
+/// let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", 
"c"]));
+/// assert_eq!(array.run_ends(), &Int64Array::from(vec![2, 3, 5]));
+/// assert_eq!(array.values(), &values);
+/// ```
+pub type Int64RunEndEncodedArray = RunEndEncodedArray<Int64Type>;
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use super::*;
+    use crate::builder::PrimitiveREEArrayBuilder;
+    use crate::types::{Int16Type, Int32Type, UInt32Type};
+    use crate::{Array, Int16Array, Int32Array, StringArray};
+    use arrow_buffer::{Buffer, ToByteSlice};
+    use arrow_schema::Field;
+
+    #[test]
+    fn test_ree_array() {
+        // Construct a value array
+        let value_data = ArrayData::builder(DataType::Int8)

Review Comment:
   FWIW, using `PrimitiveArray::from_iter_values([]).into_data()` might be nicer.



##########
arrow-array/src/array/run_end_encoded_array.rs:
##########
@@ -0,0 +1,519 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::any::Any;
+
+use arrow_data::{ArrayData, ArrayDataBuilder};
+use arrow_schema::{ArrowError, DataType, Field};
+
+use crate::{
+    builder::StringREEArrayBuilder,
+    make_array,
+    types::{ArrowRunEndIndexType, Int16Type, Int32Type, Int64Type},
+    Array, ArrayRef, PrimitiveArray,
+};
+
+///
+/// A run-end encoding (REE) is a variation of [run-length encoding 
(RLE)](https://en.wikipedia.org/wiki/Run-length_encoding).
+/// This encoding is good for representing data containing same values 
repeated consecutively
+/// called runs. Each run is represented by the value of data and the index at 
which the run ends.
+///
+/// [`RunEndEncodedArray`] has `run_ends` array and `values` array of same 
length.
+/// The `run_ends` array stores the indexes at which the run ends. The 
`values` array
+/// stores the value of the run. Below example illustrates how a logical array 
is represented in
+/// [`RunEndEncodedArray`]
+///
+///
+/// ```text
+/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─┐
+///   ┌─────────────────┐  ┌─────────┐       ┌─────────────────┐
+/// │ │        A        │  │    2    │ │     │        A        │     
+///   ├─────────────────┤  ├─────────┤       ├─────────────────┤
+/// │ │        D        │  │    3    │ │     │        A        │    run length 
of 'A' = runs_ends[0] - 0 = 2
+///   ├─────────────────┤  ├─────────┤       ├─────────────────┤
+/// │ │        B        │  │    6    │ │     │        D        │    run length 
of 'D' = run_ends[1] - run_ends[0] = 1
+///   └─────────────────┘  └─────────┘       ├─────────────────┤
+/// │        values          run_ends  │     │        B        │     
+///                                          ├─────────────────┤
+/// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─┘     │        B        │     
+///                                          ├─────────────────┤
+///           RunEndEncodedArray             │        B        │    run length 
of 'B' = run_ends[2] - run_ends[1] = 3
+///               length = 3                 └─────────────────┘
+///  
+///                                             Logical array
+///                                                Contents
+/// ```
+
+pub struct RunEndEncodedArray<R: ArrowRunEndIndexType> {
+    data: ArrayData,
+    run_ends: PrimitiveArray<R>,
+    values: ArrayRef,
+}
+
+impl<R: ArrowRunEndIndexType> RunEndEncodedArray<R> {
+    /// Attempts to create RunEndEncodedArray using given run_ends (index 
where a run ends)
+    /// and the values (value of the run). Returns an error if the given data 
is not compatible
+    /// with RunEndEncoded specification.
+    pub fn try_new(
+        run_ends: &PrimitiveArray<R>,
+        values: &dyn Array,
+    ) -> Result<Self, ArrowError> {
+        let run_ends_type = run_ends.data_type().clone();
+        let values_type = values.data_type().clone();
+        let ree_array_type = DataType::RunEndEncoded(
+            Box::new(Field::new("run_ends", run_ends_type, false)),
+            Box::new(Field::new("values", values_type, true)),
+        );
+        let builder = ArrayDataBuilder::new(ree_array_type)
+            .add_child_data(run_ends.data().clone())
+            .add_child_data(values.data().clone());
+
+        // `build_unchecked` is used to avoid recursive validation of child 
arrays.
+        let array_data = unsafe { builder.build_unchecked() };
+
+        // Safety: `validate_data` checks below
+        //    1. run_ends array does not have null values
+        //    2. run_ends array has non-zero and strictly increasing values.
+        //    3. The length of run_ends array and values array are the same.
+        array_data.validate_data()?;
+
+        Ok(array_data.into())
+    }
+    /// Returns a reference to run_ends array
+    pub fn run_ends(&self) -> &PrimitiveArray<R> {
+        &self.run_ends
+    }
+
+    /// Returns a reference to values array
+    pub fn values(&self) -> &ArrayRef {
+        &self.values
+    }
+}
+
+impl<R: ArrowRunEndIndexType> From<ArrayData> for RunEndEncodedArray<R> {
+    fn from(data: ArrayData) -> Self {
+        match data.data_type() {
+            DataType::RunEndEncoded(run_ends_data_type, _) => {
+                assert_eq!(

Review Comment:
   FWIW, I think this check should already be performed by 
`PrimitiveArray::<R>::from` below, so it would be sufficient to just check 
that the data type is `DataType::RunEndEncoded`, as expected.



##########
arrow-array/src/builder/generic_byte_ree_array_builder.rs:
##########
@@ -0,0 +1,423 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::{
+    types::{
+        ArrowRunEndIndexType, BinaryType, ByteArrayType, LargeBinaryType, 
LargeUtf8Type,
+        Utf8Type,
+    },
+    ArrowPrimitiveType, RunEndEncodedArray,
+};
+
+use super::{GenericByteBuilder, PrimitiveBuilder};
+
+use arrow_buffer::ArrowNativeType;
+use arrow_schema::ArrowError;
+
+/// Array builder for [`RunEndEncodedArray`] for String and Binary types.
+///
+/// # Example:
+///
+/// ```
+///
+/// # use arrow_array::builder::GenericByteREEArrayBuilder;
+/// # use arrow_array::{GenericByteArray, BinaryArray};
+/// # use arrow_array::types::{BinaryType, Int16Type};
+/// # use arrow_array::{Array, Int16Array};
+///
+/// let mut builder =
+/// GenericByteREEArrayBuilder::<Int16Type, BinaryType>::new();
+/// builder.append_value(b"abc").unwrap();
+/// builder.append_value(b"abc").unwrap();
+/// builder.append_null().unwrap();
+/// builder.append_value(b"def").unwrap();
+/// let array = builder.finish();
+///
+/// assert_eq!(
+///     array.run_ends(),
+///     &Int16Array::from(vec![Some(2), Some(3), Some(4)])
+/// );
+///
+/// let av = array.values();
+///
+/// assert!(!av.is_null(0));
+/// assert!(av.is_null(1));
+/// assert!(!av.is_null(2));
+///
+/// // Values are polymorphic and so require a downcast.
+/// let ava: &BinaryArray = av.as_any().downcast_ref::<BinaryArray>().unwrap();
+///
+/// assert_eq!(ava.value(0), b"abc");
+/// assert_eq!(ava.value(2), b"def");
+/// ```
+#[derive(Debug)]
+pub struct GenericByteREEArrayBuilder<R, V>
+where
+    R: ArrowPrimitiveType,
+    V: ByteArrayType,
+{
+    run_ends_builder: PrimitiveBuilder<R>,
+    values_builder: GenericByteBuilder<V>,
+    current_value: Option<Vec<u8>>,
+    current_run_end_index: usize,
+}
+

Review Comment:
   This doesn't appear to implement the `ArrayBuilder` trait?



##########
arrow-array/src/builder/generic_byte_ree_array_builder.rs:
##########
@@ -0,0 +1,423 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::{
+    types::{
+        ArrowRunEndIndexType, BinaryType, ByteArrayType, LargeBinaryType, 
LargeUtf8Type,
+        Utf8Type,
+    },
+    ArrowPrimitiveType, RunEndEncodedArray,
+};
+
+use super::{GenericByteBuilder, PrimitiveBuilder};
+
+use arrow_buffer::ArrowNativeType;
+use arrow_schema::ArrowError;
+
+/// Array builder for [`RunEndEncodedArray`] for String and Binary types.
+///
+/// # Example:
+///
+/// ```
+///
+/// # use arrow_array::builder::GenericByteREEArrayBuilder;
+/// # use arrow_array::{GenericByteArray, BinaryArray};
+/// # use arrow_array::types::{BinaryType, Int16Type};
+/// # use arrow_array::{Array, Int16Array};
+///
+/// let mut builder =
+/// GenericByteREEArrayBuilder::<Int16Type, BinaryType>::new();
+/// builder.append_value(b"abc").unwrap();
+/// builder.append_value(b"abc").unwrap();
+/// builder.append_null().unwrap();
+/// builder.append_value(b"def").unwrap();
+/// let array = builder.finish();
+///
+/// assert_eq!(
+///     array.run_ends(),
+///     &Int16Array::from(vec![Some(2), Some(3), Some(4)])
+/// );
+///
+/// let av = array.values();
+///
+/// assert!(!av.is_null(0));
+/// assert!(av.is_null(1));
+/// assert!(!av.is_null(2));
+///
+/// // Values are polymorphic and so require a downcast.
+/// let ava: &BinaryArray = av.as_any().downcast_ref::<BinaryArray>().unwrap();
+///
+/// assert_eq!(ava.value(0), b"abc");
+/// assert_eq!(ava.value(2), b"def");
+/// ```
+#[derive(Debug)]
+pub struct GenericByteREEArrayBuilder<R, V>
+where
+    R: ArrowPrimitiveType,
+    V: ByteArrayType,
+{
+    run_ends_builder: PrimitiveBuilder<R>,
+    values_builder: GenericByteBuilder<V>,
+    current_value: Option<Vec<u8>>,
+    current_run_end_index: usize,
+}
+
+impl<R, V> Default for GenericByteREEArrayBuilder<R, V>
+where
+    R: ArrowPrimitiveType,
+    V: ByteArrayType,
+{
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<R, V> GenericByteREEArrayBuilder<R, V>
+where
+    R: ArrowPrimitiveType,
+    V: ByteArrayType,
+{
+    /// Creates a new `GenericByteREEArrayBuilder`
+    pub fn new() -> Self {
+        Self {
+            run_ends_builder: PrimitiveBuilder::new(),
+            values_builder: GenericByteBuilder::<V>::new(),
+            current_value: None,
+            current_run_end_index: 0,
+        }
+    }
+
+    /// Creates a new `GenericByteREEArrayBuilder` with the provided capacity
+    ///
+    /// `capacity`: the expected number of run-end encoded values.
+    /// `data_capacity`: the expected number of bytes of run end encoded values
+    pub fn with_capacity(capacity: usize, data_capacity: usize) -> Self {
+        Self {
+            run_ends_builder: PrimitiveBuilder::with_capacity(capacity),
+            values_builder: GenericByteBuilder::<V>::with_capacity(
+                capacity,
+                data_capacity,
+            ),
+            current_value: None,
+            current_run_end_index: 0,
+        }
+    }
+}
+
+impl<R, V> GenericByteREEArrayBuilder<R, V>
+where
+    R: ArrowRunEndIndexType,
+    V: ByteArrayType,
+{
+    /// Appends optional value to the logical array encoded by the 
RunEndEncodedArray.
+    pub fn append_option(
+        &mut self,
+        input_value: Option<impl AsRef<V::Native>>,
+    ) -> Result<(), ArrowError> {
+        match input_value {
+            Some(value) => self.append_value(value)?,
+            None => self.append_null()?,
+        }
+        Ok(())
+    }
+
+    /// Appends value to the logical array encoded by the RunEndEncodedArray.
+    pub fn append_value(
+        &mut self,
+        input_value: impl AsRef<V::Native>,
+    ) -> Result<(), ArrowError> {
+        let value: &[u8] = input_value.as_ref().as_ref();
+        match self.current_value.as_deref() {
+            None if self.current_run_end_index > 0 => {
+                self.append_run_end()?;
+                self.current_value = Some(value.to_owned());
+            }
+            None if self.current_run_end_index == 0 => {
+                self.current_value = Some(value.to_owned());
+            }
+            Some(current_value) if current_value != value => {
+                self.append_run_end()?;
+                self.current_value = Some(value.to_owned());
+            }
+            _ => {}
+        }
+        self.current_run_end_index = self
+            .current_run_end_index
+            .checked_add(1)
+            .ok_or(ArrowError::RunEndIndexOverflowError)?;
+        Ok(())
+    }
+
+    /// Appends null to the logical array encoded by the RunEndEncodedArray.
+    pub fn append_null(&mut self) -> Result<(), ArrowError> {
+        if self.current_value.is_some() {
+            self.append_run_end()?;
+            self.current_value = None;
+        }
+        self.current_run_end_index = self
+            .current_run_end_index
+            .checked_add(1)
+            .ok_or(ArrowError::RunEndIndexOverflowError)?;
+        Ok(())
+    }
+
+    /// Creates the RunEndEncodedArray and resets the builder.
+    /// Panics if RunEndEncodedArray cannot be built.
+    pub fn finish(&mut self) -> RunEndEncodedArray<R> {
+        // write the last run end to the array.
+        self.append_run_end().unwrap();
+
+        // reset the run end index to zero.
+        self.current_value = None;
+        self.current_run_end_index = 0;
+
+        // build the run encoded array by adding run_ends and values array as 
its children.
+        let run_ends_array = self.run_ends_builder.finish();
+        let values_array = self.values_builder.finish();
+        RunEndEncodedArray::<R>::try_new(&run_ends_array, 
&values_array).unwrap()
+    }
+
+    /// Creates the RunEndEncodedArray and without resetting the builder.
+    /// Panics if RunEndEncodedArray cannot be built.
+    pub fn finish_cloned(&mut self) -> RunEndEncodedArray<R> {
+        // write the last run end to the array.
+        self.append_run_end().unwrap();
+
+        // build the run encoded array by adding run_ends and values array as 
its children.
+        let run_ends_array = self.run_ends_builder.finish_cloned();
+        let values_array = self.values_builder.finish_cloned();
+        RunEndEncodedArray::<R>::try_new(&run_ends_array, 
&values_array).unwrap()
+    }
+
+    // Appends the current run to the array
+    fn append_run_end(&mut self) -> Result<(), ArrowError> {
+        let run_end_index = R::Native::from_usize(self.current_run_end_index)
+            .ok_or_else(|| {
+                ArrowError::ParseError(format!(
+                    "Cannot convert the value {} from `usize` to native form 
of arrow datatype {}",
+                    self.current_run_end_index,
+                    R::DATA_TYPE
+                ))
+            })?;
+        self.run_ends_builder.append_value(run_end_index);
+        match self.current_value.as_deref() {
+            Some(value) => self.values_builder.append_slice(value),
+            None => self.values_builder.append_null(),
+        }
+        Ok(())
+    }
+}
+
+/// Array builder for [`RunEndEncodedArray`] that encodes strings 
([`Utf8Type`]).
+///
+/// ```
+/// // Create a run-end encoded array with run-end indexes data type as `i16`.
+/// // The encoded values are Strings.
+///
+/// # use arrow_array::builder::StringREEArrayBuilder;
+/// # use arrow_array::{Int16Array, StringArray};
+/// # use arrow_array::types::Int16Type;
+///
+/// let mut builder = StringREEArrayBuilder::<Int16Type>::new();
+///
+/// // The builder builds the dictionary value by value
+/// builder.append_value("abc").unwrap();
+/// builder.append_null();
+/// builder.append_value("def").unwrap();
+/// builder.append_value("def").unwrap();
+/// builder.append_value("abc").unwrap();
+/// let array = builder.finish();
+///
+/// assert_eq!(
+///   array.run_ends(),
+///   &Int16Array::from(vec![Some(1), Some(2), Some(4), Some(5)])
+/// );
+///
+/// // Values are polymorphic and so require a downcast.
+/// let av = array.values();
+/// let ava: &StringArray = av.as_any().downcast_ref::<StringArray>().unwrap();
+///
+/// assert_eq!(ava.value(0), "abc");
+/// assert!(av.is_null(1));
+/// assert_eq!(ava.value(2), "def");
+/// assert_eq!(ava.value(3), "abc");
+///
+/// ```
+pub type StringREEArrayBuilder<K> = GenericByteREEArrayBuilder<K, Utf8Type>;
+
+/// Array builder for [`RunEndEncodedArray`] that encodes large strings 
([`LargeUtf8Type`]). See [`StringREEArrayBuilder`] for an example.
+pub type LargeStringREEArrayBuilder<K> = GenericByteREEArrayBuilder<K, 
LargeUtf8Type>;
+
+/// Array builder for [`RunEndEncodedArray`] that encodes binary 
values([`BinaryType`]).
+///
+/// ```
+/// // Create a run-end encoded array with run-end indexes data type as `i16`.
+/// // The encoded data is binary values.
+///
+/// # use arrow_array::builder::BinaryREEArrayBuilder;
+/// # use arrow_array::{BinaryArray, Int16Array};
+/// # use arrow_array::types::Int16Type;
+///
+/// let mut builder = BinaryREEArrayBuilder::<Int16Type>::new();
+///
+/// // The builder builds the dictionary value by value
+/// builder.append_value(b"abc").unwrap();
+/// builder.append_null();
+/// builder.append_value(b"def").unwrap();
+/// builder.append_value(b"def").unwrap();
+/// builder.append_value(b"abc").unwrap();
+/// let array = builder.finish();
+///
+/// assert_eq!(
+///   array.run_ends(),
+///   &Int16Array::from(vec![Some(1), Some(2), Some(4), Some(5)])
+/// );
+///
+/// // Values are polymorphic and so require a downcast.
+/// let av = array.values();
+/// let ava: &BinaryArray = av.as_any().downcast_ref::<BinaryArray>().unwrap();
+///
+/// assert_eq!(ava.value(0), b"abc");
+/// assert!(av.is_null(1));
+/// assert_eq!(ava.value(2), b"def");
+/// assert_eq!(ava.value(3), b"abc");
+///
+/// ```
+pub type BinaryREEArrayBuilder<K> = GenericByteREEArrayBuilder<K, BinaryType>;
+
+/// Array builder for [`RunEndEncodedArray`] that encodes large binary 
values([`LargeBinaryType`]).
+/// See documentation of [`BinaryREEArrayBuilder`] for an example.
+pub type LargeBinaryREEArrayBuilder<K> = GenericByteREEArrayBuilder<K, 
LargeBinaryType>;
+
+#[cfg(test)]
+mod tests {

Review Comment:
   I think we should probably have at least one test of an empty string



##########
arrow-array/src/builder/generic_byte_ree_array_builder.rs:
##########
@@ -0,0 +1,423 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::{
+    types::{
+        ArrowRunEndIndexType, BinaryType, ByteArrayType, LargeBinaryType, 
LargeUtf8Type,
+        Utf8Type,
+    },
+    ArrowPrimitiveType, RunEndEncodedArray,
+};
+
+use super::{GenericByteBuilder, PrimitiveBuilder};
+
+use arrow_buffer::ArrowNativeType;
+use arrow_schema::ArrowError;
+
+/// Array builder for [`RunEndEncodedArray`] for String and Binary types.
+///
+/// # Example:
+///
+/// ```
+///
+/// # use arrow_array::builder::GenericByteREEArrayBuilder;
+/// # use arrow_array::{GenericByteArray, BinaryArray};
+/// # use arrow_array::types::{BinaryType, Int16Type};
+/// # use arrow_array::{Array, Int16Array};
+///
+/// let mut builder =
+/// GenericByteREEArrayBuilder::<Int16Type, BinaryType>::new();
+/// builder.append_value(b"abc").unwrap();
+/// builder.append_value(b"abc").unwrap();
+/// builder.append_null().unwrap();
+/// builder.append_value(b"def").unwrap();
+/// let array = builder.finish();
+///
+/// assert_eq!(
+///     array.run_ends(),
+///     &Int16Array::from(vec![Some(2), Some(3), Some(4)])
+/// );
+///
+/// let av = array.values();
+///
+/// assert!(!av.is_null(0));
+/// assert!(av.is_null(1));
+/// assert!(!av.is_null(2));
+///
+/// // Values are polymorphic and so require a downcast.
+/// let ava: &BinaryArray = av.as_any().downcast_ref::<BinaryArray>().unwrap();
+///
+/// assert_eq!(ava.value(0), b"abc");
+/// assert_eq!(ava.value(2), b"def");
+/// ```
+#[derive(Debug)]
+pub struct GenericByteREEArrayBuilder<R, V>
+where
+    R: ArrowPrimitiveType,
+    V: ByteArrayType,
+{
+    run_ends_builder: PrimitiveBuilder<R>,
+    values_builder: GenericByteBuilder<V>,
+    current_value: Option<Vec<u8>>,
+    current_run_end_index: usize,
+}
+
+impl<R, V> Default for GenericByteREEArrayBuilder<R, V>
+where
+    R: ArrowPrimitiveType,
+    V: ByteArrayType,
+{
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<R, V> GenericByteREEArrayBuilder<R, V>
+where
+    R: ArrowPrimitiveType,
+    V: ByteArrayType,
+{
+    /// Creates a new `GenericByteREEArrayBuilder`
+    pub fn new() -> Self {
+        Self {
+            run_ends_builder: PrimitiveBuilder::new(),
+            values_builder: GenericByteBuilder::<V>::new(),
+            current_value: None,
+            current_run_end_index: 0,
+        }
+    }
+
+    /// Creates a new `GenericByteREEArrayBuilder` with the provided capacity
+    ///
+    /// `capacity`: the expected number of run-end encoded values.
+    /// `data_capacity`: the expected number of bytes of run end encoded values
+    pub fn with_capacity(capacity: usize, data_capacity: usize) -> Self {
+        Self {
+            run_ends_builder: PrimitiveBuilder::with_capacity(capacity),
+            values_builder: GenericByteBuilder::<V>::with_capacity(
+                capacity,
+                data_capacity,
+            ),
+            current_value: None,
+            current_run_end_index: 0,
+        }
+    }
+}
+
+impl<R, V> GenericByteREEArrayBuilder<R, V>
+where
+    R: ArrowRunEndIndexType,
+    V: ByteArrayType,
+{
+    /// Appends optional value to the logical array encoded by the 
RunEndEncodedArray.
+    pub fn append_option(
+        &mut self,
+        input_value: Option<impl AsRef<V::Native>>,
+    ) -> Result<(), ArrowError> {

Review Comment:
   IMO I'd prefer it if these methods were infallible; users could instead check 
the length beforehand if they want to avoid a potential panic. See #3562



##########
arrow-data/src/data.rs:
##########
@@ -1446,6 +1493,40 @@ impl ArrayData {
         })
     }
 
+    /// Validates that each value in run_ends array is positive and strictly 
increasing.
+    fn check_run_ends<T>(&self) -> Result<(), ArrowError>

Review Comment:
   I think this should also check that the last run end is less than or equal 
to the length of the values array



##########
arrow-data/src/data.rs:
##########
@@ -198,9 +198,9 @@ pub(crate) fn new_buffers(data_type: &DataType, capacity: 
usize) -> [MutableBuff
             ],
             _ => unreachable!(),
         },
-        DataType::FixedSizeList(_, _) | DataType::Struct(_) => {
-            [empty_buffer, MutableBuffer::new(0)]
-        }
+        DataType::FixedSizeList(_, _)
+        | DataType::Struct(_)
+        | DataType::RunEndEncoded(_, _) => [empty_buffer, 
MutableBuffer::new(0)],

Review Comment:
   We should probably clean up this method to return `Vec<Buffer>` to be 
consistent with what `ArrayData` expects



##########
arrow-array/src/array/run_end_encoded_array.rs:
##########
@@ -0,0 +1,519 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::any::Any;
+
+use arrow_data::{ArrayData, ArrayDataBuilder};
+use arrow_schema::{ArrowError, DataType, Field};
+
+use crate::{
+    builder::StringREEArrayBuilder,
+    make_array,
+    types::{ArrowRunEndIndexType, Int16Type, Int32Type, Int64Type},
+    Array, ArrayRef, PrimitiveArray,
+};
+
+///
+/// A run-end encoding (REE) is a variation of [run-length encoding 
(RLE)](https://en.wikipedia.org/wiki/Run-length_encoding).
+/// This encoding is good for representing data containing same values 
repeated consecutively
+/// called runs. Each run is represented by the value of data and the index at 
which the run ends.
+///
+/// [`RunEndEncodedArray`] has `run_ends` array and `values` array of same 
length.
+/// The `run_ends` array stores the indexes at which the run ends. The 
`values` array
+/// stores the value of the run. Below example illustrates how a logical array 
is represented in
+/// [`RunEndEncodedArray`]
+///
+///
+/// ```text
+/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─┐
+///   ┌─────────────────┐  ┌─────────┐       ┌─────────────────┐
+/// │ │        A        │  │    2    │ │     │        A        │     
+///   ├─────────────────┤  ├─────────┤       ├─────────────────┤
+/// │ │        D        │  │    3    │ │     │        A        │    run length 
of 'A' = runs_ends[0] - 0 = 2
+///   ├─────────────────┤  ├─────────┤       ├─────────────────┤
+/// │ │        B        │  │    6    │ │     │        D        │    run length 
of 'D' = run_ends[1] - run_ends[0] = 1
+///   └─────────────────┘  └─────────┘       ├─────────────────┤
+/// │        values          run_ends  │     │        B        │     
+///                                          ├─────────────────┤
+/// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─┘     │        B        │     
+///                                          ├─────────────────┤
+///           RunEndEncodedArray             │        B        │    run length 
of 'B' = run_ends[2] - run_ends[1] = 3
+///               length = 3                 └─────────────────┘
+///  
+///                                             Logical array
+///                                                Contents
+/// ```
+
+pub struct RunEndEncodedArray<R: ArrowRunEndIndexType> {

Review Comment:
   ```suggestion
   pub struct RunArray<R: ArrowRunEndIndexType> {
   ```
   
   I don't feel too strongly about this, but it occurs to me that the existing 
dictionary type is named `DictionaryArray`, not `DictionaryEncodedArray`, so 
`RunArray` would be the more consistent name.



##########
arrow-array/src/array/run_end_encoded_array.rs:
##########
@@ -0,0 +1,519 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::any::Any;
+
+use arrow_data::{ArrayData, ArrayDataBuilder};
+use arrow_schema::{ArrowError, DataType, Field};
+
+use crate::{
+    builder::StringREEArrayBuilder,
+    make_array,
+    types::{ArrowRunEndIndexType, Int16Type, Int32Type, Int64Type},
+    Array, ArrayRef, PrimitiveArray,
+};
+
+///
+/// A run-end encoding (REE) is a variation of [run-length encoding 
(RLE)](https://en.wikipedia.org/wiki/Run-length_encoding).
+/// This encoding is good for representing data containing same values 
repeated consecutively
+/// called runs. Each run is represented by the value of data and the index at 
which the run ends.
+///
+/// [`RunEndEncodedArray`] has `run_ends` array and `values` array of same 
length.
+/// The `run_ends` array stores the indexes at which the run ends. The 
`values` array
+/// stores the value of the run. Below example illustrates how a logical array 
is represented in
+/// [`RunEndEncodedArray`]

Review Comment:
   ```suggestion
   /// A run-end encoding (REE) is a variation of [run-length encoding 
(RLE)](https://en.wikipedia.org/wiki/Run-length_encoding).
   ///
   /// This encoding is good for representing data containing same values 
repeated consecutively.
   ///
   /// [`RunEndEncodedArray`] contains `run_ends` array and `values` array of 
same length.
   /// The `run_ends` array stores the indexes at which the run ends. The 
`values` array
   /// stores the value of each run. Below example illustrates how a logical 
array is represented in
   /// [`RunEndEncodedArray`]
   ```



##########
arrow-array/src/array/run_end_encoded_array.rs:
##########
@@ -0,0 +1,519 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::any::Any;
+
+use arrow_data::{ArrayData, ArrayDataBuilder};
+use arrow_schema::{ArrowError, DataType, Field};
+
+use crate::{
+    builder::StringREEArrayBuilder,
+    make_array,
+    types::{ArrowRunEndIndexType, Int16Type, Int32Type, Int64Type},
+    Array, ArrayRef, PrimitiveArray,
+};
+
+///
+/// A run-end encoding (REE) is a variation of [run-length encoding 
(RLE)](https://en.wikipedia.org/wiki/Run-length_encoding).
+/// This encoding is good for representing data containing same values 
repeated consecutively
+/// called runs. Each run is represented by the value of data and the index at 
which the run ends.
+///
+/// [`RunEndEncodedArray`] has `run_ends` array and `values` array of same 
length.
+/// The `run_ends` array stores the indexes at which the run ends. The 
`values` array
+/// stores the value of the run. Below example illustrates how a logical array 
is represented in
+/// [`RunEndEncodedArray`]
+///
+///
+/// ```text
+/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─┐
+///   ┌─────────────────┐  ┌─────────┐       ┌─────────────────┐
+/// │ │        A        │  │    2    │ │     │        A        │     
+///   ├─────────────────┤  ├─────────┤       ├─────────────────┤
+/// │ │        D        │  │    3    │ │     │        A        │    run length 
of 'A' = runs_ends[0] - 0 = 2
+///   ├─────────────────┤  ├─────────┤       ├─────────────────┤
+/// │ │        B        │  │    6    │ │     │        D        │    run length 
of 'D' = run_ends[1] - run_ends[0] = 1
+///   └─────────────────┘  └─────────┘       ├─────────────────┤
+/// │        values          run_ends  │     │        B        │     
+///                                          ├─────────────────┤
+/// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─┘     │        B        │     
+///                                          ├─────────────────┤
+///           RunEndEncodedArray             │        B        │    run length 
of 'B' = run_ends[2] - run_ends[1] = 3
+///               length = 3                 └─────────────────┘
+///  
+///                                             Logical array
+///                                                Contents
+/// ```
+
+pub struct RunEndEncodedArray<R: ArrowRunEndIndexType> {
+    data: ArrayData,
+    run_ends: PrimitiveArray<R>,
+    values: ArrayRef,
+}
+
+impl<R: ArrowRunEndIndexType> RunEndEncodedArray<R> {
+    /// Attempts to create RunEndEncodedArray using given run_ends (index 
where a run ends)
+    /// and the values (value of the run). Returns an error if the given data 
is not compatible
+    /// with RunEndEncoded specification.
+    pub fn try_new(
+        run_ends: &PrimitiveArray<R>,
+        values: &dyn Array,
+    ) -> Result<Self, ArrowError> {
+        let run_ends_type = run_ends.data_type().clone();
+        let values_type = values.data_type().clone();
+        let ree_array_type = DataType::RunEndEncoded(
+            Box::new(Field::new("run_ends", run_ends_type, false)),
+            Box::new(Field::new("values", values_type, true)),
+        );
+        let builder = ArrayDataBuilder::new(ree_array_type)
+            .add_child_data(run_ends.data().clone())
+            .add_child_data(values.data().clone());
+
+        // `build_unchecked` is used to avoid recursive validation of child 
arrays.
+        let array_data = unsafe { builder.build_unchecked() };
+
+        // Safety: `validate_data` checks below
+        //    1. run_ends array does not have null values
+        //    2. run_ends array has non-zero and strictly increasing values.
+        //    3. The length of run_ends array and values array are the same.
+        array_data.validate_data()?;

Review Comment:
   :+1: 



##########
arrow-array/src/array/run_end_encoded_array.rs:
##########
@@ -0,0 +1,519 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::any::Any;
+
+use arrow_data::{ArrayData, ArrayDataBuilder};
+use arrow_schema::{ArrowError, DataType, Field};
+
+use crate::{
+    builder::StringREEArrayBuilder,
+    make_array,
+    types::{ArrowRunEndIndexType, Int16Type, Int32Type, Int64Type},
+    Array, ArrayRef, PrimitiveArray,
+};
+
+///
+/// A run-end encoding (REE) is a variation of [run-length encoding 
(RLE)](https://en.wikipedia.org/wiki/Run-length_encoding).
+/// This encoding is good for representing data containing same values 
repeated consecutively
+/// called runs. Each run is represented by the value of data and the index at 
which the run ends.
+///
+/// [`RunEndEncodedArray`] has `run_ends` array and `values` array of same 
length.
+/// The `run_ends` array stores the indexes at which the run ends. The 
`values` array
+/// stores the value of the run. Below example illustrates how a logical array 
is represented in
+/// [`RunEndEncodedArray`]
+///
+///
+/// ```text
+/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─┐
+///   ┌─────────────────┐  ┌─────────┐       ┌─────────────────┐
+/// │ │        A        │  │    2    │ │     │        A        │     
+///   ├─────────────────┤  ├─────────┤       ├─────────────────┤
+/// │ │        D        │  │    3    │ │     │        A        │    run length 
of 'A' = runs_ends[0] - 0 = 2
+///   ├─────────────────┤  ├─────────┤       ├─────────────────┤
+/// │ │        B        │  │    6    │ │     │        D        │    run length 
of 'D' = run_ends[1] - run_ends[0] = 1
+///   └─────────────────┘  └─────────┘       ├─────────────────┤
+/// │        values          run_ends  │     │        B        │     
+///                                          ├─────────────────┤
+/// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─┘     │        B        │     
+///                                          ├─────────────────┤
+///           RunEndEncodedArray             │        B        │    run length 
of 'B' = run_ends[2] - run_ends[1] = 3
+///               length = 3                 └─────────────────┘
+///  
+///                                             Logical array
+///                                                Contents
+/// ```
+
+pub struct RunEndEncodedArray<R: ArrowRunEndIndexType> {
+    data: ArrayData,
+    run_ends: PrimitiveArray<R>,
+    values: ArrayRef,
+}
+
+impl<R: ArrowRunEndIndexType> RunEndEncodedArray<R> {
+    /// Attempts to create RunEndEncodedArray using given run_ends (index 
where a run ends)
+    /// and the values (value of the run). Returns an error if the given data 
is not compatible
+    /// with RunEndEncoded specification.
+    pub fn try_new(
+        run_ends: &PrimitiveArray<R>,
+        values: &dyn Array,
+    ) -> Result<Self, ArrowError> {
+        let run_ends_type = run_ends.data_type().clone();
+        let values_type = values.data_type().clone();
+        let ree_array_type = DataType::RunEndEncoded(
+            Box::new(Field::new("run_ends", run_ends_type, false)),
+            Box::new(Field::new("values", values_type, true)),
+        );
+        let builder = ArrayDataBuilder::new(ree_array_type)
+            .add_child_data(run_ends.data().clone())
+            .add_child_data(values.data().clone());
+
+        // `build_unchecked` is used to avoid recursive validation of child 
arrays.
+        let array_data = unsafe { builder.build_unchecked() };
+
+        // Safety: `validate_data` checks below
+        //    1. run_ends array does not have null values
+        //    2. run_ends array has non-zero and strictly increasing values.
+        //    3. The length of run_ends array and values array are the same.
+        array_data.validate_data()?;
+
+        Ok(array_data.into())
+    }
+    /// Returns a reference to run_ends array

Review Comment:
   ```suggestion
       /// Returns a reference to run_ends array
       ///
       /// Note: any slicing of this array is not applied to the returned array
       /// and must be handled separately
   ```



##########
arrow-array/src/builder/generic_byte_ree_array_builder.rs:
##########
@@ -0,0 +1,423 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::{
+    types::{
+        ArrowRunEndIndexType, BinaryType, ByteArrayType, LargeBinaryType, 
LargeUtf8Type,
+        Utf8Type,
+    },
+    ArrowPrimitiveType, RunEndEncodedArray,
+};
+
+use super::{GenericByteBuilder, PrimitiveBuilder};
+
+use arrow_buffer::ArrowNativeType;
+use arrow_schema::ArrowError;
+
+/// Array builder for [`RunEndEncodedArray`] for String and Binary types.
+///
+/// # Example:
+///
+/// ```
+///
+/// # use arrow_array::builder::GenericByteREEArrayBuilder;
+/// # use arrow_array::{GenericByteArray, BinaryArray};
+/// # use arrow_array::types::{BinaryType, Int16Type};
+/// # use arrow_array::{Array, Int16Array};
+///
+/// let mut builder =
+/// GenericByteREEArrayBuilder::<Int16Type, BinaryType>::new();
+/// builder.append_value(b"abc").unwrap();
+/// builder.append_value(b"abc").unwrap();
+/// builder.append_null().unwrap();
+/// builder.append_value(b"def").unwrap();
+/// let array = builder.finish();
+///
+/// assert_eq!(
+///     array.run_ends(),
+///     &Int16Array::from(vec![Some(2), Some(3), Some(4)])
+/// );
+///
+/// let av = array.values();
+///
+/// assert!(!av.is_null(0));
+/// assert!(av.is_null(1));
+/// assert!(!av.is_null(2));
+///
+/// // Values are polymorphic and so require a downcast.
+/// let ava: &BinaryArray = av.as_any().downcast_ref::<BinaryArray>().unwrap();
+///
+/// assert_eq!(ava.value(0), b"abc");
+/// assert_eq!(ava.value(2), b"def");
+/// ```
+#[derive(Debug)]
+pub struct GenericByteREEArrayBuilder<R, V>

Review Comment:
   ```suggestion
   pub struct GenericByteRunBuilder<R, V>
   ```
   
   Or something similar; I think `REE` is not a common enough initialism to use 
here. The builders also generally don't contain `Array` in their names; see 
https://docs.rs/arrow-array/latest/arrow_array/builder/index.html



##########
arrow-array/src/array/run_end_encoded_array.rs:
##########
@@ -0,0 +1,519 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::any::Any;
+
+use arrow_data::{ArrayData, ArrayDataBuilder};
+use arrow_schema::{ArrowError, DataType, Field};
+
+use crate::{
+    builder::StringREEArrayBuilder,
+    make_array,
+    types::{ArrowRunEndIndexType, Int16Type, Int32Type, Int64Type},
+    Array, ArrayRef, PrimitiveArray,
+};
+
+///
+/// A run-end encoding (REE) is a variation of [run-length encoding 
(RLE)](https://en.wikipedia.org/wiki/Run-length_encoding).
+/// This encoding is good for representing data containing same values 
repeated consecutively
+/// called runs. Each run is represented by the value of data and the index at 
which the run ends.
+///
+/// [`RunEndEncodedArray`] has `run_ends` array and `values` array of same 
length.
+/// The `run_ends` array stores the indexes at which the run ends. The 
`values` array
+/// stores the value of the run. Below example illustrates how a logical array 
is represented in
+/// [`RunEndEncodedArray`]
+///
+///
+/// ```text
+/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─┐
+///   ┌─────────────────┐  ┌─────────┐       ┌─────────────────┐
+/// │ │        A        │  │    2    │ │     │        A        │     
+///   ├─────────────────┤  ├─────────┤       ├─────────────────┤
+/// │ │        D        │  │    3    │ │     │        A        │    run length 
of 'A' = runs_ends[0] - 0 = 2
+///   ├─────────────────┤  ├─────────┤       ├─────────────────┤
+/// │ │        B        │  │    6    │ │     │        D        │    run length 
of 'D' = run_ends[1] - run_ends[0] = 1
+///   └─────────────────┘  └─────────┘       ├─────────────────┤
+/// │        values          run_ends  │     │        B        │     
+///                                          ├─────────────────┤
+/// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─┘     │        B        │     
+///                                          ├─────────────────┤
+///           RunEndEncodedArray             │        B        │    run length 
of 'B' = run_ends[2] - run_ends[1] = 3
+///               length = 3                 └─────────────────┘
+///  
+///                                             Logical array
+///                                                Contents
+/// ```
+
+pub struct RunEndEncodedArray<R: ArrowRunEndIndexType> {
+    data: ArrayData,
+    run_ends: PrimitiveArray<R>,
+    values: ArrayRef,
+}
+
+impl<R: ArrowRunEndIndexType> RunEndEncodedArray<R> {
+    /// Attempts to create RunEndEncodedArray using given run_ends (index 
where a run ends)
+    /// and the values (value of the run). Returns an error if the given data 
is not compatible
+    /// with RunEndEncoded specification.
+    pub fn try_new(
+        run_ends: &PrimitiveArray<R>,
+        values: &dyn Array,
+    ) -> Result<Self, ArrowError> {
+        let run_ends_type = run_ends.data_type().clone();
+        let values_type = values.data_type().clone();
+        let ree_array_type = DataType::RunEndEncoded(
+            Box::new(Field::new("run_ends", run_ends_type, false)),
+            Box::new(Field::new("values", values_type, true)),
+        );
+        let builder = ArrayDataBuilder::new(ree_array_type)
+            .add_child_data(run_ends.data().clone())
+            .add_child_data(values.data().clone());
+
+        // `build_unchecked` is used to avoid recursive validation of child 
arrays.
+        let array_data = unsafe { builder.build_unchecked() };
+
+        // Safety: `validate_data` checks below
+        //    1. run_ends array does not have null values
+        //    2. run_ends array has non-zero and strictly increasing values.
+        //    3. The length of run_ends array and values array are the same.
+        array_data.validate_data()?;
+
+        Ok(array_data.into())
+    }
+    /// Returns a reference to run_ends array
+    pub fn run_ends(&self) -> &PrimitiveArray<R> {
+        &self.run_ends
+    }
+
+    /// Returns a reference to values array
+    pub fn values(&self) -> &ArrayRef {
+        &self.values
+    }
+}
+
+impl<R: ArrowRunEndIndexType> From<ArrayData> for RunEndEncodedArray<R> {
+    fn from(data: ArrayData) -> Self {
+        match data.data_type() {
+            DataType::RunEndEncoded(run_ends_data_type, _) => {
+                assert_eq!(
+                    &R::DATA_TYPE,
+                    run_ends_data_type.data_type(),
+                    "Data type mismatch for run_ends array, expected {} got 
{}",
+                    R::DATA_TYPE,
+                    run_ends_data_type.data_type()
+                );
+            }
+            _ => {
+                panic!("Invalid data type for RunEndEncodedArray. The data 
type should be DataType::RunEndEncoded");
+            }
+        }
+
+        // Safety: `validate_data` checks below
+        //    1. The given array data has exactly two child arrays.
+        //    2. The first child array (run_ends) has valid data type.
+        //    3. run_ends array does not have null values
+        //    4. run_ends array has non-zero and strictly increasing values.
+        //    5. The length of run_ends array and values array are the same.
+        data.validate_data().unwrap();
+
+        let run_ends = PrimitiveArray::<R>::from(data.child_data()[0].clone());
+        let values = make_array(data.child_data()[1].clone());
+        Self {
+            data,
+            run_ends,
+            values,
+        }
+    }
+}
+
+impl<R: ArrowRunEndIndexType> From<RunEndEncodedArray<R>> for ArrayData {
+    fn from(array: RunEndEncodedArray<R>) -> Self {
+        array.data
+    }
+}
+
+impl<T: ArrowRunEndIndexType> Array for RunEndEncodedArray<T> {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn data(&self) -> &ArrayData {
+        &self.data
+    }
+
+    fn into_data(self) -> ArrayData {
+        self.into()
+    }
+}
+
+impl<R: ArrowRunEndIndexType> std::fmt::Debug for RunEndEncodedArray<R> {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        writeln!(
+            f,
+            "RunEndEncodedArray {{run_ends: {:?}, values: {:?}}}",
+            self.run_ends, self.values
+        )
+    }
+}
+
+/// Constructs a `RunEndEncodedArray` from an iterator of optional strings.
+///
+/// # Example:
+/// ```
+/// use arrow_array::{RunEndEncodedArray, PrimitiveArray, StringArray, 
types::Int16Type};
+///
+/// let test = vec!["a", "a", "b", "c", "c"];
+/// let array: RunEndEncodedArray<Int16Type> = test
+///     .iter()
+///     .map(|&x| if x == "b" { None } else { Some(x) })
+///     .collect();
+/// assert_eq!(
+///     "RunEndEncodedArray {run_ends: PrimitiveArray<Int16>\n[\n  2,\n  3,\n  
5,\n], values: StringArray\n[\n  \"a\",\n  null,\n  \"c\",\n]}\n",
+///     format!("{:?}", array)
+/// );
+/// ```
+impl<'a, T: ArrowRunEndIndexType> FromIterator<Option<&'a str>>
+    for RunEndEncodedArray<T>
+{
+    fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
+        let it = iter.into_iter();
+        let (lower, _) = it.size_hint();
+        let mut builder = StringREEArrayBuilder::with_capacity(lower, 256);
+        it.for_each(|i| {
+            if let Some(i) = i {
+                builder
+                    .append_value(i)
+                    .expect("Unable to append a value to a run end encoded 
array.");
+            } else {
+                builder
+                    .append_null()
+                    .expect("Unable to append null value to run end encoded 
array.");
+            }
+        });
+
+        builder.finish()
+    }
+}
+
+/// Constructs a `RunEndEncodedArray` from an iterator of strings.
+///
+/// # Example:
+///
+/// ```
+/// use arrow_array::{RunEndEncodedArray, PrimitiveArray, StringArray, 
types::Int16Type};
+///
+/// let test = vec!["a", "a", "b", "c"];
+/// let array: RunEndEncodedArray<Int16Type> = test.into_iter().collect();
+/// assert_eq!(
+///     "RunEndEncodedArray {run_ends: PrimitiveArray<Int16>\n[\n  2,\n  3,\n  
4,\n], values: StringArray\n[\n  \"a\",\n  \"b\",\n  \"c\",\n]}\n",
+///     format!("{:?}", array)
+/// );
+/// ```
+impl<'a, T: ArrowRunEndIndexType> FromIterator<&'a str> for 
RunEndEncodedArray<T> {
+    fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
+        let it = iter.into_iter();
+        let (lower, _) = it.size_hint();
+        let mut builder = StringREEArrayBuilder::with_capacity(lower, 256);
+        it.for_each(|i| {
+            builder
+                .append_value(i)
+                .expect("Unable to append a value to a dictionary array.");
+        });
+
+        builder.finish()
+    }
+}
+
+///
+/// A [`RunEndEncodedArray`] array where run ends are stored using `i16` data 
type.
+///
+/// # Example: Using `collect`
+/// ```
+/// # use arrow_array::{Array, Int16RunEndEncodedArray, Int16Array, 
StringArray};
+/// # use std::sync::Arc;
+///
+/// let array: Int16RunEndEncodedArray = vec!["a", "a", "b", "c", 
"c"].into_iter().collect();
+/// let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", 
"c"]));
+/// assert_eq!(array.run_ends(), &Int16Array::from(vec![2, 3, 5]));
+/// assert_eq!(array.values(), &values);
+/// ```
+pub type Int16RunEndEncodedArray = RunEndEncodedArray<Int16Type>;
+
+///
+/// A [`RunEndEncodedArray`] array where run ends are stored using `i32` data 
type.
+///
+/// # Example: Using `collect`
+/// ```
+/// # use arrow_array::{Array, Int32RunEndEncodedArray, Int32Array, 
StringArray};
+/// # use std::sync::Arc;
+///
+/// let array: Int32RunEndEncodedArray = vec!["a", "a", "b", "c", 
"c"].into_iter().collect();
+/// let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", 
"c"]));
+/// assert_eq!(array.run_ends(), &Int32Array::from(vec![2, 3, 5]));
+/// assert_eq!(array.values(), &values);
+/// ```
+pub type Int32RunEndEncodedArray = RunEndEncodedArray<Int32Type>;
+
+///
+/// A [`RunEndEncodedArray`] array where run ends are stored using `i64` data 
type.
+///
+/// # Example: Using `collect`
+/// ```
+/// # use arrow_array::{Array, Int64RunEndEncodedArray, Int64Array, 
StringArray};
+/// # use std::sync::Arc;
+///
+/// let array: Int64RunEndEncodedArray = vec!["a", "a", "b", "c", 
"c"].into_iter().collect();
+/// let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", 
"c"]));
+/// assert_eq!(array.run_ends(), &Int64Array::from(vec![2, 3, 5]));
+/// assert_eq!(array.values(), &values);
+/// ```
+pub type Int64RunEndEncodedArray = RunEndEncodedArray<Int64Type>;
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use super::*;
+    use crate::builder::PrimitiveREEArrayBuilder;
+    use crate::types::{Int16Type, Int32Type, UInt32Type};
+    use crate::{Array, Int16Array, Int32Array, StringArray};
+    use arrow_buffer::{Buffer, ToByteSlice};
+    use arrow_schema::Field;
+
+    #[test]
+    fn test_ree_array() {
+        // Construct a value array
+        let value_data = ArrayData::builder(DataType::Int8)
+            .len(8)
+            .add_buffer(Buffer::from(
+                &[10_i8, 11, 12, 13, 14, 15, 16, 17].to_byte_slice(),
+            ))
+            .build()
+            .unwrap();
+
+        // Construct a run_ends array:
+        let run_ends_data = ArrayData::builder(DataType::Int16)
+            .len(8)
+            .add_buffer(Buffer::from(
+                &[4_i16, 6, 7, 9, 13, 18, 20, 22].to_byte_slice(),
+            ))
+            .build()
+            .unwrap();
+
+        // Construct a run ends encoded array from the above two
+        let run_ends_type = Field::new("run_ends", DataType::Int16, false);
+        let value_type = Field::new("values", DataType::Int8, true);
+        let ree_array_type =
+            DataType::RunEndEncoded(Box::new(run_ends_type), 
Box::new(value_type));
+        let dict_data = ArrayData::builder(ree_array_type)
+            .add_child_data(run_ends_data.clone())
+            .add_child_data(value_data.clone())
+            .build()
+            .unwrap();
+        let ree_array = Int16RunEndEncodedArray::from(dict_data);
+
+        let values = ree_array.values();
+        assert_eq!(&value_data, values.data());
+        assert_eq!(&DataType::Int8, values.data_type());
+
+        let run_ends = ree_array.run_ends();
+        assert_eq!(&run_ends_data, run_ends.data());
+        assert_eq!(&DataType::Int16, run_ends.data_type());
+    }
+
+    #[test]
+    fn test_ree_array_fmt_debug() {
+        let mut builder =
+            PrimitiveREEArrayBuilder::<Int16Type, 
UInt32Type>::with_capacity(3);
+        builder.append_value(12345678).unwrap();
+        builder.append_null().unwrap();
+        builder.append_value(22345678).unwrap();
+        let array = builder.finish();
+        assert_eq!(
+            "RunEndEncodedArray {run_ends: PrimitiveArray<Int16>\n[\n  1,\n  
2,\n  3,\n], values: PrimitiveArray<UInt32>\n[\n  12345678,\n  null,\n  
22345678,\n]}\n",
+            format!("{:?}", array)
+        );
+
+        let mut builder =
+            PrimitiveREEArrayBuilder::<Int16Type, 
UInt32Type>::with_capacity(20);
+        for _ in 0..20 {
+            builder.append_value(1).unwrap();
+        }
+        let array = builder.finish();
+        assert_eq!(
+            "RunEndEncodedArray {run_ends: PrimitiveArray<Int16>\n[\n  20,\n], 
values: PrimitiveArray<UInt32>\n[\n  1,\n]}\n",
+            format!("{:?}", array)
+        );
+    }
+
+    #[test]
+    fn test_ree_array_from_iter() {
+        let test = vec!["a", "a", "b", "c"];
+        let array: RunEndEncodedArray<Int16Type> = test
+            .iter()
+            .map(|&x| if x == "b" { None } else { Some(x) })
+            .collect();
+        assert_eq!(
+            "RunEndEncodedArray {run_ends: PrimitiveArray<Int16>\n[\n  2,\n  
3,\n  4,\n], values: StringArray\n[\n  \"a\",\n  null,\n  \"c\",\n]}\n",
+            format!("{:?}", array)
+        );
+
+        let array: RunEndEncodedArray<Int16Type> = test.into_iter().collect();
+        assert_eq!(
+            "RunEndEncodedArray {run_ends: PrimitiveArray<Int16>\n[\n  2,\n  
3,\n  4,\n], values: StringArray\n[\n  \"a\",\n  \"b\",\n  \"c\",\n]}\n",
+            format!("{:?}", array)
+        );
+    }
+
+    #[test]
+    fn test_ree_array_run_ends_as_primitive_array() {
+        let test = vec!["a", "b", "c", "a"];
+        let array: RunEndEncodedArray<Int16Type> = test.into_iter().collect();
+
+        let run_ends = array.run_ends();
+        assert_eq!(&DataType::Int16, run_ends.data_type());
+        assert_eq!(0, run_ends.null_count());
+        assert_eq!(&[1, 2, 3, 4], run_ends.values());
+    }
+
+    #[test]
+    fn test_ree_array_as_primitive_array_with_null() {
+        let test = vec![Some("a"), None, Some("b"), None, None, Some("a")];
+        let array: RunEndEncodedArray<Int32Type> = test.into_iter().collect();
+
+        let run_ends = array.run_ends();
+        assert_eq!(&DataType::Int32, run_ends.data_type());
+        assert_eq!(0, run_ends.null_count());
+        assert_eq!(5, run_ends.len());
+        assert_eq!(&[1, 2, 3, 5, 6], run_ends.values());
+
+        let values_data = array.values();
+        assert_eq!(2, values_data.null_count());
+        assert_eq!(5, values_data.len());
+    }
+
+    #[test]
+    fn test_ree_array_all_nulls() {
+        let test = vec![None, None, None];
+        let array: RunEndEncodedArray<Int32Type> = test.into_iter().collect();
+
+        let run_ends = array.run_ends();
+        assert_eq!(1, run_ends.len());
+        assert_eq!(&[3], run_ends.values());
+
+        let values_data = array.values();
+        assert_eq!(1, values_data.null_count());

Review Comment:
   Perhaps we could also add `assert_eq!(array.null_count(), 0);`
   
   Technically this is what the specification states, I happen to think this is 
a little bit strange and will ask for clarification on the mailing list
   



##########
arrow-array/src/builder/generic_byte_ree_array_builder.rs:
##########
@@ -0,0 +1,423 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::{
+    types::{
+        ArrowRunEndIndexType, BinaryType, ByteArrayType, LargeBinaryType, 
LargeUtf8Type,
+        Utf8Type,
+    },
+    ArrowPrimitiveType, RunEndEncodedArray,
+};
+
+use super::{GenericByteBuilder, PrimitiveBuilder};
+
+use arrow_buffer::ArrowNativeType;
+use arrow_schema::ArrowError;
+
+/// Array builder for [`RunEndEncodedArray`] for String and Binary types.
+///
+/// # Example:
+///
+/// ```
+///
+/// # use arrow_array::builder::GenericByteREEArrayBuilder;
+/// # use arrow_array::{GenericByteArray, BinaryArray};
+/// # use arrow_array::types::{BinaryType, Int16Type};
+/// # use arrow_array::{Array, Int16Array};
+///
+/// let mut builder =
+/// GenericByteREEArrayBuilder::<Int16Type, BinaryType>::new();
+/// builder.append_value(b"abc").unwrap();
+/// builder.append_value(b"abc").unwrap();
+/// builder.append_null().unwrap();
+/// builder.append_value(b"def").unwrap();
+/// let array = builder.finish();
+///
+/// assert_eq!(
+///     array.run_ends(),
+///     &Int16Array::from(vec![Some(2), Some(3), Some(4)])
+/// );
+///
+/// let av = array.values();
+///
+/// assert!(!av.is_null(0));
+/// assert!(av.is_null(1));
+/// assert!(!av.is_null(2));
+///
+/// // Values are polymorphic and so require a downcast.
+/// let ava: &BinaryArray = av.as_any().downcast_ref::<BinaryArray>().unwrap();
+///
+/// assert_eq!(ava.value(0), b"abc");
+/// assert_eq!(ava.value(2), b"def");
+/// ```
+#[derive(Debug)]
+pub struct GenericByteREEArrayBuilder<R, V>
+where
+    R: ArrowPrimitiveType,
+    V: ByteArrayType,
+{
+    run_ends_builder: PrimitiveBuilder<R>,
+    values_builder: GenericByteBuilder<V>,
+    current_value: Option<Vec<u8>>,
+    current_run_end_index: usize,
+}
+
+impl<R, V> Default for GenericByteREEArrayBuilder<R, V>
+where
+    R: ArrowPrimitiveType,
+    V: ByteArrayType,
+{
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<R, V> GenericByteREEArrayBuilder<R, V>
+where
+    R: ArrowPrimitiveType,
+    V: ByteArrayType,
+{
+    /// Creates a new `GenericByteREEArrayBuilder`
+    pub fn new() -> Self {
+        Self {
+            run_ends_builder: PrimitiveBuilder::new(),
+            values_builder: GenericByteBuilder::<V>::new(),
+            current_value: None,
+            current_run_end_index: 0,
+        }
+    }
+
+    /// Creates a new `GenericByteREEArrayBuilder` with the provided capacity
+    ///
+    /// `capacity`: the expected number of run-end encoded values.
+    /// `data_capacity`: the expected number of bytes of run end encoded values
+    pub fn with_capacity(capacity: usize, data_capacity: usize) -> Self {
+        Self {
+            run_ends_builder: PrimitiveBuilder::with_capacity(capacity),
+            values_builder: GenericByteBuilder::<V>::with_capacity(
+                capacity,
+                data_capacity,
+            ),
+            current_value: None,
+            current_run_end_index: 0,
+        }
+    }
+}
+
+impl<R, V> GenericByteREEArrayBuilder<R, V>
+where
+    R: ArrowRunEndIndexType,
+    V: ByteArrayType,
+{
+    /// Appends optional value to the logical array encoded by the 
RunEndEncodedArray.
+    pub fn append_option(
+        &mut self,
+        input_value: Option<impl AsRef<V::Native>>,
+    ) -> Result<(), ArrowError> {
+        match input_value {
+            Some(value) => self.append_value(value)?,
+            None => self.append_null()?,
+        }
+        Ok(())
+    }
+
+    /// Appends value to the logical array encoded by the RunEndEncodedArray.
+    pub fn append_value(
+        &mut self,
+        input_value: impl AsRef<V::Native>,
+    ) -> Result<(), ArrowError> {
+        let value: &[u8] = input_value.as_ref().as_ref();
+        match self.current_value.as_deref() {
+            None if self.current_run_end_index > 0 => {
+                self.append_run_end()?;
+                self.current_value = Some(value.to_owned());

Review Comment:
   Might be a premature optimisation, but it occurs to me that discarding the 
`Vec<u8>` every single time and allocating a new one is quite wasteful. It 
might be better to instead reuse the same `Vec<u8>` and just clear it, with a 
separate `bool` recording occupancy.



##########
arrow-array/src/builder/generic_byte_ree_array_builder.rs:
##########
@@ -0,0 +1,423 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::{
+    types::{
+        ArrowRunEndIndexType, BinaryType, ByteArrayType, LargeBinaryType, 
LargeUtf8Type,
+        Utf8Type,
+    },
+    ArrowPrimitiveType, RunEndEncodedArray,
+};
+
+use super::{GenericByteBuilder, PrimitiveBuilder};
+
+use arrow_buffer::ArrowNativeType;
+use arrow_schema::ArrowError;
+
+/// Array builder for [`RunEndEncodedArray`] for String and Binary types.
+///
+/// # Example:
+///
+/// ```
+///
+/// # use arrow_array::builder::GenericByteREEArrayBuilder;
+/// # use arrow_array::{GenericByteArray, BinaryArray};
+/// # use arrow_array::types::{BinaryType, Int16Type};
+/// # use arrow_array::{Array, Int16Array};
+///
+/// let mut builder =
+/// GenericByteREEArrayBuilder::<Int16Type, BinaryType>::new();
+/// builder.append_value(b"abc").unwrap();
+/// builder.append_value(b"abc").unwrap();
+/// builder.append_null().unwrap();
+/// builder.append_value(b"def").unwrap();
+/// let array = builder.finish();
+///
+/// assert_eq!(
+///     array.run_ends(),
+///     &Int16Array::from(vec![Some(2), Some(3), Some(4)])
+/// );
+///
+/// let av = array.values();
+///
+/// assert!(!av.is_null(0));
+/// assert!(av.is_null(1));
+/// assert!(!av.is_null(2));
+///
+/// // Values are polymorphic and so require a downcast.
+/// let ava: &BinaryArray = av.as_any().downcast_ref::<BinaryArray>().unwrap();
+///
+/// assert_eq!(ava.value(0), b"abc");
+/// assert_eq!(ava.value(2), b"def");
+/// ```
+#[derive(Debug)]
+pub struct GenericByteREEArrayBuilder<R, V>
+where
+    R: ArrowPrimitiveType,
+    V: ByteArrayType,
+{
+    run_ends_builder: PrimitiveBuilder<R>,
+    values_builder: GenericByteBuilder<V>,
+    current_value: Option<Vec<u8>>,

Review Comment:
   ```suggestion
       current_value: Option<Vec<u8>>,
   ```
   
   I think this needs to use some `V::Owned` where something like the following 
is added to `ByteArrayType`
   
   ```
   type Owned: From<Self::Native> + AsRef<Self::Native>;
   ```



##########
arrow-array/src/builder/generic_bytes_builder.rs:
##########
@@ -90,6 +90,15 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
             .append(T::Offset::from_usize(self.value_builder.len()).unwrap());
     }
 
+    /// Appends a byte array slice into the builder.

Review Comment:
   This is unsound, it would allow appending non-UTF-8 data to a StringArray



##########
arrow-array/src/builder/primitive_ree_array_builder.rs:
##########
@@ -0,0 +1,222 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::{types::ArrowRunEndIndexType, ArrowPrimitiveType, 
RunEndEncodedArray};
+
+use super::PrimitiveBuilder;
+
+use arrow_buffer::ArrowNativeType;
+use arrow_schema::ArrowError;
+
+/// Array builder for [`RunEndEncodedArray`] that encodes primitive values.
+///
+/// # Example:
+///
+/// ```
+///
+/// # use arrow_array::builder::PrimitiveREEArrayBuilder;
+/// # use arrow_array::types::{UInt32Type, Int16Type};
+/// # use arrow_array::{Array, UInt32Array, Int16Array};
+///
+/// let mut builder =
+/// PrimitiveREEArrayBuilder::<Int16Type, UInt32Type>::new();
+/// builder.append_value(1234).unwrap();
+/// builder.append_value(1234).unwrap();
+/// builder.append_value(1234).unwrap();
+/// builder.append_null().unwrap();
+/// builder.append_value(5678).unwrap();
+/// builder.append_value(5678).unwrap();
+/// let array = builder.finish();
+///
+/// assert_eq!(
+///     array.run_ends(),
+///     &Int16Array::from(vec![Some(3), Some(4), Some(6)])
+/// );
+///
+/// let av = array.values();
+///
+/// assert!(!av.is_null(0));
+/// assert!(av.is_null(1));
+/// assert!(!av.is_null(2));
+///
+/// // Values are polymorphic and so require a downcast.
+/// let ava: &UInt32Array = av.as_any().downcast_ref::<UInt32Array>().unwrap();

Review Comment:
   ```suggestion
   /// let ava0 = as_primitive_array::<UInt32Type>(av.as_ref());
   ```



##########
arrow-array/src/builder/generic_byte_ree_array_builder.rs:
##########
@@ -0,0 +1,423 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::{
+    types::{
+        ArrowRunEndIndexType, BinaryType, ByteArrayType, LargeBinaryType, 
LargeUtf8Type,
+        Utf8Type,
+    },
+    ArrowPrimitiveType, RunEndEncodedArray,
+};
+
+use super::{GenericByteBuilder, PrimitiveBuilder};
+
+use arrow_buffer::ArrowNativeType;
+use arrow_schema::ArrowError;
+
+/// Array builder for [`RunEndEncodedArray`] for String and Binary types.
+///
+/// # Example:
+///
+/// ```
+///
+/// # use arrow_array::builder::GenericByteREEArrayBuilder;
+/// # use arrow_array::{GenericByteArray, BinaryArray};
+/// # use arrow_array::types::{BinaryType, Int16Type};
+/// # use arrow_array::{Array, Int16Array};
+///
+/// let mut builder =
+/// GenericByteREEArrayBuilder::<Int16Type, BinaryType>::new();
+/// builder.append_value(b"abc").unwrap();
+/// builder.append_value(b"abc").unwrap();
+/// builder.append_null().unwrap();
+/// builder.append_value(b"def").unwrap();
+/// let array = builder.finish();
+///
+/// assert_eq!(
+///     array.run_ends(),
+///     &Int16Array::from(vec![Some(2), Some(3), Some(4)])
+/// );
+///
+/// let av = array.values();
+///
+/// assert!(!av.is_null(0));
+/// assert!(av.is_null(1));
+/// assert!(!av.is_null(2));
+///
+/// // Values are polymorphic and so require a downcast.
+/// let ava: &BinaryArray = av.as_any().downcast_ref::<BinaryArray>().unwrap();
+///
+/// assert_eq!(ava.value(0), b"abc");
+/// assert_eq!(ava.value(2), b"def");
+/// ```
+#[derive(Debug)]
+pub struct GenericByteREEArrayBuilder<R, V>
+where
+    R: ArrowPrimitiveType,
+    V: ByteArrayType,
+{
+    run_ends_builder: PrimitiveBuilder<R>,
+    values_builder: GenericByteBuilder<V>,
+    current_value: Option<Vec<u8>>,
+    current_run_end_index: usize,
+}
+
+impl<R, V> Default for GenericByteREEArrayBuilder<R, V>
+where
+    R: ArrowPrimitiveType,
+    V: ByteArrayType,
+{
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<R, V> GenericByteREEArrayBuilder<R, V>
+where
+    R: ArrowPrimitiveType,
+    V: ByteArrayType,
+{
+    /// Creates a new `GenericByteREEArrayBuilder`
+    pub fn new() -> Self {
+        Self {
+            run_ends_builder: PrimitiveBuilder::new(),
+            values_builder: GenericByteBuilder::<V>::new(),
+            current_value: None,
+            current_run_end_index: 0,
+        }
+    }
+
+    /// Creates a new `GenericByteREEArrayBuilder` with the provided capacity
+    ///
+    /// `capacity`: the expected number of run-end encoded values.
+    /// `data_capacity`: the expected number of bytes of run end encoded values
+    pub fn with_capacity(capacity: usize, data_capacity: usize) -> Self {
+        Self {
+            run_ends_builder: PrimitiveBuilder::with_capacity(capacity),
+            values_builder: GenericByteBuilder::<V>::with_capacity(
+                capacity,
+                data_capacity,
+            ),
+            current_value: None,
+            current_run_end_index: 0,
+        }
+    }
+}
+
+impl<R, V> GenericByteREEArrayBuilder<R, V>
+where
+    R: ArrowRunEndIndexType,
+    V: ByteArrayType,
+{
+    /// Appends optional value to the logical array encoded by the 
RunEndEncodedArray.
+    pub fn append_option(
+        &mut self,
+        input_value: Option<impl AsRef<V::Native>>,
+    ) -> Result<(), ArrowError> {
+        match input_value {
+            Some(value) => self.append_value(value)?,
+            None => self.append_null()?,
+        }
+        Ok(())
+    }
+
+    /// Appends value to the logical array encoded by the RunEndEncodedArray.
+    pub fn append_value(
+        &mut self,
+        input_value: impl AsRef<V::Native>,
+    ) -> Result<(), ArrowError> {
+        let value: &[u8] = input_value.as_ref().as_ref();
+        match self.current_value.as_deref() {
+            None if self.current_run_end_index > 0 => {
+                self.append_run_end()?;
+                self.current_value = Some(value.to_owned());
+            }
+            None if self.current_run_end_index == 0 => {
+                self.current_value = Some(value.to_owned());
+            }
+            Some(current_value) if current_value != value => {
+                self.append_run_end()?;
+                self.current_value = Some(value.to_owned());
+            }
+            _ => {}
+        }
+        self.current_run_end_index = self
+            .current_run_end_index
+            .checked_add(1)
+            .ok_or(ArrowError::RunEndIndexOverflowError)?;
+        Ok(())
+    }
+
+    /// Appends null to the logical array encoded by the RunEndEncodedArray.
+    pub fn append_null(&mut self) -> Result<(), ArrowError> {
+        if self.current_value.is_some() {
+            self.append_run_end()?;
+            self.current_value = None;
+        }
+        self.current_run_end_index = self
+            .current_run_end_index
+            .checked_add(1)
+            .ok_or(ArrowError::RunEndIndexOverflowError)?;
+        Ok(())
+    }
+
+    /// Creates the RunEndEncodedArray and resets the builder.
+    /// Panics if RunEndEncodedArray cannot be built.
+    pub fn finish(&mut self) -> RunEndEncodedArray<R> {
+        // write the last run end to the array.
+        self.append_run_end().unwrap();
+
+        // reset the run end index to zero.
+        self.current_value = None;
+        self.current_run_end_index = 0;
+
+        // build the run encoded array by adding run_ends and values array as 
its children.
+        let run_ends_array = self.run_ends_builder.finish();
+        let values_array = self.values_builder.finish();
+        RunEndEncodedArray::<R>::try_new(&run_ends_array, 
&values_array).unwrap()
+    }
+
+    /// Creates the RunEndEncodedArray and without resetting the builder.
+    /// Panics if RunEndEncodedArray cannot be built.
+    pub fn finish_cloned(&mut self) -> RunEndEncodedArray<R> {
+        // write the last run end to the array.
+        self.append_run_end().unwrap();
+
+        // build the run encoded array by adding run_ends and values array as 
its children.
+        let run_ends_array = self.run_ends_builder.finish_cloned();
+        let values_array = self.values_builder.finish_cloned();
+        RunEndEncodedArray::<R>::try_new(&run_ends_array, 
&values_array).unwrap()
+    }
+
+    // Appends the current run to the array
+    fn append_run_end(&mut self) -> Result<(), ArrowError> {
+        let run_end_index = R::Native::from_usize(self.current_run_end_index)
+            .ok_or_else(|| {
+                ArrowError::ParseError(format!(
+                    "Cannot convert the value {} from `usize` to native form 
of arrow datatype {}",
+                    self.current_run_end_index,
+                    R::DATA_TYPE
+                ))
+            })?;
+        self.run_ends_builder.append_value(run_end_index);
+        match self.current_value.as_deref() {
+            Some(value) => self.values_builder.append_slice(value),
+            None => self.values_builder.append_null(),
+        }
+        Ok(())
+    }
+}
+
+/// Array builder for [`RunEndEncodedArray`] that encodes strings 
([`Utf8Type`]).
+///
+/// ```
+/// // Create a run-end encoded array with run-end indexes data type as `i16`.
+/// // The encoded values are Strings.
+///
+/// # use arrow_array::builder::StringREEArrayBuilder;
+/// # use arrow_array::{Int16Array, StringArray};
+/// # use arrow_array::types::Int16Type;
+///
+/// let mut builder = StringREEArrayBuilder::<Int16Type>::new();
+///
+/// // The builder builds the dictionary value by value
+/// builder.append_value("abc").unwrap();
+/// builder.append_null();
+/// builder.append_value("def").unwrap();
+/// builder.append_value("def").unwrap();
+/// builder.append_value("abc").unwrap();
+/// let array = builder.finish();
+///
+/// assert_eq!(
+///   array.run_ends(),
+///   &Int16Array::from(vec![Some(1), Some(2), Some(4), Some(5)])
+/// );
+///
+/// // Values are polymorphic and so require a downcast.
+/// let av = array.values();
+/// let ava: &StringArray = av.as_any().downcast_ref::<StringArray>().unwrap();
+///
+/// assert_eq!(ava.value(0), "abc");
+/// assert!(av.is_null(1));
+/// assert_eq!(ava.value(2), "def");
+/// assert_eq!(ava.value(3), "abc");
+///
+/// ```
+pub type StringREEArrayBuilder<K> = GenericByteREEArrayBuilder<K, Utf8Type>;
+
+/// Array builder for [`RunEndEncodedArray`] that encodes large strings 
([`LargeUtf8Type`]). See [`StringREEArrayBuilder`] for an example.
+pub type LargeStringREEArrayBuilder<K> = GenericByteREEArrayBuilder<K, 
LargeUtf8Type>;
+
+/// Array builder for [`RunEndEncodedArray`] that encodes binary 
values([`BinaryType`]).
+///
+/// ```
+/// // Create a run-end encoded array with run-end indexes data type as `i16`.
+/// // The encoded data is binary values.
+///
+/// # use arrow_array::builder::BinaryREEArrayBuilder;
+/// # use arrow_array::{BinaryArray, Int16Array};
+/// # use arrow_array::types::Int16Type;
+///
+/// let mut builder = BinaryREEArrayBuilder::<Int16Type>::new();
+///
+/// // The builder builds the dictionary value by value
+/// builder.append_value(b"abc").unwrap();
+/// builder.append_null();
+/// builder.append_value(b"def").unwrap();
+/// builder.append_value(b"def").unwrap();
+/// builder.append_value(b"abc").unwrap();
+/// let array = builder.finish();
+///
+/// assert_eq!(
+///   array.run_ends(),
+///   &Int16Array::from(vec![Some(1), Some(2), Some(4), Some(5)])
+/// );
+///
+/// // Values are polymorphic and so require a downcast.
+/// let av = array.values();
+/// let ava: &BinaryArray = av.as_any().downcast_ref::<BinaryArray>().unwrap();
+///
+/// assert_eq!(ava.value(0), b"abc");
+/// assert!(av.is_null(1));
+/// assert_eq!(ava.value(2), b"def");
+/// assert_eq!(ava.value(3), b"abc");
+///
+/// ```
+pub type BinaryREEArrayBuilder<K> = GenericByteREEArrayBuilder<K, BinaryType>;
+
+/// Array builder for [`RunEndEncodedArray`] that encodes large binary 
values([`LargeBinaryType`]).
+/// See documentation of [`BinaryREEArrayBuilder`] for an example.
+pub type LargeBinaryREEArrayBuilder<K> = GenericByteREEArrayBuilder<K, 
LargeBinaryType>;
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    use crate::array::Array;
+    use crate::types::Int16Type;
+    use crate::GenericByteArray;
+    use crate::Int16Array;
+
+    fn test_bytes_ree_array_buider<T>(values: Vec<&T::Native>)

Review Comment:
   ```suggestion
       fn test_bytes_ree_array_buider<T>(values: [&T::Native; 2])
   ```
   
   Or something similar; at least to me, it isn't clear from the signature that 
anything beyond the first two elements will be ignored



##########
arrow-array/src/types.rs:
##########
@@ -240,6 +244,22 @@ impl ArrowDictionaryKeyType for UInt32Type {}
 
 impl ArrowDictionaryKeyType for UInt64Type {}
 
+/// A subtype of primitive type that is used as run-ends index
+/// in RunEndEncodedArray.
+/// See <https://arrow.apache.org/docs/format/Columnar.html>
+///
+/// # Sealed: The implementation of this trait is sealed to avoid accidental 
misuse.
+pub trait ArrowRunEndIndexType: ArrowPrimitiveType + private::Sealed {}

Review Comment:
   The `Arrow` prefix is kind of unfortunate; we currently use both styles, e.g. 
`ArrowDictionaryKeyType` but also `OffsetSizeTrait` or `ByteArrayType`. The 
latter is more "rusty", but I don't feel especially strongly about it.



##########
arrow-array/src/types.rs:
##########
@@ -240,6 +244,22 @@ impl ArrowDictionaryKeyType for UInt32Type {}
 
 impl ArrowDictionaryKeyType for UInt64Type {}
 
+/// A subtype of primitive type that is used as run-ends index
+/// in RunEndEncodedArray.
+/// See <https://arrow.apache.org/docs/format/Columnar.html>
+///
+/// # Sealed: The implementation of this trait is sealed to avoid accidental 
misuse.

Review Comment:
   ```suggestion
   /// Note: The implementation of this trait is sealed to avoid accidental 
misuse.
   ```



##########
arrow-array/src/types.rs:
##########
@@ -32,6 +32,10 @@ use half::f16;
 use std::marker::PhantomData;
 use std::ops::{Add, Sub};
 
+mod private {
+    pub trait Sealed {}
+}

Review Comment:
   ```suggestion
   mod run {
       pub trait RunEndTypeSealed {}
       
       impl RunEndTypeSealed for Int16Type {}
       impl RunEndTypeSealed for Int32Type {}
       impl RunEndTypeSealed for Int64Type {}
   }
   ```
   
   To be consistent with other types



##########
arrow-array/src/array/run_end_encoded_array.rs:
##########
@@ -0,0 +1,519 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::any::Any;
+
+use arrow_data::{ArrayData, ArrayDataBuilder};
+use arrow_schema::{ArrowError, DataType, Field};
+
+use crate::{
+    builder::StringREEArrayBuilder,
+    make_array,
+    types::{ArrowRunEndIndexType, Int16Type, Int32Type, Int64Type},
+    Array, ArrayRef, PrimitiveArray,
+};
+
+///
+/// A run-end encoding (REE) is a variation of [run-length encoding 
(RLE)](https://en.wikipedia.org/wiki/Run-length_encoding).
+/// This encoding is good for representing data containing same values 
repeated consecutively
+/// called runs. Each run is represented by the value of data and the index at 
which the run ends.
+///
+/// [`RunEndEncodedArray`] has `run_ends` array and `values` array of same 
length.
+/// The `run_ends` array stores the indexes at which the run ends. The 
`values` array
+/// stores the value of the run. Below example illustrates how a logical array 
is represented in
+/// [`RunEndEncodedArray`]
+///
+///
+/// ```text
+/// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─┐
+///   ┌─────────────────┐  ┌─────────┐       ┌─────────────────┐
+/// │ │        A        │  │    2    │ │     │        A        │     
+///   ├─────────────────┤  ├─────────┤       ├─────────────────┤
+/// │ │        D        │  │    3    │ │     │        A        │    run length 
of 'A' = runs_ends[0] - 0 = 2
+///   ├─────────────────┤  ├─────────┤       ├─────────────────┤
+/// │ │        B        │  │    6    │ │     │        D        │    run length 
of 'D' = run_ends[1] - run_ends[0] = 1
+///   └─────────────────┘  └─────────┘       ├─────────────────┤
+/// │        values          run_ends  │     │        B        │     
+///                                          ├─────────────────┤
+/// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─┘     │        B        │     
+///                                          ├─────────────────┤
+///           RunEndEncodedArray             │        B        │    run length 
of 'B' = run_ends[2] - run_ends[1] = 3
+///               length = 3                 └─────────────────┘
+///  
+///                                             Logical array
+///                                                Contents
+/// ```
+
+pub struct RunEndEncodedArray<R: ArrowRunEndIndexType> {
+    data: ArrayData,
+    run_ends: PrimitiveArray<R>,
+    values: ArrayRef,
+}
+
+impl<R: ArrowRunEndIndexType> RunEndEncodedArray<R> {
+    /// Attempts to create RunEndEncodedArray using given run_ends (index 
where a run ends)
+    /// and the values (value of the run). Returns an error if the given data 
is not compatible
+    /// with RunEndEncoded specification.
+    pub fn try_new(
+        run_ends: &PrimitiveArray<R>,
+        values: &dyn Array,
+    ) -> Result<Self, ArrowError> {
+        let run_ends_type = run_ends.data_type().clone();
+        let values_type = values.data_type().clone();
+        let ree_array_type = DataType::RunEndEncoded(
+            Box::new(Field::new("run_ends", run_ends_type, false)),
+            Box::new(Field::new("values", values_type, true)),
+        );
+        let builder = ArrayDataBuilder::new(ree_array_type)
+            .add_child_data(run_ends.data().clone())
+            .add_child_data(values.data().clone());
+
+        // `build_unchecked` is used to avoid recursive validation of child 
arrays.
+        let array_data = unsafe { builder.build_unchecked() };
+
+        // Safety: `validate_data` checks below
+        //    1. run_ends array does not have null values
+        //    2. run_ends array has non-zero and strictly increasing values.
+        //    3. The length of run_ends array and values array are the same.
+        array_data.validate_data()?;
+
+        Ok(array_data.into())
+    }
+    /// Returns a reference to run_ends array
+    pub fn run_ends(&self) -> &PrimitiveArray<R> {
+        &self.run_ends
+    }
+
+    /// Returns a reference to values array
+    pub fn values(&self) -> &ArrayRef {
+        &self.values
+    }
+}
+
+impl<R: ArrowRunEndIndexType> From<ArrayData> for RunEndEncodedArray<R> {
+    fn from(data: ArrayData) -> Self {
+        match data.data_type() {
+            DataType::RunEndEncoded(run_ends_data_type, _) => {
+                assert_eq!(
+                    &R::DATA_TYPE,
+                    run_ends_data_type.data_type(),
+                    "Data type mismatch for run_ends array, expected {} got 
{}",
+                    R::DATA_TYPE,
+                    run_ends_data_type.data_type()
+                );
+            }
+            _ => {
+                panic!("Invalid data type for RunEndEncodedArray. The data 
type should be DataType::RunEndEncoded");
+            }
+        }
+
+        // Safety: `validate_data` checks below
+        //    1. The given array data has exactly two child arrays.
+        //    2. The first child array (run_ends) has valid data type.
+        //    3. run_ends array does not have null values
+        //    4. run_ends array has non-zero and strictly increasing values.
+        //    5. The length of run_ends array and values array are the same.
+        data.validate_data().unwrap();
+
+        let run_ends = PrimitiveArray::<R>::from(data.child_data()[0].clone());
+        let values = make_array(data.child_data()[1].clone());
+        Self {
+            data,
+            run_ends,
+            values,
+        }
+    }
+}
+
+impl<R: ArrowRunEndIndexType> From<RunEndEncodedArray<R>> for ArrayData {
+    fn from(array: RunEndEncodedArray<R>) -> Self {
+        array.data
+    }
+}
+
+impl<T: ArrowRunEndIndexType> Array for RunEndEncodedArray<T> {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn data(&self) -> &ArrayData {
+        &self.data
+    }
+
+    fn into_data(self) -> ArrayData {
+        self.into()
+    }
+}
+
+impl<R: ArrowRunEndIndexType> std::fmt::Debug for RunEndEncodedArray<R> {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        writeln!(
+            f,
+            "RunEndEncodedArray {{run_ends: {:?}, values: {:?}}}",
+            self.run_ends, self.values
+        )
+    }
+}
+
+/// Constructs a `RunEndEncodedArray` from an iterator of optional strings.
+///
+/// # Example:
+/// ```
+/// use arrow_array::{RunEndEncodedArray, PrimitiveArray, StringArray, 
types::Int16Type};
+///
+/// let test = vec!["a", "a", "b", "c", "c"];
+/// let array: RunEndEncodedArray<Int16Type> = test
+///     .iter()
+///     .map(|&x| if x == "b" { None } else { Some(x) })
+///     .collect();
+/// assert_eq!(
+///     "RunEndEncodedArray {run_ends: PrimitiveArray<Int16>\n[\n  2,\n  3,\n  
5,\n], values: StringArray\n[\n  \"a\",\n  null,\n  \"c\",\n]}\n",
+///     format!("{:?}", array)
+/// );
+/// ```
+impl<'a, T: ArrowRunEndIndexType> FromIterator<Option<&'a str>>
+    for RunEndEncodedArray<T>
+{
+    fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
+        let it = iter.into_iter();
+        let (lower, _) = it.size_hint();
+        let mut builder = StringREEArrayBuilder::with_capacity(lower, 256);
+        it.for_each(|i| {
+            if let Some(i) = i {
+                builder
+                    .append_value(i)
+                    .expect("Unable to append a value to a run end encoded 
array.");
+            } else {
+                builder
+                    .append_null()
+                    .expect("Unable to append null value to run end encoded 
array.");
+            }
+        });
+
+        builder.finish()
+    }
+}
+
+/// Constructs a `RunEndEncodedArray` from an iterator of strings.
+///
+/// # Example:
+///
+/// ```
+/// use arrow_array::{RunEndEncodedArray, PrimitiveArray, StringArray, 
types::Int16Type};
+///
+/// let test = vec!["a", "a", "b", "c"];
+/// let array: RunEndEncodedArray<Int16Type> = test.into_iter().collect();
+/// assert_eq!(
+///     "RunEndEncodedArray {run_ends: PrimitiveArray<Int16>\n[\n  2,\n  3,\n  
4,\n], values: StringArray\n[\n  \"a\",\n  \"b\",\n  \"c\",\n]}\n",
+///     format!("{:?}", array)
+/// );
+/// ```
+impl<'a, T: ArrowRunEndIndexType> FromIterator<&'a str> for 
RunEndEncodedArray<T> {
+    fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
+        let it = iter.into_iter();
+        let (lower, _) = it.size_hint();
+        let mut builder = StringREEArrayBuilder::with_capacity(lower, 256);
+        it.for_each(|i| {
+            builder
+                .append_value(i)
+                .expect("Unable to append a value to a dictionary array.");
+        });
+
+        builder.finish()
+    }
+}
+
+///
+/// A [`RunEndEncodedArray`] array where run ends are stored using `i16` data 
type.
+///
+/// # Example: Using `collect`
+/// ```
+/// # use arrow_array::{Array, Int16RunEndEncodedArray, Int16Array, 
StringArray};
+/// # use std::sync::Arc;
+///
+/// let array: Int16RunEndEncodedArray = vec!["a", "a", "b", "c", 
"c"].into_iter().collect();
+/// let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", 
"c"]));
+/// assert_eq!(array.run_ends(), &Int16Array::from(vec![2, 3, 5]));
+/// assert_eq!(array.values(), &values);
+/// ```
+pub type Int16RunEndEncodedArray = RunEndEncodedArray<Int16Type>;
+
+///
+/// A [`RunEndEncodedArray`] array where run ends are stored using `i32` data 
type.
+///
+/// # Example: Using `collect`
+/// ```
+/// # use arrow_array::{Array, Int32RunEndEncodedArray, Int32Array, 
StringArray};
+/// # use std::sync::Arc;
+///
+/// let array: Int32RunEndEncodedArray = vec!["a", "a", "b", "c", 
"c"].into_iter().collect();
+/// let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", 
"c"]));
+/// assert_eq!(array.run_ends(), &Int32Array::from(vec![2, 3, 5]));
+/// assert_eq!(array.values(), &values);
+/// ```
+pub type Int32RunEndEncodedArray = RunEndEncodedArray<Int32Type>;
+
+///
+/// A [`RunEndEncodedArray`] array where run ends are stored using `i64` data 
type.
+///
+/// # Example: Using `collect`
+/// ```
+/// # use arrow_array::{Array, Int64RunEndEncodedArray, Int64Array, 
StringArray};
+/// # use std::sync::Arc;
+///
+/// let array: Int64RunEndEncodedArray = vec!["a", "a", "b", "c", 
"c"].into_iter().collect();
+/// let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", 
"c"]));
+/// assert_eq!(array.run_ends(), &Int64Array::from(vec![2, 3, 5]));
+/// assert_eq!(array.values(), &values);
+/// ```
+pub type Int64RunEndEncodedArray = RunEndEncodedArray<Int64Type>;
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use super::*;
+    use crate::builder::PrimitiveREEArrayBuilder;
+    use crate::types::{Int16Type, Int32Type, UInt32Type};
+    use crate::{Array, Int16Array, Int32Array, StringArray};
+    use arrow_buffer::{Buffer, ToByteSlice};
+    use arrow_schema::Field;
+
+    #[test]
+    fn test_ree_array() {
+        // Construct a value array
+        let value_data = ArrayData::builder(DataType::Int8)
+            .len(8)
+            .add_buffer(Buffer::from(
+                &[10_i8, 11, 12, 13, 14, 15, 16, 17].to_byte_slice(),
+            ))
+            .build()
+            .unwrap();
+
+        // Construct a run_ends array:
+        let run_ends_data = ArrayData::builder(DataType::Int16)
+            .len(8)
+            .add_buffer(Buffer::from(
+                &[4_i16, 6, 7, 9, 13, 18, 20, 22].to_byte_slice(),
+            ))
+            .build()
+            .unwrap();
+
+        // Construct a run ends encoded array from the above two
+        let run_ends_type = Field::new("run_ends", DataType::Int16, false);
+        let value_type = Field::new("values", DataType::Int8, true);
+        let ree_array_type =
+            DataType::RunEndEncoded(Box::new(run_ends_type), 
Box::new(value_type));
+        let dict_data = ArrayData::builder(ree_array_type)
+            .add_child_data(run_ends_data.clone())
+            .add_child_data(value_data.clone())
+            .build()
+            .unwrap();
+        let ree_array = Int16RunEndEncodedArray::from(dict_data);
+
+        let values = ree_array.values();
+        assert_eq!(&value_data, values.data());
+        assert_eq!(&DataType::Int8, values.data_type());
+
+        let run_ends = ree_array.run_ends();
+        assert_eq!(&run_ends_data, run_ends.data());
+        assert_eq!(&DataType::Int16, run_ends.data_type());
+    }
+
+    #[test]
+    fn test_ree_array_fmt_debug() {
+        let mut builder =
+            PrimitiveREEArrayBuilder::<Int16Type, 
UInt32Type>::with_capacity(3);
+        builder.append_value(12345678).unwrap();
+        builder.append_null().unwrap();
+        builder.append_value(22345678).unwrap();
+        let array = builder.finish();
+        assert_eq!(
+            "RunEndEncodedArray {run_ends: PrimitiveArray<Int16>\n[\n  1,\n  
2,\n  3,\n], values: PrimitiveArray<UInt32>\n[\n  12345678,\n  null,\n  
22345678,\n]}\n",
+            format!("{:?}", array)
+        );
+
+        let mut builder =
+            PrimitiveREEArrayBuilder::<Int16Type, 
UInt32Type>::with_capacity(20);
+        for _ in 0..20 {
+            builder.append_value(1).unwrap();
+        }
+        let array = builder.finish();
+        assert_eq!(
+            "RunEndEncodedArray {run_ends: PrimitiveArray<Int16>\n[\n  20,\n], 
values: PrimitiveArray<UInt32>\n[\n  1,\n]}\n",
+            format!("{:?}", array)
+        );
+    }
+
+    #[test]
+    fn test_ree_array_from_iter() {
+        let test = vec!["a", "a", "b", "c"];
+        let array: RunEndEncodedArray<Int16Type> = test
+            .iter()
+            .map(|&x| if x == "b" { None } else { Some(x) })
+            .collect();
+        assert_eq!(
+            "RunEndEncodedArray {run_ends: PrimitiveArray<Int16>\n[\n  2,\n  
3,\n  4,\n], values: StringArray\n[\n  \"a\",\n  null,\n  \"c\",\n]}\n",
+            format!("{:?}", array)
+        );
+
+        let array: RunEndEncodedArray<Int16Type> = test.into_iter().collect();
+        assert_eq!(
+            "RunEndEncodedArray {run_ends: PrimitiveArray<Int16>\n[\n  2,\n  
3,\n  4,\n], values: StringArray\n[\n  \"a\",\n  \"b\",\n  \"c\",\n]}\n",
+            format!("{:?}", array)
+        );
+    }
+
+    #[test]
+    fn test_ree_array_run_ends_as_primitive_array() {
+        let test = vec!["a", "b", "c", "a"];
+        let array: RunEndEncodedArray<Int16Type> = test.into_iter().collect();
+
+        let run_ends = array.run_ends();
+        assert_eq!(&DataType::Int16, run_ends.data_type());
+        assert_eq!(0, run_ends.null_count());
+        assert_eq!(&[1, 2, 3, 4], run_ends.values());
+    }
+
+    #[test]
+    fn test_ree_array_as_primitive_array_with_null() {
+        let test = vec![Some("a"), None, Some("b"), None, None, Some("a")];
+        let array: RunEndEncodedArray<Int32Type> = test.into_iter().collect();
+
+        let run_ends = array.run_ends();
+        assert_eq!(&DataType::Int32, run_ends.data_type());
+        assert_eq!(0, run_ends.null_count());
+        assert_eq!(5, run_ends.len());
+        assert_eq!(&[1, 2, 3, 5, 6], run_ends.values());
+
+        let values_data = array.values();
+        assert_eq!(2, values_data.null_count());
+        assert_eq!(5, values_data.len());
+    }
+
+    #[test]
+    fn test_ree_array_all_nulls() {
+        let test = vec![None, None, None];
+        let array: RunEndEncodedArray<Int32Type> = test.into_iter().collect();
+
+        let run_ends = array.run_ends();
+        assert_eq!(1, run_ends.len());
+        assert_eq!(&[3], run_ends.values());
+
+        let values_data = array.values();
+        assert_eq!(1, values_data.null_count());
+    }
+
+    #[test]
+    fn test_ree_array_try_new() {
+        let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
+            .into_iter()
+            .collect();
+        let run_ends: Int32Array =
+            [Some(1), Some(2), Some(3), Some(4)].into_iter().collect();
+
+        let array = RunEndEncodedArray::<Int32Type>::try_new(&run_ends, 
&values).unwrap();
+        assert_eq!(array.run_ends().data_type(), &DataType::Int32);
+        assert_eq!(array.values().data_type(), &DataType::Utf8);
+
+        assert_eq!(array.run_ends.null_count(), 0);
+        assert_eq!(array.values().null_count(), 1);
+
+        assert_eq!(
+            "RunEndEncodedArray {run_ends: PrimitiveArray<Int32>\n[\n  1,\n  
2,\n  3,\n  4,\n], values: StringArray\n[\n  \"foo\",\n  \"bar\",\n  null,\n  
\"baz\",\n]}\n",
+            format!("{:?}", array)
+        );
+    }
+
+    #[test]
+    fn test_ree_array_int16_type_definition() {
+        let array: Int16RunEndEncodedArray =
+            vec!["a", "a", "b", "c", "c"].into_iter().collect();
+        let values: Arc<dyn Array> = Arc::new(StringArray::from(vec!["a", "b", 
"c"]));
+        assert_eq!(array.run_ends(), &Int16Array::from(vec![2, 3, 5]));
+        assert_eq!(array.values(), &values);
+    }
+
+    #[test]
+    fn test_ree_array_length_mismatch() {
+        let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")]
+            .into_iter()
+            .collect();
+        let run_ends: Int32Array = [Some(1), Some(2), 
Some(3)].into_iter().collect();
+
+        let actual = RunEndEncodedArray::<Int32Type>::try_new(&run_ends, 
&values);
+        let expected = ArrowError::InvalidArgumentError("The run_ends array 
length should be the same as values array length. Run_ends array length is 3, 
values array length is 4".to_string());
+        assert_eq!(
+            format!("{}", expected),
+            format!("{}", actual.err().unwrap())
+        );
+    }
+
+    #[test]
+    fn test_ree_array_run_ends_with_null() {
+        let values: StringArray = [Some("foo"), Some("bar"), Some("baz")]
+            .into_iter()
+            .collect();
+        let run_ends: Int32Array = [Some(1), None, 
Some(3)].into_iter().collect();
+
+        let actual = RunEndEncodedArray::<Int32Type>::try_new(&run_ends, 
&values);
+        let expected = ArrowError::InvalidArgumentError("Found null values in 
run_ends array. The run_ends array should not have null values.".to_string());

Review Comment:
   :+1:



##########
arrow-schema/src/datatype.rs:
##########
@@ -346,6 +358,13 @@ impl DataType {
         )
     }
 
+    /// Returns true if this type is valid for run-ends array in 
RunEndEncodedArray
+    #[inline]
+    pub fn is_ree_run_ends_type(&self) -> bool {

Review Comment:
   ```suggestion
       pub fn is_run_ends_type(&self) -> bool {
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to