This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 744412f75 Refactor Binary Builder and String Builder Constructors (#2592)
744412f75 is described below
commit 744412f751d926311851c30e7271e3f4f14757f7
Author: Vrishabh <[email protected]>
AuthorDate: Sat Aug 27 18:14:17 2022 +0530
Refactor Binary Builder and String Builder Constructors (#2592)
* Binary and String array builder refactor
* Fix minor typo
* Fix errors
* Fix clippy lints
---
arrow/benches/builder.rs | 2 +-
arrow/benches/cast_kernels.rs | 4 ++--
arrow/benches/string_dictionary_builder.rs | 5 ++++-
arrow/src/array/array_dictionary.rs | 4 ++--
arrow/src/array/array_string.rs | 2 +-
arrow/src/array/builder/generic_binary_builder.rs | 23 +++++++++++-----------
arrow/src/array/builder/generic_string_builder.rs | 13 ++++++++----
arrow/src/array/builder/map_builder.rs | 2 +-
arrow/src/array/builder/mod.rs | 2 +-
.../src/array/builder/string_dictionary_builder.rs | 4 ++--
arrow/src/array/builder/struct_builder.rs | 6 +++---
arrow/src/array/transform/mod.rs | 4 ++--
arrow/src/compute/kernels/cast.rs | 8 ++++----
arrow/src/compute/kernels/comparison.rs | 14 ++++++-------
arrow/src/compute/kernels/filter.rs | 4 ++--
arrow/src/compute/kernels/regexp.rs | 8 +++++---
arrow/src/compute/kernels/take.rs | 2 +-
arrow/src/json/reader.rs | 5 +++--
arrow/src/util/integration_util.rs | 8 ++++----
arrow/src/util/pretty.rs | 2 +-
parquet/src/arrow/array_reader/map_array.rs | 2 +-
21 files changed, 67 insertions(+), 57 deletions(-)
diff --git a/arrow/benches/builder.rs b/arrow/benches/builder.rs
index 8040b445c..c2ebcb3da 100644
--- a/arrow/benches/builder.rs
+++ b/arrow/benches/builder.rs
@@ -98,7 +98,7 @@ fn bench_string(c: &mut Criterion) {
));
group.bench_function("bench_string", |b| {
b.iter(|| {
- let mut builder = StringBuilder::new(64);
+ let mut builder = StringBuilder::new();
for _ in 0..NUM_BATCHES * BATCH_SIZE {
builder.append_value(SAMPLE_STRING);
}
diff --git a/arrow/benches/cast_kernels.rs b/arrow/benches/cast_kernels.rs
index 31e54a9ed..ac8fc08d9 100644
--- a/arrow/benches/cast_kernels.rs
+++ b/arrow/benches/cast_kernels.rs
@@ -45,7 +45,7 @@ fn build_utf8_date_array(size: usize, with_nulls: bool) ->
ArrayRef {
// use random numbers to avoid spurious compiler optimizations wrt to
branching
let mut rng = seedable_rng();
- let mut builder = StringBuilder::new(size);
+ let mut builder = StringBuilder::new();
let range = Uniform::new(0, 737776);
for _ in 0..size {
@@ -66,7 +66,7 @@ fn build_utf8_date_time_array(size: usize, with_nulls: bool)
-> ArrayRef {
// use random numbers to avoid spurious compiler optimizations wrt to
branching
let mut rng = seedable_rng();
- let mut builder = StringBuilder::new(size);
+ let mut builder = StringBuilder::new();
let range = Uniform::new(0, 1608071414123);
for _ in 0..size {
diff --git a/arrow/benches/string_dictionary_builder.rs b/arrow/benches/string_dictionary_builder.rs
index 267288650..1a3b95917 100644
--- a/arrow/benches/string_dictionary_builder.rs
+++ b/arrow/benches/string_dictionary_builder.rs
@@ -44,7 +44,10 @@ fn criterion_benchmark(c: &mut Criterion) {
let strings = build_strings(dict_size, total_size, key_len);
b.iter(|| {
let keys = Int32Builder::with_capacity(strings.len());
- let values = StringBuilder::new((key_len + 1) * dict_size);
+ let values = StringBuilder::with_capacity(
+ key_len + 1,
+ (key_len + 1) * dict_size,
+ );
let mut builder = StringDictionaryBuilder::new(keys,
values);
for val in &strings {
diff --git a/arrow/src/array/array_dictionary.rs b/arrow/src/array/array_dictionary.rs
index c08bb2260..79f2969df 100644
--- a/arrow/src/array/array_dictionary.rs
+++ b/arrow/src/array/array_dictionary.rs
@@ -330,7 +330,7 @@ impl<'a, T: ArrowDictionaryKeyType> FromIterator<Option<&'a
str>> for Dictionary
let it = iter.into_iter();
let (lower, _) = it.size_hint();
let key_builder = PrimitiveBuilder::<T>::with_capacity(lower);
- let value_builder = StringBuilder::new(256);
+ let value_builder = StringBuilder::with_capacity(256, 1024);
let mut builder = StringDictionaryBuilder::new(key_builder,
value_builder);
it.for_each(|i| {
if let Some(i) = i {
@@ -368,7 +368,7 @@ impl<'a, T: ArrowDictionaryKeyType> FromIterator<&'a str>
for DictionaryArray<T>
let it = iter.into_iter();
let (lower, _) = it.size_hint();
let key_builder = PrimitiveBuilder::<T>::with_capacity(lower);
- let value_builder = StringBuilder::new(256);
+ let value_builder = StringBuilder::with_capacity(256, 1024);
let mut builder = StringDictionaryBuilder::new(key_builder,
value_builder);
it.for_each(|i| {
builder
diff --git a/arrow/src/array/array_string.rs b/arrow/src/array/array_string.rs
index 5dde2ea64..62743a20a 100644
--- a/arrow/src/array/array_string.rs
+++ b/arrow/src/array/array_string.rs
@@ -499,7 +499,7 @@ mod tests {
#[test]
fn test_nested_string_array() {
- let string_builder = StringBuilder::new(3);
+ let string_builder = StringBuilder::with_capacity(3, 10);
let mut list_of_string_builder = ListBuilder::new(string_builder);
list_of_string_builder.values().append_value("foo");
diff --git a/arrow/src/array/builder/generic_binary_builder.rs b/arrow/src/array/builder/generic_binary_builder.rs
index aca2e1d96..26501ba09 100644
--- a/arrow/src/array/builder/generic_binary_builder.rs
+++ b/arrow/src/array/builder/generic_binary_builder.rs
@@ -34,15 +34,8 @@ pub struct GenericBinaryBuilder<OffsetSize: OffsetSizeTrait>
{
impl<OffsetSize: OffsetSizeTrait> GenericBinaryBuilder<OffsetSize> {
/// Creates a new [`GenericBinaryBuilder`].
- /// `capacity` is the number of bytes in the values array.
- pub fn new(capacity: usize) -> Self {
- let mut offsets_builder = BufferBuilder::<OffsetSize>::new(1024);
- offsets_builder.append(OffsetSize::zero());
- Self {
- value_builder: UInt8BufferBuilder::new(capacity),
- offsets_builder,
- null_buffer_builder: NullBufferBuilder::new(1024),
- }
+ pub fn new() -> Self {
+ Self::with_capacity(1024, 1024)
}
/// Creates a new [`GenericBinaryBuilder`],
@@ -100,6 +93,12 @@ impl<OffsetSize: OffsetSizeTrait>
GenericBinaryBuilder<OffsetSize> {
}
}
+impl<OffsetSize: OffsetSizeTrait> Default for GenericBinaryBuilder<OffsetSize>
{
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
impl<OffsetSize: OffsetSizeTrait> ArrayBuilder for
GenericBinaryBuilder<OffsetSize> {
/// Returns the builder as a non-mutable `Any` reference.
fn as_any(&self) -> &dyn Any {
@@ -138,7 +137,7 @@ mod tests {
use crate::array::{Array, OffsetSizeTrait};
fn _test_generic_binary_builder<O: OffsetSizeTrait>() {
- let mut builder = GenericBinaryBuilder::<O>::new(20);
+ let mut builder = GenericBinaryBuilder::<O>::new();
builder.append_value(b"hello");
builder.append_value(b"");
@@ -168,7 +167,7 @@ mod tests {
}
fn _test_generic_binary_builder_all_nulls<O: OffsetSizeTrait>() {
- let mut builder = GenericBinaryBuilder::<O>::new(10);
+ let mut builder = GenericBinaryBuilder::<O>::new();
builder.append_null();
builder.append_null();
builder.append_null();
@@ -194,7 +193,7 @@ mod tests {
}
fn _test_generic_binary_builder_reset<O: OffsetSizeTrait>() {
- let mut builder = GenericBinaryBuilder::<O>::new(20);
+ let mut builder = GenericBinaryBuilder::<O>::new();
builder.append_value(b"hello");
builder.append_value(b"");
diff --git a/arrow/src/array/builder/generic_string_builder.rs b/arrow/src/array/builder/generic_string_builder.rs
index 02c34bdd3..8f69f5d9c 100644
--- a/arrow/src/array/builder/generic_string_builder.rs
+++ b/arrow/src/array/builder/generic_string_builder.rs
@@ -29,10 +29,9 @@ pub struct GenericStringBuilder<OffsetSize: OffsetSizeTrait>
{
impl<OffsetSize: OffsetSizeTrait> GenericStringBuilder<OffsetSize> {
/// Creates a new [`GenericStringBuilder`],
- /// `capacity` is the number of bytes of string data to pre-allocate space for in this builder
- pub fn new(capacity: usize) -> Self {
+ pub fn new() -> Self {
Self {
- builder: GenericBinaryBuilder::new(capacity),
+ builder: GenericBinaryBuilder::new(),
}
}
@@ -82,6 +81,12 @@ impl<OffsetSize: OffsetSizeTrait>
GenericStringBuilder<OffsetSize> {
}
}
+impl<OffsetSize: OffsetSizeTrait> Default for GenericStringBuilder<OffsetSize>
{
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
impl<OffsetSize: OffsetSizeTrait> ArrayBuilder for
GenericStringBuilder<OffsetSize> {
/// Returns the builder as a non-mutable `Any` reference.
fn as_any(&self) -> &dyn Any {
@@ -121,7 +126,7 @@ mod tests {
use crate::array::{Array, ArrayBuilder, OffsetSizeTrait};
fn _test_generic_string_array_builder<O: OffsetSizeTrait>() {
- let mut builder = GenericStringBuilder::<O>::new(20);
+ let mut builder = GenericStringBuilder::<O>::new();
let owned = "arrow".to_owned();
builder.append_value("hello");
diff --git a/arrow/src/array/builder/map_builder.rs b/arrow/src/array/builder/map_builder.rs
index ffc9033a1..766e8a56b 100644
--- a/arrow/src/array/builder/map_builder.rs
+++ b/arrow/src/array/builder/map_builder.rs
@@ -201,7 +201,7 @@ mod tests {
#[test]
fn test_map_array_builder() {
- let string_builder = StringBuilder::new(4);
+ let string_builder = StringBuilder::new();
let int_builder = Int32Builder::with_capacity(4);
let mut builder = MapBuilder::new(None, string_builder, int_builder);
diff --git a/arrow/src/array/builder/mod.rs b/arrow/src/array/builder/mod.rs
index ecacfc1ea..c02acb326 100644
--- a/arrow/src/array/builder/mod.rs
+++ b/arrow/src/array/builder/mod.rs
@@ -75,7 +75,7 @@ pub use union_builder::UnionBuilder;
/// let mut data_builders: Vec<Box<dyn ArrayBuilder>> = vec![
/// Box::new(Float64Builder::new()),
/// Box::new(Int64Builder::new()),
-/// Box::new(StringBuilder::new(1024)),
+/// Box::new(StringBuilder::new()),
/// ];
///
/// // Fill
diff --git a/arrow/src/array/builder/string_dictionary_builder.rs b/arrow/src/array/builder/string_dictionary_builder.rs
index d962d0638..6ad4e9075 100644
--- a/arrow/src/array/builder/string_dictionary_builder.rs
+++ b/arrow/src/array/builder/string_dictionary_builder.rs
@@ -43,7 +43,7 @@ use std::sync::Arc;
/// // It can thus hold up to 256 distinct string values.
///
/// let key_builder = PrimitiveBuilder::<Int8Type>::with_capacity(100);
-/// let value_builder = StringBuilder::new(100);
+/// let value_builder = StringBuilder::new();
/// let mut builder = StringDictionaryBuilder::new(key_builder, value_builder);
///
/// // The builder builds the dictionary value by value
@@ -291,7 +291,7 @@ mod tests {
#[test]
fn test_string_dictionary_builder() {
let key_builder = PrimitiveBuilder::<Int8Type>::with_capacity(5);
- let value_builder = StringBuilder::new(2);
+ let value_builder = StringBuilder::new();
let mut builder = StringDictionaryBuilder::new(key_builder,
value_builder);
builder.append("abc").unwrap();
builder.append_null();
diff --git a/arrow/src/array/builder/struct_builder.rs b/arrow/src/array/builder/struct_builder.rs
index f2666670b..c5db09119 100644
--- a/arrow/src/array/builder/struct_builder.rs
+++ b/arrow/src/array/builder/struct_builder.rs
@@ -107,14 +107,14 @@ pub fn make_builder(datatype: &DataType, capacity: usize)
-> Box<dyn ArrayBuilde
DataType::UInt64 => Box::new(UInt64Builder::with_capacity(capacity)),
DataType::Float32 => Box::new(Float32Builder::with_capacity(capacity)),
DataType::Float64 => Box::new(Float64Builder::with_capacity(capacity)),
- DataType::Binary => Box::new(BinaryBuilder::new(capacity)),
+ DataType::Binary => Box::new(BinaryBuilder::with_capacity(1024, capacity)),
DataType::FixedSizeBinary(len) => {
Box::new(FixedSizeBinaryBuilder::with_capacity(capacity, *len))
}
DataType::Decimal128(precision, scale) => Box::new(
Decimal128Builder::with_capacity(capacity, *precision, *scale),
),
- DataType::Utf8 => Box::new(StringBuilder::new(capacity)),
+ DataType::Utf8 => Box::new(StringBuilder::with_capacity(1024, capacity)),
DataType::Date32 => Box::new(Date32Builder::with_capacity(capacity)),
DataType::Date64 => Box::new(Date64Builder::with_capacity(capacity)),
DataType::Time32(TimeUnit::Second) => {
@@ -255,7 +255,7 @@ mod tests {
#[test]
fn test_struct_array_builder() {
- let string_builder = StringBuilder::new(4);
+ let string_builder = StringBuilder::new();
let int_builder = Int32Builder::new();
let mut fields = Vec::new();
diff --git a/arrow/src/array/transform/mod.rs b/arrow/src/array/transform/mod.rs
index c63821fc2..48859922a 100644
--- a/arrow/src/array/transform/mod.rs
+++ b/arrow/src/array/transform/mod.rs
@@ -1507,7 +1507,7 @@ mod tests {
#[test]
fn test_list_of_strings_append() -> Result<()> {
// [["alpha", "beta", None]]
- let mut builder = ListBuilder::new(StringBuilder::new(32));
+ let mut builder = ListBuilder::new(StringBuilder::new());
builder.values().append_value("Hello");
builder.values().append_value("Arrow");
builder.values().append_null();
@@ -1515,7 +1515,7 @@ mod tests {
let a = builder.finish();
// [["alpha", "beta"], [None], ["gamma", "delta", None]]
- let mut builder = ListBuilder::new(StringBuilder::new(32));
+ let mut builder = ListBuilder::new(StringBuilder::new());
builder.values().append_value("alpha");
builder.values().append_value("beta");
builder.append(true);
diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs
index ea37b3b04..3df0c861c 100644
--- a/arrow/src/compute/kernels/cast.rs
+++ b/arrow/src/compute/kernels/cast.rs
@@ -2355,7 +2355,7 @@ where
let values = cast_values.as_any().downcast_ref::<StringArray>().unwrap();
let keys_builder = PrimitiveBuilder::<K>::with_capacity(values.len());
- let values_builder = StringBuilder::new(values.len());
+ let values_builder = StringBuilder::with_capacity(1024, values.len());
let mut b = StringDictionaryBuilder::new(keys_builder, values_builder);
// copy each element one at a time
@@ -4753,7 +4753,7 @@ mod tests {
use DataType::*;
let keys_builder = PrimitiveBuilder::<Int8Type>::new();
- let values_builder = StringBuilder::new(10);
+ let values_builder = StringBuilder::new();
let mut builder = StringDictionaryBuilder::new(keys_builder,
values_builder);
builder.append("one").unwrap();
builder.append_null();
@@ -4845,7 +4845,7 @@ mod tests {
// string values (and encode the expected behavior here);
let keys_builder = PrimitiveBuilder::<Int32Type>::new();
- let values_builder = StringBuilder::new(10);
+ let values_builder = StringBuilder::new();
let mut builder = StringDictionaryBuilder::new(keys_builder,
values_builder);
// add 200 distinct values (which can be stored by a
@@ -5388,7 +5388,7 @@ mod tests {
fn make_dictionary_utf8<K: ArrowDictionaryKeyType>() -> ArrayRef {
let keys_builder = PrimitiveBuilder::<K>::new();
// Pick Int32 arbitrarily for dictionary values
- let values_builder = StringBuilder::new(2);
+ let values_builder = StringBuilder::new();
let mut b = StringDictionaryBuilder::new(keys_builder, values_builder);
b.append("foo").unwrap();
b.append("bar").unwrap();
diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs
index f873079a0..928b5acf4 100644
--- a/arrow/src/compute/kernels/comparison.rs
+++ b/arrow/src/compute/kernels/comparison.rs
@@ -4265,7 +4265,7 @@ mod tests {
// contains(null, null) = false
#[test]
fn test_contains_utf8() {
- let values_builder = StringBuilder::new(10);
+ let values_builder = StringBuilder::new();
let mut builder = ListBuilder::new(values_builder);
builder.values().append_value("Lorem");
@@ -5253,7 +5253,7 @@ mod tests {
#[test]
fn test_eq_dyn_utf8_scalar_with_dict() {
let key_builder = PrimitiveBuilder::<Int8Type>::new();
- let value_builder = StringBuilder::new(100);
+ let value_builder = StringBuilder::new();
let mut builder = StringDictionaryBuilder::new(key_builder,
value_builder);
builder.append("abc").unwrap();
builder.append_null();
@@ -5281,7 +5281,7 @@ mod tests {
#[test]
fn test_lt_dyn_utf8_scalar_with_dict() {
let key_builder = PrimitiveBuilder::<Int8Type>::new();
- let value_builder = StringBuilder::new(100);
+ let value_builder = StringBuilder::new();
let mut builder = StringDictionaryBuilder::new(key_builder,
value_builder);
builder.append("abc").unwrap();
builder.append_null();
@@ -5310,7 +5310,7 @@ mod tests {
#[test]
fn test_lt_eq_dyn_utf8_scalar_with_dict() {
let key_builder = PrimitiveBuilder::<Int8Type>::new();
- let value_builder = StringBuilder::new(100);
+ let value_builder = StringBuilder::new();
let mut builder = StringDictionaryBuilder::new(key_builder,
value_builder);
builder.append("abc").unwrap();
builder.append_null();
@@ -5339,7 +5339,7 @@ mod tests {
#[test]
fn test_gt_eq_dyn_utf8_scalar_with_dict() {
let key_builder = PrimitiveBuilder::<Int8Type>::new();
- let value_builder = StringBuilder::new(100);
+ let value_builder = StringBuilder::new();
let mut builder = StringDictionaryBuilder::new(key_builder,
value_builder);
builder.append("abc").unwrap();
builder.append_null();
@@ -5369,7 +5369,7 @@ mod tests {
#[test]
fn test_gt_dyn_utf8_scalar_with_dict() {
let key_builder = PrimitiveBuilder::<Int8Type>::new();
- let value_builder = StringBuilder::new(100);
+ let value_builder = StringBuilder::new();
let mut builder = StringDictionaryBuilder::new(key_builder,
value_builder);
builder.append("abc").unwrap();
builder.append_null();
@@ -5398,7 +5398,7 @@ mod tests {
#[test]
fn test_neq_dyn_utf8_scalar_with_dict() {
let key_builder = PrimitiveBuilder::<Int8Type>::new();
- let value_builder = StringBuilder::new(100);
+ let value_builder = StringBuilder::new();
let mut builder = StringDictionaryBuilder::new(key_builder,
value_builder);
builder.append("abc").unwrap();
builder.append_null();
diff --git a/arrow/src/compute/kernels/filter.rs b/arrow/src/compute/kernels/filter.rs
index 621d1a465..81be3a1d1 100644
--- a/arrow/src/compute/kernels/filter.rs
+++ b/arrow/src/compute/kernels/filter.rs
@@ -1416,7 +1416,7 @@ mod tests {
#[test]
fn test_filter_map() {
let mut builder =
- MapBuilder::new(None, StringBuilder::new(16), Int64Builder::with_capacity(4));
+ MapBuilder::new(None, StringBuilder::new(), Int64Builder::with_capacity(4));
// [{"key1": 1}, {"key2": 2, "key3": 3}, null, {"key1": 1}
builder.keys().append_value("key1");
builder.values().append_value(1);
@@ -1438,7 +1438,7 @@ mod tests {
let got = filter(&maparray, &indices).unwrap();
let mut builder =
- MapBuilder::new(None, StringBuilder::new(8), Int64Builder::with_capacity(2));
+ MapBuilder::new(None, StringBuilder::new(), Int64Builder::with_capacity(2));
builder.keys().append_value("key1");
builder.values().append_value(1);
builder.append(true).unwrap();
diff --git a/arrow/src/compute/kernels/regexp.rs b/arrow/src/compute/kernels/regexp.rs
index b52a3e231..1c5fa1927 100644
--- a/arrow/src/compute/kernels/regexp.rs
+++ b/arrow/src/compute/kernels/regexp.rs
@@ -35,7 +35,8 @@ pub fn regexp_match<OffsetSize: OffsetSizeTrait>(
flags_array: Option<&GenericStringArray<OffsetSize>>,
) -> Result<ArrayRef> {
let mut patterns: HashMap<String, Regex> = HashMap::new();
- let builder: GenericStringBuilder<OffsetSize> = GenericStringBuilder::new(0);
+ let builder: GenericStringBuilder<OffsetSize> =
+ GenericStringBuilder::with_capacity(0, 0);
let mut list_builder = ListBuilder::new(builder);
let complete_pattern = match flags_array {
@@ -118,7 +119,7 @@ mod tests {
pattern_values.push("");
let pattern = StringArray::from(pattern_values);
let actual = regexp_match(&array, &pattern, None).unwrap();
- let elem_builder: GenericStringBuilder<i32> = GenericStringBuilder::new(0);
+ let elem_builder: GenericStringBuilder<i32> = GenericStringBuilder::new();
let mut expected_builder = ListBuilder::new(elem_builder);
expected_builder.values().append_value("005");
expected_builder.append(true);
@@ -141,7 +142,8 @@ mod tests {
let pattern = StringArray::from(vec![r"x.*-(\d*)-.*"; 4]);
let flags = StringArray::from(vec!["i"; 4]);
let actual = regexp_match(&array, &pattern, Some(&flags)).unwrap();
- let elem_builder: GenericStringBuilder<i32> = GenericStringBuilder::new(0);
+ let elem_builder: GenericStringBuilder<i32> =
+ GenericStringBuilder::with_capacity(0, 0);
let mut expected_builder = ListBuilder::new(elem_builder);
expected_builder.append(false);
expected_builder.values().append_value("7");
diff --git a/arrow/src/compute/kernels/take.rs b/arrow/src/compute/kernels/take.rs
index dafc204fb..3272c8454 100644
--- a/arrow/src/compute/kernels/take.rs
+++ b/arrow/src/compute/kernels/take.rs
@@ -2040,7 +2040,7 @@ mod tests {
#[test]
fn test_take_dict() {
let keys_builder = Int16Builder::new();
- let values_builder = StringBuilder::new(4);
+ let values_builder = StringBuilder::new();
let mut dict_builder = StringDictionaryBuilder::new(keys_builder,
values_builder);
diff --git a/arrow/src/json/reader.rs b/arrow/src/json/reader.rs
index cf986097a..2eb55889a 100644
--- a/arrow/src/json/reader.rs
+++ b/arrow/src/json/reader.rs
@@ -798,7 +798,8 @@ impl Decoder {
{
let mut builder: Box<dyn ArrayBuilder> = match data_type {
DataType::Utf8 => {
- let values_builder = StringBuilder::new(rows.len() * 5);
+ let values_builder =
+ StringBuilder::with_capacity(rows.len(), rows.len() * 5);
Box::new(ListBuilder::new(values_builder))
}
DataType::Dictionary(_, _) => {
@@ -902,7 +903,7 @@ impl Decoder {
T: ArrowPrimitiveType + ArrowDictionaryKeyType,
{
let key_builder = PrimitiveBuilder::<T>::with_capacity(row_len);
- let values_builder = StringBuilder::new(row_len * 5);
+ let values_builder = StringBuilder::with_capacity(row_len, row_len * 5);
StringDictionaryBuilder::new(key_builder, values_builder)
}
diff --git a/arrow/src/util/integration_util.rs b/arrow/src/util/integration_util.rs
index 36751ed6a..c100a137c 100644
--- a/arrow/src/util/integration_util.rs
+++ b/arrow/src/util/integration_util.rs
@@ -573,7 +573,7 @@ pub fn array_from_json(
Ok(Arc::new(b.finish()))
}
DataType::Binary => {
- let mut b = BinaryBuilder::new(json_col.count);
+ let mut b = BinaryBuilder::with_capacity(json_col.count, 1024);
for (is_valid, value) in json_col
.validity
.as_ref()
@@ -592,7 +592,7 @@ pub fn array_from_json(
Ok(Arc::new(b.finish()))
}
DataType::LargeBinary => {
- let mut b = LargeBinaryBuilder::new(json_col.count);
+ let mut b = LargeBinaryBuilder::with_capacity(json_col.count, 1024);
for (is_valid, value) in json_col
.validity
.as_ref()
@@ -611,7 +611,7 @@ pub fn array_from_json(
Ok(Arc::new(b.finish()))
}
DataType::Utf8 => {
- let mut b = StringBuilder::new(json_col.count);
+ let mut b = StringBuilder::with_capacity(json_col.count, 1024);
for (is_valid, value) in json_col
.validity
.as_ref()
@@ -627,7 +627,7 @@ pub fn array_from_json(
Ok(Arc::new(b.finish()))
}
DataType::LargeUtf8 => {
- let mut b = LargeStringBuilder::new(json_col.count);
+ let mut b = LargeStringBuilder::with_capacity(json_col.count, 1024);
for (is_valid, value) in json_col
.validity
.as_ref()
diff --git a/arrow/src/util/pretty.rs b/arrow/src/util/pretty.rs
index f6ed8dea3..b0013619b 100644
--- a/arrow/src/util/pretty.rs
+++ b/arrow/src/util/pretty.rs
@@ -242,7 +242,7 @@ mod tests {
let schema = Arc::new(Schema::new(vec![Field::new("d1", field_type,
true)]));
let keys_builder = PrimitiveBuilder::<Int32Type>::with_capacity(10);
- let values_builder = StringBuilder::new(10);
+ let values_builder = StringBuilder::new();
let mut builder = StringDictionaryBuilder::new(keys_builder,
values_builder);
builder.append("one")?;
diff --git a/parquet/src/arrow/array_reader/map_array.rs b/parquet/src/arrow/array_reader/map_array.rs
index 8fef86fc4..bb80fdbdc 100644
--- a/parquet/src/arrow/array_reader/map_array.rs
+++ b/parquet/src/arrow/array_reader/map_array.rs
@@ -162,7 +162,7 @@ mod tests {
)]);
// Create builders for map
- let string_builder = StringBuilder::new(5);
+ let string_builder = StringBuilder::new();
let ints_builder: PrimitiveBuilder<Int32Type> =
PrimitiveBuilder::new();
let mut map_builder = MapBuilder::new(None, string_builder,
ints_builder);