judahrand commented on code in PR #40643:
URL: https://github.com/apache/arrow/pull/40643#discussion_r1530716702
##########
python/pyarrow/tests/test_table.py:
##########
@@ -1246,53 +1184,141 @@ def test_table_to_batches():
table.to_batches(max_chunksize=0)
-def test_table_basics():
+@pytest.mark.parametrize(
+ ('cls'),
+ [
+ (pa.Table),
+ (pa.RecordBatch)
+ ]
+)
+def test_table_basics(cls):
data = [
- pa.array(range(5), type='int64'),
- pa.array([-10, -5, 0, 5, 10], type='int64')
+ pa.array(range(5), type='int16'),
+ pa.array([-10, -5, 0, None, 10], type='int32')
]
- table = pa.table(data, names=('a', 'b'))
+ table = cls.from_arrays(data, names=('a', 'b'))
table.validate()
+
+ assert not table.schema.metadata
assert len(table) == 5
assert table.num_rows == 5
- assert table.num_columns == 2
+ assert table.num_columns == len(data)
assert table.shape == (5, 2)
- assert table.get_total_buffer_size() == 2 * (5 * 8)
- assert table.nbytes == 2 * (5 * 8)
+ # (only the second array has a null bitmap)
+ assert table.get_total_buffer_size() == (5 * 2) + (5 * 4 + 1)
+ assert table.nbytes == (5 * 2) + (5 * 4 + 1)
assert sys.getsizeof(table) >= object.__sizeof__(
table) + table.get_total_buffer_size()
pydict = table.to_pydict()
assert pydict == OrderedDict([
('a', [0, 1, 2, 3, 4]),
- ('b', [-10, -5, 0, 5, 10])
+ ('b', [-10, -5, 0, None, 10])
])
assert isinstance(pydict, dict)
- assert table == pa.table(pydict, schema=table.schema)
+ assert table == cls.from_pydict(pydict, schema=table.schema)
+
+ with pytest.raises(IndexError):
+ # bounds checking
+ table[2]
columns = []
for col in table.itercolumns():
- columns.append(col)
- for chunk in col.iterchunks():
- assert chunk is not None
- with pytest.raises(IndexError):
- col.chunk(-1)
+ if cls is pa.Table:
+ assert type(col) is pa.ChunkedArray
+
+ for chunk in col.iterchunks():
+ assert chunk is not None
+
+ with pytest.raises(IndexError):
+ col.chunk(-1)
- with pytest.raises(IndexError):
- col.chunk(col.num_chunks)
+ with pytest.raises(IndexError):
+ col.chunk(col.num_chunks)
+
+ else:
+ assert issubclass(type(col), pa.Array)
+
+ columns.append(col)
assert table.columns == columns
- assert table == pa.table(columns, names=table.column_names)
- assert table != pa.table(columns[1:], names=table.column_names[1:])
+ assert table == cls.from_arrays(columns, names=table.column_names)
+ assert table != cls.from_arrays(columns[1:], names=table.column_names[1:])
assert table != columns
+ # Schema passed explicitly
+ schema = pa.schema([pa.field('c0', pa.int16(),
+ metadata={'key': 'value'}),
+ pa.field('c1', pa.int32())],
+ metadata={b'foo': b'bar'})
+ table = cls.from_arrays(data, schema=schema)
+ assert table.schema == schema
+
wr = weakref.ref(table)
assert wr() is not None
del table
assert wr() is None
+def test_table_str():
Review Comment:
https://github.com/apache/arrow/pull/40643/commits/b2b30159b43ff028cd9ff0d41b5ae0a4ee931c7b
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org