Re: [PR] GH-36399: [Python] Add missing `shape` property to `RecordBatch` [arrow]

via GitHub Tue, 19 Mar 2024 08:52:19 -0700


jorisvandenbossche commented on code in PR #40643:
URL: https://github.com/apache/arrow/pull/40643#discussion_r1530118054



##########
python/pyarrow/table.pxi:
##########
@@ -1974,6 +1974,28 @@ cdef class _Tabular(_PandasConvertible):
     def num_rows(self):
         raise NotImplementedError
 
+    @property
+    def shape(self):
+        """
+        Dimensions of the table or record batch: (#rows, #columns).
+
+        Returns
+        -------
+        (int, int)
+            Number of rows and number of columns.
+
+        Examples
+        --------
+        >>> import pyarrow as pa
+        >>> import pandas as pd
+        >>> df = pd.DataFrame({'n_legs': [None, 4, 5, None],
+        ...                    'animals': ["Flamingo", "Horse", None, "Centipede"]})
+        >>> table = pa.Table.from_pandas(df)

Review Comment:
   ```suggestion
           >>> table = pa.table({'n_legs': [None, 4, 5, None],
           ...                   'animals': ["Flamingo", "Horse", None, "Centipede"]})
   ```
   
   I know this is copied from the existing docstring, but while we are editing this anyway, we might as well simplify it.



##########
python/pyarrow/tests/test_table.py:
##########
@@ -1246,53 +1184,141 @@ def test_table_to_batches():
         table.to_batches(max_chunksize=0)
 
 
-def test_table_basics():
+@pytest.mark.parametrize(
+    ('cls'),
+    [
+        (pa.Table),
+        (pa.RecordBatch)
+    ]
+)
+def test_table_basics(cls):
     data = [
-        pa.array(range(5), type='int64'),
-        pa.array([-10, -5, 0, 5, 10], type='int64')
+        pa.array(range(5), type='int16'),
+        pa.array([-10, -5, 0, None, 10], type='int32')
     ]
-    table = pa.table(data, names=('a', 'b'))
+    table = cls.from_arrays(data, names=('a', 'b'))
     table.validate()
+
+    assert not table.schema.metadata
     assert len(table) == 5
     assert table.num_rows == 5
-    assert table.num_columns == 2
+    assert table.num_columns == len(data)
     assert table.shape == (5, 2)
-    assert table.get_total_buffer_size() == 2 * (5 * 8)
-    assert table.nbytes == 2 * (5 * 8)
+    # (only the second array has a null bitmap)
+    assert table.get_total_buffer_size() == (5 * 2) + (5 * 4 + 1)
+    assert table.nbytes == (5 * 2) + (5 * 4 + 1)
     assert sys.getsizeof(table) >= object.__sizeof__(
         table) + table.get_total_buffer_size()
 
     pydict = table.to_pydict()
     assert pydict == OrderedDict([
         ('a', [0, 1, 2, 3, 4]),
-        ('b', [-10, -5, 0, 5, 10])
+        ('b', [-10, -5, 0, None, 10])
     ])
     assert isinstance(pydict, dict)
-    assert table == pa.table(pydict, schema=table.schema)
+    assert table == cls.from_pydict(pydict, schema=table.schema)
+
+    with pytest.raises(IndexError):
+        # bounds checking
+        table[2]
 
     columns = []
     for col in table.itercolumns():
-        columns.append(col)
-        for chunk in col.iterchunks():
-            assert chunk is not None
 
-        with pytest.raises(IndexError):
-            col.chunk(-1)
+        if cls is pa.Table:
+            assert type(col) is pa.ChunkedArray
+
+            for chunk in col.iterchunks():
+                assert chunk is not None
+
+            with pytest.raises(IndexError):
+                col.chunk(-1)
 
-        with pytest.raises(IndexError):
-            col.chunk(col.num_chunks)
+            with pytest.raises(IndexError):
+                col.chunk(col.num_chunks)
+
+        else:
+            assert issubclass(type(col), pa.Array)
+
+        columns.append(col)
 
     assert table.columns == columns
-    assert table == pa.table(columns, names=table.column_names)
-    assert table != pa.table(columns[1:], names=table.column_names[1:])
+    assert table == cls.from_arrays(columns, names=table.column_names)
+    assert table != cls.from_arrays(columns[1:], names=table.column_names[1:])
     assert table != columns
 
+    # Schema passed explicitly
+    schema = pa.schema([pa.field('c0', pa.int16(),
+                                 metadata={'key': 'value'}),
+                        pa.field('c1', pa.int32())],
+                       metadata={b'foo': b'bar'})
+    table = cls.from_arrays(data, schema=schema)
+    assert table.schema == schema
+
     wr = weakref.ref(table)
     assert wr() is not None
     del table
     assert wr() is None
 
 
+def test_table_str():

Review Comment:
   There is already a `test_table_repr_to_string` that should cover this, I think?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org

Re: [PR] GH-36399: [Python] Add missing `shape` property to `RecordBatch` [arrow]

Reply via email to