jorisvandenbossche commented on a change in pull request #12311: URL: https://github.com/apache/arrow/pull/12311#discussion_r796615210
########## File path: python/pyarrow/table.pxi ########## @@ -1771,6 +1771,22 @@ cdef class Table(_PandasConvertible): raise TypeError(type(item)) result = pyarrow_wrap_table(CTable.Make(c_schema, columns)) + + # In case of an empty dataframe with RangeIndex -> create an empty Table with + # number of rows equal to Index length + if arrays == [] and schema is not None: + try: + kind = schema.pandas_metadata["index_columns"][0]["kind"] + if kind =="range": + start = schema.pandas_metadata["index_columns"][0]["start"] + stop = schema.pandas_metadata["index_columns"][0]["stop"] + step = schema.pandas_metadata["index_columns"][0]["step"] + n_rows = (stop - start - 1)//step + 1 + result = pyarrow_wrap_table( + CTable.MakeWithRows(c_schema, columns, n_rows)) + except IndexError: + pass Review comment: I would maybe move this check to the `from_pandas` method, as it is specific to pandas input, and not something you should ever get from directly using `from_arrays` ########## File path: python/pyarrow/table.pxi ########## @@ -1771,6 +1771,22 @@ cdef class Table(_PandasConvertible): raise TypeError(type(item)) result = pyarrow_wrap_table(CTable.Make(c_schema, columns)) + + # In case of an empty dataframe with RangeIndex -> create an empty Table with + # number of rows equal to Index length + if arrays == [] and schema is not None: Review comment: ```suggestion if len(arrays) == 0 and schema is not None: ``` ########## File path: python/pyarrow/tests/test_table.py ########## @@ -339,23 +339,29 @@ def test_chunked_array_to_pandas_preserve_name(): tm.assert_series_equal(result, expected) -@pytest.mark.xfail @pytest.mark.pandas def test_table_roundtrip_to_pandas_empty_dataframe(): # https://issues.apache.org/jira/browse/ARROW-10643 + # The conversion should not results in a table with 0 rows if the original + # DataFrame has a RangeIndex but is empty. 
import pandas as pd data = pd.DataFrame(index=pd.RangeIndex(0, 10, 1)) table = pa.table(data) result = table.to_pandas() - # TODO the conversion results in a table with 0 rows if the original - # DataFrame has a RangeIndex (i.e. no index column in the converted - # Arrow table) assert table.num_rows == 10 assert data.shape == (10, 0) assert result.shape == (10, 0) + data = pd.DataFrame(index=pd.RangeIndex(0, 10, 3)) + table = pa.table(data) + result = table.to_pandas() + + assert table.num_rows == 4 + assert data.shape == (4, 0) + assert result.shape == (4, 0) + Review comment: Maybe add an `assert result.index.equals(data.index)`, if that passes -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org