This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 04e9ff7d1e Python: Reduce the use of mock objects (#6438)
04e9ff7d1e is described below
commit 04e9ff7d1e9d7b7cc5811a1ebdf3ebf098e9505a
Author: Fokko Driesprong <[email protected]>
AuthorDate: Sun Dec 18 23:26:26 2022 +0100
Python: Reduce the use of mock objects (#6438)
---
python/pyiceberg/avro/file.py | 7 +-
python/pyiceberg/avro/reader.py | 17 +-
python/pyiceberg/manifest.py | 10 +-
python/pyiceberg/typedef.py | 20 ++
python/tests/avro/test_reader.py | 262 +++++++++++++--------------
python/tests/catalog/test_base.py | 24 ++-
python/tests/catalog/test_hive.py | 5 +-
python/tests/conftest.py | 55 +-----
python/tests/expressions/test_evaluator.py | 18 +-
python/tests/expressions/test_expressions.py | 96 ++++++----
python/tests/expressions/test_visitors.py | 65 ++-----
python/tests/io/test_fsspec.py | 9 +-
python/tests/io/test_io.py | 198 +++-----------------
python/tests/table/test_metadata.py | 70 +++----
python/tests/utils/test_manifest.py | 6 +-
15 files changed, 335 insertions(+), 527 deletions(-)
diff --git a/python/pyiceberg/avro/file.py b/python/pyiceberg/avro/file.py
index 68887a61ec..cc725e9db7 100644
--- a/python/pyiceberg/avro/file.py
+++ b/python/pyiceberg/avro/file.py
@@ -28,11 +28,12 @@ from typing import Optional, Type
from pyiceberg.avro.codecs import KNOWN_CODECS, Codec
from pyiceberg.avro.decoder import BinaryDecoder
-from pyiceberg.avro.reader import AvroStruct, ConstructReader, Reader
+from pyiceberg.avro.reader import ConstructReader, Reader
from pyiceberg.avro.resolver import resolve
from pyiceberg.io import InputFile, InputStream
from pyiceberg.io.memory import MemoryInputStream
from pyiceberg.schema import Schema, visit
+from pyiceberg.typedef import Record
from pyiceberg.types import (
FixedType,
MapType,
@@ -101,7 +102,7 @@ class Block:
def has_next(self) -> bool:
return self.position < self.block_records
- def __next__(self) -> AvroStruct:
+ def __next__(self) -> Record:
if self.has_next():
self.position += 1
return self.reader.read(self.block_decoder)
@@ -168,7 +169,7 @@ class AvroFile:
)
return block_records
- def __next__(self) -> AvroStruct:
+ def __next__(self) -> Record:
if self.block and self.block.has_next():
return next(self.block)
diff --git a/python/pyiceberg/avro/reader.py b/python/pyiceberg/avro/reader.py
index 6ea2b07ced..136498ed34 100644
--- a/python/pyiceberg/avro/reader.py
+++ b/python/pyiceberg/avro/reader.py
@@ -43,7 +43,7 @@ from uuid import UUID
from pyiceberg.avro.decoder import BinaryDecoder
from pyiceberg.schema import Schema, SchemaVisitorPerPrimitiveType
-from pyiceberg.typedef import StructProtocol
+from pyiceberg.typedef import Record, StructProtocol
from pyiceberg.types import (
BinaryType,
BooleanType,
@@ -103,17 +103,6 @@ def _skip_map_array(decoder: BinaryDecoder, skip_entry: Callable[[], None]) -> N
block_count = decoder.read_int()
-@dataclass(frozen=True)
-class AvroStruct(StructProtocol):
- _data: List[Union[Any, StructProtocol]] = dataclassfield()
-
- def set(self, pos: int, value: Any) -> None:
- self._data[pos] = value
-
- def get(self, pos: int) -> Any:
- return self._data[pos]
-
-
class Reader(Singleton):
@abstractmethod
def read(self, decoder: BinaryDecoder) -> Any:
@@ -275,7 +264,7 @@ class OptionReader(Reader):
class StructReader(Reader):
fields: Tuple[Tuple[Optional[int], Reader], ...] = dataclassfield()
- def read(self, decoder: BinaryDecoder) -> AvroStruct:
+ def read(self, decoder: BinaryDecoder) -> Record:
result: List[Union[Any, StructProtocol]] = [None] * len(self.fields)
for (pos, field) in self.fields:
if pos is not None:
@@ -283,7 +272,7 @@ class StructReader(Reader):
else:
field.skip(decoder)
- return AvroStruct(result)
+ return Record(*result)
def skip(self, decoder: BinaryDecoder) -> None:
for _, field in self.fields:
diff --git a/python/pyiceberg/manifest.py b/python/pyiceberg/manifest.py
index 75ec0b6a14..9f7f394159 100644
--- a/python/pyiceberg/manifest.py
+++ b/python/pyiceberg/manifest.py
@@ -28,9 +28,9 @@ from typing import (
from pydantic import Field
from pyiceberg.avro.file import AvroFile
-from pyiceberg.avro.reader import AvroStruct
from pyiceberg.io import FileIO, InputFile
from pyiceberg.schema import Schema
+from pyiceberg.typedef import Record
from pyiceberg.types import (
IcebergType,
ListType,
@@ -158,14 +158,14 @@ def read_manifest_list(input_file: InputFile) -> Iterator[ManifestFile]:
@singledispatch
-def _convert_pos_to_dict(schema: Union[Schema, IcebergType], struct: AvroStruct) -> Dict[str, Any]:
+def _convert_pos_to_dict(schema: Union[Schema, IcebergType], struct: Record) -> Dict[str, Any]:
"""Converts the positions in the field names
This makes it easy to map it onto a Pydantic model. Might change later on depending on the performance
Args:
schema (Schema | IcebergType): The schema of the file
- struct (AvroStruct): The struct containing the data by positions
+ struct (Record): The struct containing the data by positions
Raises:
NotImplementedError: If attempting to handle an unknown type in the schema
@@ -174,12 +174,12 @@ def _convert_pos_to_dict(schema: Union[Schema, IcebergType], struct: AvroStruct)
@_convert_pos_to_dict.register
-def _(schema: Schema, struct: AvroStruct) -> Dict[str, Any]:
+def _(schema: Schema, struct: Record) -> Dict[str, Any]:
return _convert_pos_to_dict(schema.as_struct(), struct)
@_convert_pos_to_dict.register
-def _(struct_type: StructType, values: AvroStruct) -> Dict[str, Any]:
+def _(struct_type: StructType, values: Record) -> Dict[str, Any]:
"""Iterates over all the fields in the dict, and gets the data from the
struct"""
return (
{field.name: _convert_pos_to_dict(field.field_type, values.get(pos)) for pos, field in enumerate(struct_type.fields)}
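For context on the dispatch mechanism above: _convert_pos_to_dict is built on the standard-library functools.singledispatch, so each @_convert_pos_to_dict.register overload is selected by the runtime type of the first argument. A minimal standalone sketch of the pattern (illustrative names, not part of this commit):

    from functools import singledispatch

    @singledispatch
    def describe(obj: object) -> str:
        # Fallback, mirroring the NotImplementedError raised for unknown schema types
        raise NotImplementedError(f"Unknown type: {type(obj)}")

    @describe.register
    def _(obj: int) -> str:
        return f"int: {obj}"

    @describe.register
    def _(obj: str) -> str:
        return f"str: {obj}"

    assert describe(42) == "int: 42"
    assert describe("hi") == "str: hi"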
diff --git a/python/pyiceberg/typedef.py b/python/pyiceberg/typedef.py
index 436b1a6e79..6ad668f006 100644
--- a/python/pyiceberg/typedef.py
+++ b/python/pyiceberg/typedef.py
@@ -14,12 +14,15 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+from __future__ import annotations
+
from abc import abstractmethod
from decimal import Decimal
from typing import (
Any,
Callable,
Dict,
+ List,
Protocol,
Tuple,
TypeVar,
@@ -78,3 +81,20 @@ class StructProtocol(Protocol): # pragma: no cover
@abstractmethod
def set(self, pos: int, value: Any) -> None:
...
+
+
+class Record(StructProtocol):
+ _data: List[Union[Any, StructProtocol]]
+
+ def __init__(self, *data: Union[Any, StructProtocol]) -> None:
+ self._data = list(data)
+
+ def set(self, pos: int, value: Any) -> None:
+ self._data[pos] = value
+
+ def get(self, pos: int) -> Any:
+ return self._data[pos]
+
+ def __eq__(self, other: Any) -> bool:
+ # For testing
+ return True if isinstance(other, Record) and other._data == self._data else False
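A minimal usage sketch of the Record class added above (assuming pyiceberg as of this commit): the varargs constructor replaces AvroStruct's single-list _data field, and the loose __eq__, flagged "For testing", compares the underlying data.

    from pyiceberg.typedef import Record

    row = Record("foo", 123, True)  # was: AvroStruct(_data=["foo", 123, True])
    assert row.get(1) == 123
    row.set(1, 456)
    assert row == Record("foo", 456, True)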
diff --git a/python/tests/avro/test_reader.py b/python/tests/avro/test_reader.py
index fc37f9f9a3..2528882333 100644
--- a/python/tests/avro/test_reader.py
+++ b/python/tests/avro/test_reader.py
@@ -21,7 +21,6 @@ import pytest
from pyiceberg.avro.file import AvroFile
from pyiceberg.avro.reader import (
- AvroStruct,
BinaryReader,
BooleanReader,
ConstructReader,
@@ -37,8 +36,10 @@ from pyiceberg.avro.reader import (
TimestamptzReader,
UUIDReader,
)
+from pyiceberg.io.pyarrow import PyArrowFileIO
from pyiceberg.manifest import _convert_pos_to_dict
from pyiceberg.schema import Schema, visit
+from pyiceberg.typedef import Record
from pyiceberg.types import (
BinaryType,
BooleanType,
@@ -60,11 +61,10 @@ from pyiceberg.types import (
TimeType,
UUIDType,
)
-from tests.io.test_io import LocalInputFile
def test_read_header(generated_manifest_entry_file: str, iceberg_manifest_entry_schema: Schema) -> None:
- with AvroFile(LocalInputFile(generated_manifest_entry_file)) as reader:
+ with AvroFile(PyArrowFileIO().new_input(generated_manifest_entry_file)) as reader:
header = reader._read_header()
assert header.magic == b"Obj\x01"
@@ -257,147 +257,141 @@ def test_read_header(generated_manifest_entry_file: str, iceberg_manifest_entry_
def test_read_manifest_entry_file(generated_manifest_entry_file: str) -> None:
- with AvroFile(LocalInputFile(generated_manifest_entry_file)) as reader:
+ with AvroFile(PyArrowFileIO().new_input(generated_manifest_entry_file)) as reader:
# Consume the generator
records = list(reader)
assert len(records) == 2, f"Expected 2 records, got {len(records)}"
- assert records[0] == AvroStruct(
- _data=[
- 1,
- 8744736658442914487,
- AvroStruct(
- _data=[
- "/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=null/00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00001.parquet",
- "PARQUET",
- AvroStruct(_data=[None]),
- 19513,
- 388872,
- 67108864,
- {
- 1: 53,
- 2: 98153,
- 3: 98693,
- 4: 53,
- 5: 53,
- 6: 53,
- 7: 17425,
- 8: 18528,
- 9: 53,
- 10: 44788,
- 11: 35571,
- 12: 53,
- 13: 1243,
- 14: 2355,
- 15: 12750,
- 16: 4029,
- 17: 110,
- 18: 47194,
- 19: 2948,
- },
- {
- 1: 19513,
- 2: 19513,
- 3: 19513,
- 4: 19513,
- 5: 19513,
- 6: 19513,
- 7: 19513,
- 8: 19513,
- 9: 19513,
- 10: 19513,
- 11: 19513,
- 12: 19513,
- 13: 19513,
- 14: 19513,
- 15: 19513,
- 16: 19513,
- 17: 19513,
- 18: 19513,
- 19: 19513,
- },
- {
- 1: 19513,
- 2: 0,
- 3: 0,
- 4: 19513,
- 5: 19513,
- 6: 19513,
- 7: 0,
- 8: 0,
- 9: 19513,
- 10: 0,
- 11: 0,
- 12: 19513,
- 13: 0,
- 14: 0,
- 15: 0,
- 16: 0,
- 17: 0,
- 18: 0,
- 19: 0,
- },
- {16: 0, 17: 0, 18: 0, 19: 0, 10: 0, 11: 0, 12: 0, 13: 0, 14: 0, 15: 0},
- {
- 2: b"2020-04-01 00:00",
- 3: b"2020-04-01 00:12",
- 7: b"\x03\x00\x00\x00",
- 8: b"\x01\x00\x00\x00",
- 10: b"\xf6(\\\x8f\xc2\x05S\xc0",
- 11: b"\x00\x00\x00\x00\x00\x00\x00\x00",
- 13: b"\x00\x00\x00\x00\x00\x00\x00\x00",
- 14: b"\x00\x00\x00\x00\x00\x00\xe0\xbf",
- 15: b")\\\x8f\xc2\xf5(\x08\xc0",
- 16: b"\x00\x00\x00\x00\x00\x00\x00\x00",
- 17: b"\x00\x00\x00\x00\x00\x00\x00\x00",
- 18: b"\xf6(\\\x8f\xc2\xc5S\xc0",
- 19: b"\x00\x00\x00\x00\x00\x00\x04\xc0",
- },
- {
- 2: b"2020-04-30 23:5:",
- 3: b"2020-05-01 00:41",
- 7: b"\t\x01\x00\x00",
- 8: b"\t\x01\x00\x00",
- 10: b"\xcd\xcc\xcc\xcc\xcc,_@",
- 11: b"\x1f\x85\xebQ\\\xe2\xfe@",
- 13: b"\x00\x00\x00\x00\x00\x00\x12@",
- 14: b"\x00\x00\x00\x00\x00\x00\xe0?",
- 15: b"q=\n\xd7\xa3\xf01@",
- 16: b"\x00\x00\x00\x00\x00`B@",
- 17: b"333333\xd3?",
- 18: b"\x00\x00\x00\x00\x00\x18b@",
- 19: b"\x00\x00\x00\x00\x00\x00\x04@",
- },
- None,
- [4],
- 0,
- ]
- ),
- ]
+ assert records[0] == Record(
+ 1,
+ 8744736658442914487,
+ Record(
+ "/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=null/00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00001.parquet",
+ "PARQUET",
+ Record(None),
+ 19513,
+ 388872,
+ 67108864,
+ {
+ 1: 53,
+ 2: 98153,
+ 3: 98693,
+ 4: 53,
+ 5: 53,
+ 6: 53,
+ 7: 17425,
+ 8: 18528,
+ 9: 53,
+ 10: 44788,
+ 11: 35571,
+ 12: 53,
+ 13: 1243,
+ 14: 2355,
+ 15: 12750,
+ 16: 4029,
+ 17: 110,
+ 18: 47194,
+ 19: 2948,
+ },
+ {
+ 1: 19513,
+ 2: 19513,
+ 3: 19513,
+ 4: 19513,
+ 5: 19513,
+ 6: 19513,
+ 7: 19513,
+ 8: 19513,
+ 9: 19513,
+ 10: 19513,
+ 11: 19513,
+ 12: 19513,
+ 13: 19513,
+ 14: 19513,
+ 15: 19513,
+ 16: 19513,
+ 17: 19513,
+ 18: 19513,
+ 19: 19513,
+ },
+ {
+ 1: 19513,
+ 2: 0,
+ 3: 0,
+ 4: 19513,
+ 5: 19513,
+ 6: 19513,
+ 7: 0,
+ 8: 0,
+ 9: 19513,
+ 10: 0,
+ 11: 0,
+ 12: 19513,
+ 13: 0,
+ 14: 0,
+ 15: 0,
+ 16: 0,
+ 17: 0,
+ 18: 0,
+ 19: 0,
+ },
+ {16: 0, 17: 0, 18: 0, 19: 0, 10: 0, 11: 0, 12: 0, 13: 0, 14: 0, 15: 0},
+ {
+ 2: b"2020-04-01 00:00",
+ 3: b"2020-04-01 00:12",
+ 7: b"\x03\x00\x00\x00",
+ 8: b"\x01\x00\x00\x00",
+ 10: b"\xf6(\\\x8f\xc2\x05S\xc0",
+ 11: b"\x00\x00\x00\x00\x00\x00\x00\x00",
+ 13: b"\x00\x00\x00\x00\x00\x00\x00\x00",
+ 14: b"\x00\x00\x00\x00\x00\x00\xe0\xbf",
+ 15: b")\\\x8f\xc2\xf5(\x08\xc0",
+ 16: b"\x00\x00\x00\x00\x00\x00\x00\x00",
+ 17: b"\x00\x00\x00\x00\x00\x00\x00\x00",
+ 18: b"\xf6(\\\x8f\xc2\xc5S\xc0",
+ 19: b"\x00\x00\x00\x00\x00\x00\x04\xc0",
+ },
+ {
+ 2: b"2020-04-30 23:5:",
+ 3: b"2020-05-01 00:41",
+ 7: b"\t\x01\x00\x00",
+ 8: b"\t\x01\x00\x00",
+ 10: b"\xcd\xcc\xcc\xcc\xcc,_@",
+ 11: b"\x1f\x85\xebQ\\\xe2\xfe@",
+ 13: b"\x00\x00\x00\x00\x00\x00\x12@",
+ 14: b"\x00\x00\x00\x00\x00\x00\xe0?",
+ 15: b"q=\n\xd7\xa3\xf01@",
+ 16: b"\x00\x00\x00\x00\x00`B@",
+ 17: b"333333\xd3?",
+ 18: b"\x00\x00\x00\x00\x00\x18b@",
+ 19: b"\x00\x00\x00\x00\x00\x00\x04@",
+ },
+ None,
+ [4],
+ 0,
+ ),
)
def test_read_manifest_file_file(generated_manifest_file_file: str) -> None:
- with AvroFile(LocalInputFile(generated_manifest_file_file)) as reader:
+ with AvroFile(PyArrowFileIO().new_input(generated_manifest_file_file)) as reader:
# Consume the generator
records = list(reader)
assert len(records) == 1, f"Expected 1 records, got {len(records)}"
actual = records[0]
- expected = AvroStruct(
- _data=[
- actual.get(0),
- 7989,
- 0,
- 9182715666859759686,
- 3,
- 0,
- 0,
- [AvroStruct(_data=[True, False, b"\x01\x00\x00\x00", b"\x02\x00\x00\x00"])],
- 237993,
- 0,
- 0,
- ]
+ expected = Record(
+ actual.get(0),
+ 7989,
+ 0,
+ 9182715666859759686,
+ 3,
+ 0,
+ 0,
+ [Record(True, False, b"\x01\x00\x00\x00", b"\x02\x00\x00\x00")],
+ 237993,
+ 0,
+ 0,
)
assert actual == expected
@@ -407,7 +401,7 @@ def test_null_list_convert_pos_to_dict() -> None:
Schema(
NestedField(name="field", field_id=1,
field_type=ListType(element_id=2, element=StringType(), element_required=False))
),
- AvroStruct([None]),
+ Record(None),
)
assert data["field"] is None
@@ -421,7 +415,7 @@ def test_null_dict_convert_pos_to_dict() -> None:
field_type=MapType(key_id=2, key_type=StringType(), value_id=3, value_type=StringType(), value_required=False),
)
),
- AvroStruct([None]),
+ Record(None),
)
assert data["field"] is None
@@ -438,7 +432,7 @@ def test_null_struct_convert_pos_to_dict() -> None:
required=False,
)
),
- AvroStruct([None]),
+ Record(None),
)
assert data["field"] is None
diff --git a/python/tests/catalog/test_base.py b/python/tests/catalog/test_base.py
index ab9557448e..7281ff839c 100644
--- a/python/tests/catalog/test_base.py
+++ b/python/tests/catalog/test_base.py
@@ -42,10 +42,10 @@ from pyiceberg.io import load_file_io
from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionField, PartitionSpec
from pyiceberg.schema import Schema
from pyiceberg.table import Table
+from pyiceberg.table.metadata import TableMetadataV1
from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder
from pyiceberg.transforms import IdentityTransform
from pyiceberg.typedef import EMPTY_DICT
-from tests.table.test_metadata import EXAMPLE_TABLE_METADATA_V1
class InMemoryCatalog(Catalog):
@@ -80,7 +80,27 @@ class InMemoryCatalog(Catalog):
table = Table(
identifier=identifier,
- metadata=EXAMPLE_TABLE_METADATA_V1,
+ metadata=TableMetadataV1(
+ **{
+ "format-version": 1,
+ "table-uuid": "d20125c8-7284-442c-9aea-15fee620737c",
+ "location": "s3://bucket/test/location",
+ "last-updated-ms": 1602638573874,
+ "last-column-id": 3,
+ "schema": {
+ "type": "struct",
+ "fields": [
+ {"id": 1, "name": "x", "required": True,
"type": "long"},
+ {"id": 2, "name": "y", "required": True,
"type": "long", "doc": "comment"},
+ {"id": 3, "name": "z", "required": True,
"type": "long"},
+ ],
+ },
+ "partition-spec": [{"name": "x", "transform":
"identity", "source-id": 1, "field-id": 1000}],
+ "properties": {},
+ "current-snapshot-id": -1,
+ "snapshots": [{"snapshot-id": 1925, "timestamp-ms":
1602638573822}],
+ }
+ ),
metadata_location=f's3://warehouse/{"/".join(identifier)}/metadata/metadata.json',
io=load_file_io(),
)
diff --git a/python/tests/catalog/test_hive.py b/python/tests/catalog/test_hive.py
index f9a47e516c..1cb7cba04b 100644
--- a/python/tests/catalog/test_hive.py
+++ b/python/tests/catalog/test_hive.py
@@ -42,6 +42,7 @@ from pyiceberg.exceptions import (
NoSuchNamespaceError,
NoSuchTableError,
)
+from pyiceberg.io.pyarrow import PyArrowFileIO
from pyiceberg.partitioning import PartitionField, PartitionSpec
from pyiceberg.schema import Schema
from pyiceberg.serializers import ToOutputFile
@@ -68,7 +69,6 @@ from pyiceberg.types import (
NestedField,
StringType,
)
-from tests.conftest import LocalFileIO
HIVE_CATALOG_NAME = "hive"
HIVE_METASTORE_FAKE_URL = "thrift://unknown:9083"
@@ -78,7 +78,8 @@ HIVE_METASTORE_FAKE_URL = "thrift://unknown:9083"
def hive_table(tmp_path_factory: pytest.TempPathFactory, example_table_metadata_v2: Dict[str, Any]) -> HiveTable:
metadata_path = str(tmp_path_factory.mktemp("metadata") / f"{uuid.uuid4()}.metadata.json")
metadata = TableMetadataV2(**example_table_metadata_v2)
- ToOutputFile.table_metadata(metadata, LocalFileIO().new_output(str(metadata_path)), True)
+
+ ToOutputFile.table_metadata(metadata, PyArrowFileIO().new_output(location=str(metadata_path)), True)
return HiveTable(
tableName="new_tabl2e",
diff --git a/python/tests/conftest.py b/python/tests/conftest.py
index fffc50547f..f809cfc503 100644
--- a/python/tests/conftest.py
+++ b/python/tests/conftest.py
@@ -31,8 +31,6 @@ from typing import (
Callable,
Dict,
Generator,
- Type,
- Union,
)
from unittest.mock import MagicMock
from urllib.parse import urlparse
@@ -49,14 +47,9 @@ import pytest
from moto import mock_glue, mock_s3
from pyiceberg import schema
-from pyiceberg.io import (
- FileIO,
- InputFile,
- OutputFile,
- OutputStream,
- fsspec,
-)
+from pyiceberg.io import OutputFile, OutputStream, fsspec
from pyiceberg.io.fsspec import FsspecFileIO
+from pyiceberg.io.pyarrow import PyArrowFile, PyArrowFileIO
from pyiceberg.schema import Schema
from pyiceberg.types import (
BinaryType,
@@ -72,7 +65,6 @@ from pyiceberg.types import (
StructType,
)
from tests.catalog.test_base import InMemoryCatalog
-from tests.io.test_io import LocalInputFile
def pytest_addoption(parser: pytest.Parser) -> None:
@@ -85,21 +77,6 @@ def pytest_addoption(parser: pytest.Parser) -> None:
)
-class FooStruct:
- """An example of an object that abides by StructProtocol"""
-
- content: Dict[int, Any]
-
- def __init__(self) -> None:
- self.content = {}
-
- def get(self, pos: int) -> Any:
- return self.content[pos]
-
- def set(self, pos: int, value: Any) -> None:
- self.content[pos] = value
-
-
@pytest.fixture(scope="session")
def table_schema_simple() -> Schema:
return schema.Schema(
@@ -162,11 +139,6 @@ def table_schema_nested() -> Schema:
)
[email protected](scope="session")
-def foo_struct() -> FooStruct:
- return FooStruct()
-
-
@pytest.fixture(scope="session")
def all_avro_types() -> Dict[str, Any]:
return {
@@ -922,8 +894,8 @@ class LocalOutputFile(OutputFile):
def exists(self) -> bool:
return os.path.exists(self._path)
- def to_input_file(self) -> LocalInputFile:
- return LocalInputFile(location=self.location)
+ def to_input_file(self) -> PyArrowFile:
+ return PyArrowFileIO().new_input(location=self.location)
def create(self, overwrite: bool = False) -> OutputStream:
output_file = open(self._path, "wb" if overwrite else "xb")
@@ -932,25 +904,6 @@ class LocalOutputFile(OutputFile):
return output_file
-class LocalFileIO(FileIO):
- """A FileIO implementation for local files (for test use only)"""
-
- def new_input(self, location: str) -> LocalInputFile:
- return LocalInputFile(location=location)
-
- def new_output(self, location: str) -> LocalOutputFile:
- return LocalOutputFile(location=location)
-
- def delete(self, location: Union[str, InputFile, OutputFile]) -> None:
- location = location.location if isinstance(location, (InputFile, OutputFile)) else location
- os.remove(location)
-
-
[email protected](scope="session", autouse=True)
-def LocalFileIOFixture() -> Type[LocalFileIO]:
- return LocalFileIO
-
-
@pytest.fixture(scope="session")
def generated_manifest_entry_file(avro_schema_manifest_entry: Dict[str, Any]) -> Generator[str, None, None]:
from fastavro import parse_schema, writer
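The conftest changes above drop the hand-written LocalFileIO/LocalInputFile test doubles in favor of the real PyArrowFileIO. A minimal sketch of the round trip the tests now rely on (hypothetical /tmp path; only APIs that appear in this diff):

    from pyiceberg.io.pyarrow import PyArrowFileIO

    io = PyArrowFileIO()
    with io.new_output(location="/tmp/example.txt").create(overwrite=True) as f:
        f.write(b"foo")
    with io.new_input(location="/tmp/example.txt").open() as f:
        assert f.read() == b"foo"
    io.delete("/tmp/example.txt")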
diff --git a/python/tests/expressions/test_evaluator.py b/python/tests/expressions/test_evaluator.py
index 4d4025a268..95766b0024 100644
--- a/python/tests/expressions/test_evaluator.py
+++ b/python/tests/expressions/test_evaluator.py
@@ -14,8 +14,6 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-from typing import Any, List
-
from pyiceberg.expressions import (
AlwaysFalse,
AlwaysTrue,
@@ -37,7 +35,7 @@ from pyiceberg.expressions import (
)
from pyiceberg.expressions.visitors import expression_evaluator
from pyiceberg.schema import Schema
-from pyiceberg.typedef import StructProtocol
+from pyiceberg.typedef import Record
from pyiceberg.types import (
DoubleType,
LongType,
@@ -45,20 +43,6 @@ from pyiceberg.types import (
StringType,
)
-
-class Record(StructProtocol):
- data: List[Any]
-
- def __init__(self, *values: Any) -> None:
- self.data = list(values)
-
- def get(self, pos: int) -> Any:
- return self.data[pos]
-
- def set(self, pos: int, value: Any) -> None:
- self.data[pos] = value
-
-
SIMPLE_SCHEMA = Schema(
NestedField(id=1, name="id", field_type=LongType()), NestedField(id=2,
name="data", field_type=StringType(), required=False)
)
diff --git a/python/tests/expressions/test_expressions.py b/python/tests/expressions/test_expressions.py
index 233b80a385..7b26d45a28 100644
--- a/python/tests/expressions/test_expressions.py
+++ b/python/tests/expressions/test_expressions.py
@@ -61,6 +61,7 @@ from pyiceberg.expressions import (
from pyiceberg.expressions.literals import Literal, literal
from pyiceberg.expressions.visitors import _from_byte_buffer
from pyiceberg.schema import Accessor, Schema
+from pyiceberg.typedef import Record
from pyiceberg.types import (
DoubleType,
FloatType,
@@ -70,8 +71,29 @@ from pyiceberg.types import (
NestedField,
StringType,
)
-from tests.conftest import FooStruct
-from tests.expressions.test_visitors import ExpressionA, ExpressionB
+from pyiceberg.utils.singleton import Singleton
+
+
+class ExpressionA(BooleanExpression, Singleton):
+ def __invert__(self) -> BooleanExpression:
+ return ExpressionB()
+
+ def __repr__(self) -> str:
+ return "ExpressionA()"
+
+ def __str__(self) -> str:
+ return "testexpra"
+
+
+class ExpressionB(BooleanExpression, Singleton):
+ def __invert__(self) -> BooleanExpression:
+ return ExpressionA()
+
+ def __repr__(self) -> str:
+ return "ExpressionB()"
+
+ def __str__(self) -> str:
+ return "testexprb"
def test_isnull_inverse() -> None:
@@ -580,36 +602,38 @@ def test_invert_always() -> None:
assert ~AlwaysTrue() == AlwaysFalse()
-def test_accessor_base_class(foo_struct: FooStruct) -> None:
+def test_accessor_base_class() -> None:
"""Test retrieving a value at a position of a container using an
accessor"""
+ struct = Record(*([None] * 12))
+
uuid_value = uuid.uuid4()
- foo_struct.set(0, "foo")
- foo_struct.set(1, "bar")
- foo_struct.set(2, "baz")
- foo_struct.set(3, 1)
- foo_struct.set(4, 2)
- foo_struct.set(5, 3)
- foo_struct.set(6, 1.234)
- foo_struct.set(7, Decimal("1.234"))
- foo_struct.set(8, uuid_value)
- foo_struct.set(9, True)
- foo_struct.set(10, False)
- foo_struct.set(11, b"\x19\x04\x9e?")
-
- assert Accessor(position=0).get(foo_struct) == "foo"
- assert Accessor(position=1).get(foo_struct) == "bar"
- assert Accessor(position=2).get(foo_struct) == "baz"
- assert Accessor(position=3).get(foo_struct) == 1
- assert Accessor(position=4).get(foo_struct) == 2
- assert Accessor(position=5).get(foo_struct) == 3
- assert Accessor(position=6).get(foo_struct) == 1.234
- assert Accessor(position=7).get(foo_struct) == Decimal("1.234")
- assert Accessor(position=8).get(foo_struct) == uuid_value
- assert Accessor(position=9).get(foo_struct) is True
- assert Accessor(position=10).get(foo_struct) is False
- assert Accessor(position=11).get(foo_struct) == b"\x19\x04\x9e?"
+ struct.set(0, "foo")
+ struct.set(1, "bar")
+ struct.set(2, "baz")
+ struct.set(3, 1)
+ struct.set(4, 2)
+ struct.set(5, 3)
+ struct.set(6, 1.234)
+ struct.set(7, Decimal("1.234"))
+ struct.set(8, uuid_value)
+ struct.set(9, True)
+ struct.set(10, False)
+ struct.set(11, b"\x19\x04\x9e?")
+
+ assert Accessor(position=0).get(struct) == "foo"
+ assert Accessor(position=1).get(struct) == "bar"
+ assert Accessor(position=2).get(struct) == "baz"
+ assert Accessor(position=3).get(struct) == 1
+ assert Accessor(position=4).get(struct) == 2
+ assert Accessor(position=5).get(struct) == 3
+ assert Accessor(position=6).get(struct) == 1.234
+ assert Accessor(position=7).get(struct) == Decimal("1.234")
+ assert Accessor(position=8).get(struct) == uuid_value
+ assert Accessor(position=9).get(struct) is True
+ assert Accessor(position=10).get(struct) is False
+ assert Accessor(position=11).get(struct) == b"\x19\x04\x9e?"
@pytest.fixture
@@ -876,11 +900,13 @@ def test_less_than_or_equal() -> None:
assert less_than_or_equal == eval(repr(less_than_or_equal))
-def test_bound_reference_eval(table_schema_simple: Schema, foo_struct: FooStruct) -> None:
+def test_bound_reference_eval(table_schema_simple: Schema) -> None:
"""Test creating a BoundReference and evaluating it on a StructProtocol"""
- foo_struct.set(pos=1, value="foovalue")
- foo_struct.set(pos=2, value=123)
- foo_struct.set(pos=3, value=True)
+ struct = Record(None, None, None, None)
+
+ struct.set(pos=1, value="foovalue")
+ struct.set(pos=2, value=123)
+ struct.set(pos=3, value=True)
position1_accessor = Accessor(position=1)
position2_accessor = Accessor(position=2)
@@ -894,9 +920,9 @@ def test_bound_reference_eval(table_schema_simple: Schema, foo_struct: FooStruct
bound_ref2 = BoundReference(field=field2, accessor=position2_accessor)
bound_ref3 = BoundReference(field=field3, accessor=position3_accessor)
- assert bound_ref1.eval(foo_struct) == "foovalue"
- assert bound_ref2.eval(foo_struct) == 123
- assert bound_ref3.eval(foo_struct) is True
+ assert bound_ref1.eval(struct) == "foovalue"
+ assert bound_ref2.eval(struct) == 123
+ assert bound_ref3.eval(struct) is True
def test_non_primitive_from_byte_buffer() -> None:
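The two rewritten tests above share one idea: an Accessor (or a BoundReference wrapping one) fetches a value by position from anything implementing StructProtocol, so a plain Record can stand in for the old FooStruct mock. A condensed sketch (assuming pyiceberg as of this commit):

    from pyiceberg.schema import Accessor
    from pyiceberg.typedef import Record

    struct = Record(None, None)
    struct.set(pos=1, value="foovalue")
    assert Accessor(position=1).get(struct) == "foovalue"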
diff --git a/python/tests/expressions/test_visitors.py b/python/tests/expressions/test_visitors.py
index e179ccf05f..19915cc6b9 100644
--- a/python/tests/expressions/test_visitors.py
+++ b/python/tests/expressions/test_visitors.py
@@ -79,29 +79,6 @@ from pyiceberg.types import (
PrimitiveType,
StringType,
)
-from pyiceberg.utils.singleton import Singleton
-
-
-class ExpressionA(BooleanExpression, Singleton):
- def __invert__(self) -> BooleanExpression:
- return ExpressionB()
-
- def __repr__(self) -> str:
- return "ExpressionA()"
-
- def __str__(self) -> str:
- return "testexpra"
-
-
-class ExpressionB(BooleanExpression, Singleton):
- def __invert__(self) -> BooleanExpression:
- return ExpressionA()
-
- def __repr__(self) -> str:
- return "ExpressionB()"
-
- def __str__(self) -> str:
- return "testexprb"
class ExampleVisitor(BooleanExpressionVisitor[List[str]]):
@@ -135,33 +112,13 @@ class ExampleVisitor(BooleanExpressionVisitor[List[str]]):
return self.visit_history
def visit_unbound_predicate(self, predicate: UnboundPredicate[Any]) -> List[str]:
- self.visit_history.append("UNBOUND PREDICATE")
+ self.visit_history.append(str(predicate.__class__.__name__).upper())
return self.visit_history
def visit_bound_predicate(self, predicate: BoundPredicate[Any]) -> List[str]:
- self.visit_history.append("BOUND PREDICATE")
+ self.visit_history.append(str(predicate.__class__.__name__).upper())
return self.visit_history
- def visit_test_expression_a(self) -> List[str]:
- self.visit_history.append("ExpressionA")
- return self.visit_history
-
- def visit_test_expression_b(self) -> List[str]:
- self.visit_history.append("ExpressionB")
- return self.visit_history
-
-
[email protected](ExpressionA)
-def _(obj: ExpressionA, visitor: ExampleVisitor) -> List[str]:
- """Visit a ExpressionA with a BooleanExpressionVisitor"""
- return visitor.visit_test_expression_a()
-
-
[email protected](ExpressionB)
-def _(obj: ExpressionB, visitor: ExampleVisitor) -> List[str]:
- """Visit a ExpressionB with a BooleanExpressionVisitor"""
- return visitor.visit_test_expression_b()
-
class FooBoundBooleanExpressionVisitor(BoundBooleanExpressionVisitor[List[str]]):
"""A test implementation of a BoundBooleanExpressionVisitor
@@ -250,26 +207,26 @@ class FooBoundBooleanExpressionVisitor(BoundBooleanExpressionVisitor[List[str]])
def test_boolean_expression_visitor() -> None:
"""Test post-order traversal of boolean expression visit method"""
expr = And(
- Or(Not(ExpressionA()), Not(ExpressionB()), ExpressionA(), ExpressionB()),
- Not(ExpressionA()),
- ExpressionB(),
+ Or(Not(EqualTo("a", 1)), Not(NotEqualTo("b", 0)), EqualTo("a", 1),
NotEqualTo("b", 0)),
+ Not(EqualTo("a", 1)),
+ NotEqualTo("b", 0),
)
visitor = ExampleVisitor()
result = visit(expr, visitor=visitor)
assert result == [
- "ExpressionA",
+ "EQUALTO",
"NOT",
- "ExpressionB",
+ "NOTEQUALTO",
"NOT",
"OR",
- "ExpressionA",
+ "EQUALTO",
"OR",
- "ExpressionB",
+ "NOTEQUALTO",
"OR",
- "ExpressionA",
+ "EQUALTO",
"NOT",
"AND",
- "ExpressionB",
+ "NOTEQUALTO",
"AND",
]
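The assertion above encodes the visitor's post-order contract: operands are visited before their operator, which is why every "NOT" follows the predicate it negates and the "OR"/"AND" entries come last. A tiny standalone illustration of post-order traversal (hypothetical helper, not the pyiceberg visitor):

    def post_order(expr, out):
        op, *children = expr
        for child in children:
            if isinstance(child, tuple):
                post_order(child, out)
            else:
                out.append(child.upper())  # leaf predicate first
        out.append(op.upper())  # operator after its operands
        return out

    assert post_order(("and", ("not", "equalto"), "notequalto"), []) == ["EQUALTO", "NOT", "NOTEQUALTO", "AND"]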
diff --git a/python/tests/io/test_fsspec.py b/python/tests/io/test_fsspec.py
index 22cad25ea4..fff308b212 100644
--- a/python/tests/io/test_fsspec.py
+++ b/python/tests/io/test_fsspec.py
@@ -16,7 +16,6 @@
# under the License.
import uuid
-from typing import Generator
import pytest
from botocore.awsrequest import AWSRequest
@@ -25,12 +24,12 @@ from requests_mock import Mocker
from pyiceberg.exceptions import SignError
from pyiceberg.io import fsspec
from pyiceberg.io.fsspec import FsspecFileIO, s3v4_rest_signer
-from tests.io.test_io import LocalInputFile
+from pyiceberg.io.pyarrow import PyArrowFileIO
@pytest.mark.s3
def test_fsspec_new_input_file(fsspec_fileio: FsspecFileIO) -> None:
- """Test creating a new input file from an fsspec file-io"""
+ """Test creating a new input file from a fsspec file-io"""
filename = str(uuid.uuid4())
input_file = fsspec_fileio.new_input(f"s3://warehouse/{filename}")
@@ -193,10 +192,10 @@ def test_fsspec_converting_an_outputfile_to_an_inputfile(fsspec_fileio: FsspecFi
@pytest.mark.s3
-def test_writing_avro_file(generated_manifest_entry_file: Generator[str, None, None], fsspec_fileio: FsspecFileIO) -> None:
+def test_writing_avro_file(generated_manifest_entry_file: str, fsspec_fileio: FsspecFileIO) -> None:
 """Test that bytes match when reading a local avro file, writing it using fsspec file-io, and then reading it again"""
filename = str(uuid.uuid4())
- with LocalInputFile(generated_manifest_entry_file).open() as f:
+ with PyArrowFileIO().new_input(location=generated_manifest_entry_file).open() as f:
b1 = f.read()
with fsspec_fileio.new_output(location=f"s3://warehouse/{filename}").create() as out_f:
out_f.write(b1)
diff --git a/python/tests/io/test_io.py b/python/tests/io/test_io.py
index a83cba1af3..6cb5dbc7d3 100644
--- a/python/tests/io/test_io.py
+++ b/python/tests/io/test_io.py
@@ -17,20 +17,12 @@
import os
import tempfile
-from typing import Type, Union
-from unittest.mock import patch
-from urllib.parse import ParseResult, urlparse
import pytest
from pyiceberg.io import (
ARROW_FILE_IO,
PY_IO_IMPL,
- FileIO,
- InputFile,
- InputStream,
- OutputFile,
- OutputStream,
_import_file_io,
load_file_io,
)
@@ -38,102 +30,7 @@ from pyiceberg.io.fsspec import FsspecFileIO
from pyiceberg.io.pyarrow import PyArrowFileIO
-class LocalInputFile(InputFile):
- """An InputFile implementation for local files (for test use only)"""
-
- def __init__(self, location: str) -> None:
-
- parsed_location = urlparse(location) # Create a ParseResult from the uri
- if parsed_location.scheme and parsed_location.scheme != "file": # Validate that a uri is provided with a scheme of `file`
- raise ValueError("LocalInputFile location must have a scheme of `file`")
- elif parsed_location.netloc:
- raise ValueError(f"Network location is not allowed for LocalInputFile: {parsed_location.netloc}")
-
- super().__init__(location=location)
- self._parsed_location = parsed_location
-
- @property
- def parsed_location(self) -> ParseResult:
- """The parsed location
-
- Returns:
- ParseResult: The parsed results which has attributes `scheme`, `netloc`, `path`,
- `params`, `query`, and `fragments`.
- """
- return self._parsed_location
-
- def __len__(self) -> int:
- return os.path.getsize(self.parsed_location.path)
-
- def exists(self) -> bool:
- return os.path.exists(self.parsed_location.path)
-
- def open(self) -> InputStream:
- input_file = open(self.parsed_location.path, "rb")
- if not isinstance(input_file, InputStream):
- raise TypeError("Object returned from LocalInputFile.open() does
not match the OutputStream protocol.")
- return input_file
-
-
-class LocalOutputFile(OutputFile):
- """An OutputFile implementation for local files (for test use only)"""
-
- def __init__(self, location: str) -> None:
- parsed_location = urlparse(location) # Create a ParseResult from the uri
- if parsed_location.scheme and parsed_location.scheme != "file": # Validate that a uri is provided with a scheme of `file`
- raise ValueError("LocalOutputFile location must have a scheme of `file`")
- elif parsed_location.netloc:
- raise ValueError(f"Network location is not allowed for LocalOutputFile: {parsed_location.netloc}")
-
- super().__init__(location=location)
- self._parsed_location = parsed_location
-
- @property
- def parsed_location(self) -> ParseResult:
- """The parsed location
-
- Returns:
- ParseResult: The parsed results which has attributes `scheme`, `netloc`, `path`,
- `params`, `query`, and `fragments`.
- """
- return self._parsed_location
-
- def __len__(self) -> int:
- return os.path.getsize(self.parsed_location.path)
-
- def exists(self) -> bool:
- return os.path.exists(self.parsed_location.path)
-
- def to_input_file(self) -> LocalInputFile:
- return LocalInputFile(location=self.location)
-
- def create(self, overwrite: bool = False) -> OutputStream:
- output_file = open(self.parsed_location.path, "wb" if overwrite else "xb")
- if not issubclass(type(output_file), OutputStream):
- raise TypeError("Object returned from LocalOutputFile.create(...) does not match the OutputStream protocol.")
- return output_file
-
-
-class LocalFileIO(FileIO):
- """A FileIO implementation for local files (for test use only)"""
-
- def new_input(self, location: str) -> LocalInputFile:
- return LocalInputFile(location=location)
-
- def new_output(self, location: str) -> LocalOutputFile:
- return LocalOutputFile(location=location)
-
- def delete(self, location: Union[str, InputFile, OutputFile]) -> None:
- location = location.location if isinstance(location, (InputFile, OutputFile)) else location
- parsed_location = urlparse(location)
- try:
- os.remove(parsed_location.path)
- except FileNotFoundError as e:
- raise FileNotFoundError(f"Cannot delete file, does not exist: {parsed_location.path}") from e
-
-
[email protected]("CustomFileIO", [LocalFileIO, PyArrowFileIO])
-def test_custom_local_input_file(CustomFileIO: Type[FileIO]) -> None:
+def test_custom_local_input_file() -> None:
"""Test initializing an InputFile implementation to read a local file"""
with tempfile.TemporaryDirectory() as tmpdirname:
file_location = os.path.join(tmpdirname, "foo.txt")
@@ -145,7 +42,7 @@ def test_custom_local_input_file(CustomFileIO: Type[FileIO]) -> None:
# Instantiate the input file
absolute_file_location = os.path.abspath(file_location)
- input_file = CustomFileIO().new_input(location=f"{absolute_file_location}")
+ input_file = PyArrowFileIO().new_input(location=f"{absolute_file_location}")
# Test opening and reading the file
f = input_file.open()
@@ -154,15 +51,14 @@ def test_custom_local_input_file(CustomFileIO: Type[FileIO]) -> None:
assert len(input_file) == 3
[email protected]("CustomFileIO", [LocalFileIO, PyArrowFileIO])
-def test_custom_local_output_file(CustomFileIO: Type[FileIO]) -> None:
+def test_custom_local_output_file() -> None:
"""Test initializing an OutputFile implementation to write to a local
file"""
with tempfile.TemporaryDirectory() as tmpdirname:
file_location = os.path.join(tmpdirname, "foo.txt")
# Instantiate the output file
absolute_file_location = os.path.abspath(file_location)
- output_file = CustomFileIO().new_output(location=f"{absolute_file_location}")
+ output_file = PyArrowFileIO().new_output(location=f"{absolute_file_location}")
# Create the output file and write to it
f = output_file.create()
@@ -175,8 +71,7 @@ def test_custom_local_output_file(CustomFileIO: Type[FileIO]) -> None:
assert len(output_file) == 3
[email protected]("CustomFileIO", [LocalFileIO, PyArrowFileIO])
-def test_custom_local_output_file_with_overwrite(CustomFileIO: Type[FileIO])
-> None:
+def test_custom_local_output_file_with_overwrite() -> None:
"""Test initializing an OutputFile implementation to overwrite a local
file"""
with tempfile.TemporaryDirectory() as tmpdirname:
output_file_location = os.path.join(tmpdirname, "foo.txt")
@@ -186,7 +81,7 @@ def test_custom_local_output_file_with_overwrite(CustomFileIO: Type[FileIO]) ->
write_file.write(b"foo")
# Instantiate an output file
- output_file = CustomFileIO().new_output(location=f"{output_file_location}")
+ output_file = PyArrowFileIO().new_output(location=f"{output_file_location}")
# Confirm that a FileExistsError is raised when overwrite=False
with pytest.raises(FileExistsError):
@@ -200,8 +95,7 @@ def test_custom_local_output_file_with_overwrite(CustomFileIO: Type[FileIO]) ->
assert f.read() == b"bar"
[email protected]("CustomFileIO", [LocalFileIO, PyArrowFileIO])
-def test_custom_file_exists(CustomFileIO: Type[FileIO]) -> None:
+def test_custom_file_exists() -> None:
"""Test that the exists property returns the proper value for existing and
non-existing files"""
with tempfile.TemporaryDirectory() as tmpdirname:
file_location = os.path.join(tmpdirname, "foo.txt")
@@ -218,30 +112,29 @@ def test_custom_file_exists(CustomFileIO: Type[FileIO]) -> None:
non_existent_absolute_file_location = os.path.abspath(nonexistent_file_location)
# Create InputFile instances
- input_file = CustomFileIO().new_input(location=f"{absolute_file_location}")
- non_existent_input_file = CustomFileIO().new_input(location=f"{non_existent_absolute_file_location}")
+ input_file = PyArrowFileIO().new_input(location=f"{absolute_file_location}")
+ non_existent_input_file = PyArrowFileIO().new_input(location=f"{non_existent_absolute_file_location}")
# Test opening and reading the file
assert input_file.exists()
assert not non_existent_input_file.exists()
# Create OutputFile instances
- file = CustomFileIO().new_output(location=f"{absolute_file_location}")
- non_existent_file = CustomFileIO().new_output(location=f"{non_existent_absolute_file_location}")
+ file = PyArrowFileIO().new_output(location=f"{absolute_file_location}")
+ non_existent_file = PyArrowFileIO().new_output(location=f"{non_existent_absolute_file_location}")
# Test opening and reading the file
assert file.exists()
assert not non_existent_file.exists()
[email protected]("CustomFileIO", [LocalFileIO, PyArrowFileIO])
-def test_output_file_to_input_file(CustomFileIO: Type[FileIO]) -> None:
+def test_output_file_to_input_file() -> None:
"""Test initializing an InputFile using the `to_input_file()` method on an
OutputFile instance"""
with tempfile.TemporaryDirectory() as tmpdirname:
output_file_location = os.path.join(tmpdirname, "foo.txt")
# Create an output file instance
- output_file = CustomFileIO().new_output(location=f"{output_file_location}")
+ output_file = PyArrowFileIO().new_output(location=f"{output_file_location}")
# Create the output file and write to it
with output_file.create() as output_stream:
@@ -254,20 +147,17 @@ def test_output_file_to_input_file(CustomFileIO: Type[FileIO]) -> None:
@pytest.mark.parametrize(
- "CustomFileIO,string_uri",
+ "string_uri",
[
- (LocalFileIO, "foo/bar.parquet"),
- (LocalFileIO, "file:///foo/bar.parquet"),
- (LocalFileIO, "file:/foo/bar/baz.parquet"),
- (PyArrowFileIO, "foo/bar/baz.parquet"),
- (PyArrowFileIO, "file:/foo/bar/baz.parquet"),
- (PyArrowFileIO, "file:/foo/bar/baz.parquet"),
+ "foo/bar/baz.parquet",
+ "file:/foo/bar/baz.parquet",
+ "file:/foo/bar/baz.parquet",
],
)
-def test_custom_file_io_locations(CustomFileIO: Type[FileIO], string_uri: str) -> None:
+def test_custom_file_io_locations(string_uri: str) -> None:
"""Test that the location property is maintained as the value of the
location argument"""
# Instantiate the file-io and create a new input and output file
- file_io = CustomFileIO()
+ file_io = PyArrowFileIO()
input_file = file_io.new_input(location=string_uri)
assert input_file.location == string_uri
@@ -275,32 +165,7 @@ def test_custom_file_io_locations(CustomFileIO: Type[FileIO], string_uri: str) -
assert output_file.location == string_uri
[email protected](
- "string_uri_w_netloc",
- ["file://localhost:80/foo/bar.parquet", "file://foo/bar.parquet"],
-)
-def test_raise_on_network_location_in_input_file(string_uri_w_netloc: str) -> None:
- """Test raising a ValueError when providing a network location to a LocalInputFile"""
- with pytest.raises(ValueError) as exc_info:
- LocalInputFile(location=string_uri_w_netloc)
-
- assert ("Network location is not allowed for LocalInputFile") in
str(exc_info.value)
-
-
[email protected](
- "string_uri_w_netloc",
- ["file://localhost:80/foo/bar.parquet", "file://foo/bar.parquet"],
-)
-def test_raise_on_network_location_in_output_file(string_uri_w_netloc: str) -> None:
- """Test raising a ValueError when providing a network location to a LocalOutputFile"""
- with pytest.raises(ValueError) as exc_info:
- LocalInputFile(location=string_uri_w_netloc)
-
- assert ("Network location is not allowed for LocalInputFile") in
str(exc_info.value)
-
-
[email protected]("CustomFileIO", [LocalFileIO, PyArrowFileIO])
-def test_deleting_local_file_using_file_io(CustomFileIO: Type[FileIO]) -> None:
+def test_deleting_local_file_using_file_io() -> None:
"""Test deleting a local file using FileIO.delete(...)"""
with tempfile.TemporaryDirectory() as tmpdirname:
# Write to the temporary file
@@ -309,7 +174,7 @@ def test_deleting_local_file_using_file_io(CustomFileIO: Type[FileIO]) -> None:
f.write(b"foo")
# Instantiate the file-io
- file_io = CustomFileIO()
+ file_io = PyArrowFileIO()
# Confirm that the file initially exists
assert os.path.exists(output_file_location)
@@ -321,15 +186,14 @@ def test_deleting_local_file_using_file_io(CustomFileIO: Type[FileIO]) -> None:
assert not os.path.exists(output_file_location)
[email protected]("CustomFileIO", [LocalFileIO, PyArrowFileIO])
-def test_raise_file_not_found_error_for_fileio_delete(CustomFileIO:
Type[FileIO]) -> None:
+def test_raise_file_not_found_error_for_fileio_delete() -> None:
"""Test raising a FileNotFound error when trying to delete a non-existent
file"""
with tempfile.TemporaryDirectory() as tmpdirname:
# Write to the temporary file
output_file_location = os.path.join(tmpdirname, "foo.txt")
# Instantiate the file-io
- file_io = CustomFileIO()
+ file_io = PyArrowFileIO()
# Delete the non-existent file using the file-io implementations delete method
with pytest.raises(FileNotFoundError) as exc_info:
@@ -341,8 +205,7 @@ def test_raise_file_not_found_error_for_fileio_delete(CustomFileIO: Type[FileIO]
assert not os.path.exists(output_file_location)
[email protected]("CustomFileIO", [LocalFileIO, PyArrowFileIO])
-def test_deleting_local_file_using_file_io_input_file(CustomFileIO:
Type[FileIO]) -> None:
+def test_deleting_local_file_using_file_io_input_file() -> None:
"""Test deleting a local file by passing an InputFile instance to
FileIO.delete(...)"""
with tempfile.TemporaryDirectory() as tmpdirname:
# Write to the temporary file
@@ -351,13 +214,13 @@ def test_deleting_local_file_using_file_io_input_file(CustomFileIO: Type[FileIO]
f.write(b"foo")
# Instantiate the file-io
- file_io = CustomFileIO()
+ file_io = PyArrowFileIO()
# Confirm that the file initially exists
assert os.path.exists(file_location)
# Instantiate the custom InputFile
- input_file = CustomFileIO().new_input(location=f"{file_location}")
+ input_file = PyArrowFileIO().new_input(location=f"{file_location}")
# Delete the file using the file-io implementations delete method
file_io.delete(input_file)
@@ -366,8 +229,7 @@ def test_deleting_local_file_using_file_io_input_file(CustomFileIO: Type[FileIO]
assert not os.path.exists(file_location)
[email protected]("CustomFileIO", [LocalFileIO, PyArrowFileIO])
-def test_deleting_local_file_using_file_io_output_file(CustomFileIO:
Type[FileIO]) -> None:
+def test_deleting_local_file_using_file_io_output_file() -> None:
"""Test deleting a local file by passing an OutputFile instance to
FileIO.delete(...)"""
with tempfile.TemporaryDirectory() as tmpdirname:
# Write to the temporary file
@@ -376,13 +238,13 @@ def test_deleting_local_file_using_file_io_output_file(CustomFileIO: Type[FileIO
f.write(b"foo")
# Instantiate the file-io
- file_io = CustomFileIO()
+ file_io = PyArrowFileIO()
# Confirm that the file initially exists
assert os.path.exists(file_location)
# Instantiate the custom OutputFile
- output_file = CustomFileIO().new_output(location=f"{file_location}")
+ output_file = PyArrowFileIO().new_output(location=f"{file_location}")
# Delete the file using the file-io implementations delete method
file_io.delete(output_file)
@@ -426,14 +288,12 @@ def test_load_file_io_location_no_schema() -> None:
assert isinstance(load_file_io({"location": "/no-schema/"}), PyArrowFileIO)
[email protected]("pyiceberg.io.SCHEMA_TO_FILE_IO", {"test":
["tests.io.test_io.LocalFileIO"]})
def test_mock_warehouse_location_file_io() -> None:
# For testing the selection logic
io = load_file_io({"warehouse": "test://some-path/"})
assert io.properties["warehouse"] == "test://some-path/"
[email protected]("pyiceberg.io.SCHEMA_TO_FILE_IO", {"test":
["tests.io.test_io.LocalFileIO"]})
def test_mock_table_location_file_io() -> None:
# For testing the selection logic
io = load_file_io({}, "test://some-path/")
diff --git a/python/tests/table/test_metadata.py b/python/tests/table/test_metadata.py
index 9f46ca6737..8c464c0e5a 100644
--- a/python/tests/table/test_metadata.py
+++ b/python/tests/table/test_metadata.py
@@ -14,6 +14,7 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+# pylint: disable=redefined-outer-name
import io
import json
@@ -49,30 +50,33 @@ from pyiceberg.types import (
StructType,
)
-EXAMPLE_TABLE_METADATA_V1 = {
- "format-version": 1,
- "table-uuid": "d20125c8-7284-442c-9aea-15fee620737c",
- "location": "s3://bucket/test/location",
- "last-updated-ms": 1602638573874,
- "last-column-id": 3,
- "schema": {
- "type": "struct",
- "fields": [
- {"id": 1, "name": "x", "required": True, "type": "long"},
- {"id": 2, "name": "y", "required": True, "type": "long", "doc":
"comment"},
- {"id": 3, "name": "z", "required": True, "type": "long"},
- ],
- },
- "partition-spec": [{"name": "x", "transform": "identity", "source-id": 1,
"field-id": 1000}],
- "properties": {},
- "current-snapshot-id": -1,
- "snapshots": [{"snapshot-id": 1925, "timestamp-ms": 1602638573822}],
-}
+
[email protected](scope="session")
+def example_table_metadata_v1() -> Dict[str, Any]:
+ return {
+ "format-version": 1,
+ "table-uuid": "d20125c8-7284-442c-9aea-15fee620737c",
+ "location": "s3://bucket/test/location",
+ "last-updated-ms": 1602638573874,
+ "last-column-id": 3,
+ "schema": {
+ "type": "struct",
+ "fields": [
+ {"id": 1, "name": "x", "required": True, "type": "long"},
+ {"id": 2, "name": "y", "required": True, "type": "long",
"doc": "comment"},
+ {"id": 3, "name": "z", "required": True, "type": "long"},
+ ],
+ },
+ "partition-spec": [{"name": "x", "transform": "identity", "source-id":
1, "field-id": 1000}],
+ "properties": {},
+ "current-snapshot-id": -1,
+ "snapshots": [{"snapshot-id": 1925, "timestamp-ms": 1602638573822}],
+ }
-def test_from_dict_v1() -> None:
+def test_from_dict_v1(example_table_metadata_v1: Dict[str, Any]) -> None:
"""Test initialization of a TableMetadata instance from a dictionary"""
- TableMetadataUtil.parse_obj(EXAMPLE_TABLE_METADATA_V1)
+ TableMetadataUtil.parse_obj(example_table_metadata_v1)
def test_from_dict_v2(example_table_metadata_v2: Dict[str, Any]) -> None:
@@ -110,9 +114,9 @@ def test_v2_metadata_parsing(example_table_metadata_v2: Dict[str, Any]) -> None:
assert table_metadata.default_sort_order_id == 3
-def test_v1_metadata_parsing_directly() -> None:
+def test_v1_metadata_parsing_directly(example_table_metadata_v1: Dict[str, Any]) -> None:
"""Test retrieving values from a TableMetadata instance of version 1"""
- table_metadata = TableMetadataV1(**EXAMPLE_TABLE_METADATA_V1)
+ table_metadata = TableMetadataV1(**example_table_metadata_v1)
assert isinstance(table_metadata, TableMetadataV1)
@@ -174,8 +178,8 @@ def test_updating_metadata(example_table_metadata_v2: Dict[str, Any]) -> None:
assert table_metadata.schemas[-1] == Schema(**new_schema)
-def test_serialize_v1() -> None:
- table_metadata = TableMetadataV1(**EXAMPLE_TABLE_METADATA_V1)
+def test_serialize_v1(example_table_metadata_v1: Dict[str, Any]) -> None:
+ table_metadata = TableMetadataV1(**example_table_metadata_v1)
table_metadata_json = table_metadata.json()
expected = """{"location": "s3://bucket/test/location", "table-uuid":
"d20125c8-7284-442c-9aea-15fee620737c", "last-updated-ms": 1602638573874,
"last-column-id": 3, "schemas": [{"type": "struct", "fields": [{"id": 1,
"name": "x", "type": "long", "required": true}, {"id": 2, "name": "y", "type":
"long", "required": true, "doc": "comment"}, {"id": 3, "name": "z", "type":
"long", "required": true}], "schema-id": 0, "identifier-field-ids": []}],
"current-schema-id": 0, "partition-specs": [...]
assert table_metadata_json == expected
@@ -187,17 +191,17 @@ def test_serialize_v2(example_table_metadata_v2: Dict[str, Any]) -> None:
assert table_metadata == expected
-def test_migrate_v1_schemas() -> None:
- table_metadata = TableMetadataV1(**EXAMPLE_TABLE_METADATA_V1)
+def test_migrate_v1_schemas(example_table_metadata_v1: Dict[str, Any]) -> None:
+ table_metadata = TableMetadataV1(**example_table_metadata_v1)
assert isinstance(table_metadata, TableMetadataV1)
assert len(table_metadata.schemas) == 1
assert table_metadata.schemas[0] == table_metadata.schema_
-def test_migrate_v1_partition_specs() -> None:
+def test_migrate_v1_partition_specs(example_table_metadata_v1: Dict[str, Any]) -> None:
# Copy the example, and add a spec
- table_metadata = TableMetadataV1(**EXAMPLE_TABLE_METADATA_V1)
+ table_metadata = TableMetadataV1(**example_table_metadata_v1)
assert isinstance(table_metadata, TableMetadataV1)
assert len(table_metadata.partition_specs) == 1
# Spec ID gets added automatically
@@ -390,7 +394,7 @@ def test_invalid_partition_spec() -> None:
assert "default-spec-id 1 can't be found" in str(exc_info.value)
-def test_v1_writing_metadata() -> None:
+def test_v1_writing_metadata(example_table_metadata_v1: Dict[str, Any]) -> None:
"""
https://iceberg.apache.org/spec/#version-2
@@ -398,14 +402,14 @@ def test_v1_writing_metadata() -> None:
- Table metadata field last-sequence-number should not be written
"""
- table_metadata = TableMetadataV1(**EXAMPLE_TABLE_METADATA_V1)
+ table_metadata = TableMetadataV1(**example_table_metadata_v1)
metadata_v1_json = table_metadata.json()
metadata_v1 = json.loads(metadata_v1_json)
assert "last-sequence-number" not in metadata_v1
-def test_v1_metadata_for_v2() -> None:
+def test_v1_metadata_for_v2(example_table_metadata_v1: Dict[str, Any]) -> None:
"""
https://iceberg.apache.org/spec/#version-2
@@ -413,7 +417,7 @@ def test_v1_metadata_for_v2() -> None:
- Table metadata field last-sequence-number must default to 0
"""
- table_metadata = TableMetadataV1(**EXAMPLE_TABLE_METADATA_V1).to_v2()
+ table_metadata = TableMetadataV1(**example_table_metadata_v1).to_v2()
assert table_metadata.last_sequence_number == 0
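The test_metadata.py changes above turn the module-level EXAMPLE_TABLE_METADATA_V1 constant into a pytest fixture, so tests receive the dict by naming a parameter after the fixture instead of importing it from another test module (which is what forced test_base.py to inline its own copy). A minimal sketch of the mechanism (illustrative names):

    import pytest

    @pytest.fixture(scope="session")
    def example_value() -> dict:
        # Built once per test session, like example_table_metadata_v1
        return {"format-version": 1}

    def test_uses_fixture(example_value: dict) -> None:
        assert example_value["format-version"] == 1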
diff --git a/python/tests/utils/test_manifest.py b/python/tests/utils/test_manifest.py
index 25ba4730dc..91c111abc1 100644
--- a/python/tests/utils/test_manifest.py
+++ b/python/tests/utils/test_manifest.py
@@ -16,6 +16,7 @@
# under the License.
from pyiceberg.io import load_file_io
+from pyiceberg.io.pyarrow import PyArrowFileIO
from pyiceberg.manifest import (
DataFile,
DataFileContent,
@@ -30,11 +31,10 @@ from pyiceberg.manifest import (
)
from pyiceberg.table import Snapshot
from pyiceberg.table.snapshots import Operation, Summary
-from tests.io.test_io import LocalInputFile
def test_read_manifest_entry(generated_manifest_entry_file: str) -> None:
- input_file = LocalInputFile(generated_manifest_entry_file)
+ input_file = PyArrowFileIO().new_input(location=generated_manifest_entry_file)
assert list(read_manifest_entry(input_file)) == [
ManifestEntry(
status=1,
@@ -268,7 +268,7 @@ def test_read_manifest_entry(generated_manifest_entry_file: str) -> None:
def test_read_manifest_list(generated_manifest_file_file: str) -> None:
- input_file = LocalInputFile(generated_manifest_file_file)
+ input_file = PyArrowFileIO().new_input(generated_manifest_file_file)
actual = list(read_manifest_list(input_file))
expected = [
ManifestFile(