This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 1742a21 Python: Minor fixes for tests (#1214)
1742a21 is described below
commit 1742a2174b4ecbcfb8d41728fb77d3d26d0f4816
Author: Ryan Murray <[email protected]>
AuthorDate: Tue Jul 21 17:12:43 2020 +0100
Python: Minor fixes for tests (#1214)
In the course of implementing create table I came across minor issues:
* rename test fixtures to suppress warnings and prevent pytest from
treating them as tests
* test partition code path & fix incorrect signature when creating metadata
* handle `file:` as well as `file://` as it seems both schemas are used
* add mypy annotations in areas where I fixed code
* fix tox/mypy
---
python/iceberg/api/partition_spec.py | 4 ++--
python/iceberg/api/types/type_util.py | 5 +++--
python/iceberg/core/filesystem/local_filesystem.py | 7 +++----
python/iceberg/core/table_metadata.py | 9 ++++++---
python/tests/api/expressions/conftest.py | 10 +++++-----
python/tests/api/test_helpers.py | 2 +-
python/tests/core/conftest.py | 15 +++++++++------
python/tests/hive/test_hive_tables.py | 14 +++++++-------
8 files changed, 36 insertions(+), 30 deletions(-)
diff --git a/python/iceberg/api/partition_spec.py
b/python/iceberg/api/partition_spec.py
index 1451fbf..395001b 100644
--- a/python/iceberg/api/partition_spec.py
+++ b/python/iceberg/api/partition_spec.py
@@ -183,7 +183,7 @@ class PartitionSpec(object):
return "".join(sb)
@staticmethod
- def builder_for(schema):
+ def builder_for(schema: Schema) -> "PartitionSpecBuilder":
return PartitionSpecBuilder(schema)
@staticmethod
@@ -301,7 +301,7 @@ class PartitionSpecBuilder(object):
def add_without_field_id(self, source_id, name, transform):
return self.add(source_id, self.__next_field_id(), name, transform)
- def add(self, source_id, field_id, name, transform):
+ def add(self, source_id: int, field_id: int, name: str, transform: str) ->
"PartitionSpecBuilder":
self.check_and_add_partition_name(name)
column = self.schema.find_field(source_id)
if column is None:
diff --git a/python/iceberg/api/types/type_util.py
b/python/iceberg/api/types/type_util.py
index 68f0f59..f7f8b13 100644
--- a/python/iceberg/api/types/type_util.py
+++ b/python/iceberg/api/types/type_util.py
@@ -505,14 +505,14 @@ class CheckCompatibility(CustomOrderSchemaVisitor):
if curr_field is None:
if not field.is_optional:
- errors.add("{} is required, but is missing".format(field.name))
+ errors.append("{} is required, but is
missing".format(field.name))
return self.NO_ERRORS
self.current_type = curr_field.type
try:
if not field.is_optional and curr_field.is_optional:
- errors.add(field.name + " should be required, but is optional")
+ errors.append(field.name + " should be required, but is
optional")
for error in field_result:
if error.startswith(":"):
@@ -525,6 +525,7 @@ class CheckCompatibility(CustomOrderSchemaVisitor):
pass
finally:
self.current_type = struct
+ return errors
def list(self, list_var, element_result):
raise NotImplementedError()
diff --git a/python/iceberg/core/filesystem/local_filesystem.py
b/python/iceberg/core/filesystem/local_filesystem.py
index 826700a..2b74309 100644
--- a/python/iceberg/core/filesystem/local_filesystem.py
+++ b/python/iceberg/core/filesystem/local_filesystem.py
@@ -18,6 +18,7 @@
import errno
import os
from pathlib import Path
+from urllib.parse import urlparse
from .file_status import FileStatus
from .file_system import FileSystem
@@ -58,10 +59,8 @@ class LocalFileSystem(FileSystem):
permission=st.st_mode, owner=st.st_uid,
group=st.st_gid)
@staticmethod
- def fix_path(path):
- if path.startswith("file://"):
- path = str(path[7:])
- return path
+ def fix_path(path: str) -> str:
+ return urlparse(path).path
def create(self, path, overwrite=False):
if os.path.exists(path) and not overwrite:
diff --git a/python/iceberg/core/table_metadata.py
b/python/iceberg/core/table_metadata.py
index b289e44..8f921e6 100644
--- a/python/iceberg/core/table_metadata.py
+++ b/python/iceberg/core/table_metadata.py
@@ -17,8 +17,9 @@
import time
-from iceberg.api import PartitionSpec
+from iceberg.api import PartitionSpec, Schema
from iceberg.api.types import assign_fresh_ids
+from iceberg.core.table_operations import TableOperations
from iceberg.core.util import AtomicInteger
from iceberg.exceptions import ValidationException
@@ -28,14 +29,16 @@ class TableMetadata(object):
TABLE_FORMAT_VERSION = 1
@staticmethod
- def new_table_metadata(ops, schema, spec, location, properties=None):
+ def new_table_metadata(ops: TableOperations, schema: Schema, spec:
PartitionSpec, location: str,
+ properties: dict = None) -> "TableMetadata":
last_column_id = AtomicInteger(0)
fresh_schema = assign_fresh_ids(schema,
last_column_id.increment_and_get)
spec_builder = PartitionSpec.builder_for(fresh_schema)
for field in spec.fields:
src_name = schema.find_column_name(field.source_id)
- spec_builder.add(fresh_schema.find_field(src_name).field_id,
+ spec_builder.add(field.source_id,
+ fresh_schema.find_field(src_name).field_id,
field.name,
str(field.transform))
diff --git a/python/tests/api/expressions/conftest.py
b/python/tests/api/expressions/conftest.py
index 3ee4b4c..28cc43d 100644
--- a/python/tests/api/expressions/conftest.py
+++ b/python/tests/api/expressions/conftest.py
@@ -93,7 +93,7 @@ class TestHelpers(object):
raise AssertionError("Predicate should be a BoundPredicate")
-class TestDataFile(DataFile):
+class MockDataFile(DataFile):
def __init__(self, path, partition, record_count, value_counts=None,
null_value_counts=None,
lower_bounds=None, upper_bounds=None):
@@ -202,7 +202,7 @@ def strict_schema():
@pytest.fixture(scope="session")
def file():
- return TestDataFile("file.avro", TestHelpers.Row.of(), 50,
+ return MockDataFile("file.avro", TestHelpers.Row.of(), 50,
# value counts
{4: 50, 5: 50, 6: 50},
# null value counts
@@ -215,7 +215,7 @@ def file():
@pytest.fixture(scope="session")
def strict_file():
- return TestDataFile("file.avro",
+ return MockDataFile("file.avro",
TestHelpers.Row.of(),
50,
{4: 50, 5: 50, 6: 50},
@@ -229,12 +229,12 @@ def strict_file():
@pytest.fixture(scope="session")
def missing_stats():
- return TestDataFile("file.parquet", TestHelpers.Row.of(), 50)
+ return MockDataFile("file.parquet", TestHelpers.Row.of(), 50)
@pytest.fixture(scope="session")
def empty():
- return TestDataFile("file.parquet", TestHelpers.Row.of(), record_count=0)
+ return MockDataFile("file.parquet", TestHelpers.Row.of(), record_count=0)
@pytest.fixture(scope="session")
diff --git a/python/tests/api/test_helpers.py b/python/tests/api/test_helpers.py
index 7d51e5f..2f75690 100644
--- a/python/tests/api/test_helpers.py
+++ b/python/tests/api/test_helpers.py
@@ -74,7 +74,7 @@ class TestHelpers(object):
return None
-class TestDataFile(DataFile):
+class MockDataFile(DataFile):
def __init__(self, path, partition, record_count, value_counts=None,
null_value_counts=None,
lower_bounds=None, upper_bounds=None):
diff --git a/python/tests/core/conftest.py b/python/tests/core/conftest.py
index e446c63..8d414ac 100644
--- a/python/tests/core/conftest.py
+++ b/python/tests/core/conftest.py
@@ -20,7 +20,7 @@ import random
import tempfile
import time
-from iceberg.api import Files, PartitionSpec, Schema
+from iceberg.api import Files, PartitionSpec, PartitionSpecBuilder, Schema
from iceberg.api.types import BooleanType, IntegerType, LongType, NestedField,
StringType
from iceberg.core import (BaseSnapshot,
BaseTable,
@@ -99,7 +99,7 @@ class TestTableOperations(TableOperations):
self._current = None
self.refresh()
if self._current is not None:
- for snap in self.current.snapshots:
+ for snap in self.current().snapshots:
self.last_snapshot_id = max(self.last_snapshot_id,
snap.snapshot_id)
def current(self):
@@ -284,11 +284,14 @@ def base_scan_schema():
NestedField.required(2, "data", StringType.get())])
[email protected](scope="session")
-def ts_table(base_scan_schema):
[email protected](scope="session", params=["none", "one"])
+def ts_table(base_scan_schema, request):
with tempfile.TemporaryDirectory() as td:
- spec = PartitionSpec.unpartitioned()
- return TestTables.create(td, "test", base_scan_schema, spec)
+ if request.param == "none":
+ spec = PartitionSpec.unpartitioned()
+ else:
+ spec = PartitionSpecBuilder(base_scan_schema).add(1, 1000, "id",
"identity").build()
+ return TestTables.create(td, "test-" + request.param,
base_scan_schema, spec)
@pytest.fixture(scope="session")
diff --git a/python/tests/hive/test_hive_tables.py
b/python/tests/hive/test_hive_tables.py
index cba4337..f979d09 100644
--- a/python/tests/hive/test_hive_tables.py
+++ b/python/tests/hive/test_hive_tables.py
@@ -22,7 +22,7 @@ import mock
from pytest import raises
-class TestHMSTable(object):
+class MockHMSTable(object):
def __init__(self, params):
self.parameters = params
@@ -35,7 +35,7 @@ def test_load_tables_check_valid_props(client, current_call,
refresh_call):
"partition_spec": [],
"metadata_location": "s3://path/to/iceberg.metadata.json"}
- client.return_value.__enter__.return_value.get_table.return_value =
TestHMSTable(parameters)
+ client.return_value.__enter__.return_value.get_table.return_value =
MockHMSTable(parameters)
conf = {"hive.metastore.uris": 'thrift://hms:port'}
tables = HiveTables(conf)
@@ -49,7 +49,7 @@ def test_load_tables_check_missing_iceberg_type(client,
current_call, refresh_ca
parameters = {"partition_spec": [],
"metadata_location": "s3://path/to/iceberg.metdata.json"}
- client.return_value.__enter__.return_value.get_table.return_value =
TestHMSTable(parameters)
+ client.return_value.__enter__.return_value.get_table.return_value =
MockHMSTable(parameters)
conf = {"hive.metastore.uris": 'thrift://hms:port'}
tables = HiveTables(conf)
@@ -65,7 +65,7 @@ def test_load_tables_check_non_iceberg_type(client,
current_call, refresh_call):
"partition_spec": [],
"metadata_location": "s3://path/to/iceberg.metdata.json"}
- client.return_value.__enter__.return_value.get_table.return_value =
TestHMSTable(parameters)
+ client.return_value.__enter__.return_value.get_table.return_value =
MockHMSTable(parameters)
conf = {"hive.metastore.uris": 'thrift://hms:port'}
tables = HiveTables(conf)
@@ -81,7 +81,7 @@ def test_load_tables_check_none_table_type(client,
current_call, refresh_call):
"partition_spec": [],
"metadata_location": "s3://path/to/iceberg.metdata.json"}
- client.return_value.__enter__.return_value.get_table.return_value =
TestHMSTable(parameters)
+ client.return_value.__enter__.return_value.get_table.return_value =
MockHMSTable(parameters)
conf = {"hive.metastore.uris": 'thrift://hms:port'}
tables = HiveTables(conf)
@@ -96,7 +96,7 @@ def test_load_tables_check_no_location(client, current_call,
refresh_call):
parameters = {"table_type": "ICEBERG",
"partition_spec": []}
- client.return_value.__enter__.return_value.get_table.return_value =
TestHMSTable(parameters)
+ client.return_value.__enter__.return_value.get_table.return_value =
MockHMSTable(parameters)
conf = {"hive.metastore.uris": 'thrift://hms:port'}
tables = HiveTables(conf)
@@ -112,7 +112,7 @@ def test_load_tables_check_none_location(client,
current_call, refresh_call):
"partition_spec": [],
"metadata_location": None}
- client.return_value.__enter__.return_value.get_table.return_value =
TestHMSTable(parameters)
+ client.return_value.__enter__.return_value.get_table.return_value =
MockHMSTable(parameters)
conf = {"hive.metastore.uris": 'thrift://hms:port'}
tables = HiveTables(conf)