This is an automated email from the ASF dual-hosted git repository.
honahx pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new 74caa17e Test: Add test to partition on field with a dot (#610)
74caa17e is described below
commit 74caa17e215f7f60ed924c91362f56d9a97e8642
Author: Fokko Driesprong <[email protected]>
AuthorDate: Mon May 6 14:16:30 2024 +0200
Test: Add test to partition on field with a dot (#610)
---
tests/integration/test_writes/test_writes.py | 26 ++++++++++++++++++++++++++
tests/integration/test_writes/utils.py | 17 ++++++-----------
2 files changed, 32 insertions(+), 11 deletions(-)
diff --git a/tests/integration/test_writes/test_writes.py b/tests/integration/test_writes/test_writes.py
index 2cf2c9ef..8bebc53d 100644
--- a/tests/integration/test_writes/test_writes.py
+++ b/tests/integration/test_writes/test_writes.py
@@ -36,7 +36,11 @@ from pyiceberg.catalog import Catalog
from pyiceberg.catalog.hive import HiveCatalog
from pyiceberg.catalog.sql import SqlCatalog
from pyiceberg.exceptions import NoSuchTableError
+from pyiceberg.partitioning import PartitionField, PartitionSpec
+from pyiceberg.schema import Schema
from pyiceberg.table import TableProperties, _dataframe_to_data_files
+from pyiceberg.transforms import IdentityTransform
+from pyiceberg.types import IntegerType, NestedField
from tests.conftest import TEST_DATA_WITH_NULL
from utils import _create_table
@@ -807,3 +811,25 @@ def test_hive_catalog_storage_descriptor(
assert len(tbl.scan().to_arrow()) == 3
# check if spark can read the table
assert spark.sql("SELECT * FROM hive.default.test_storage_descriptor").count() == 3
+
+
[email protected]
[email protected]('catalog',
[pytest.lazy_fixture('session_catalog_hive'),
pytest.lazy_fixture('session_catalog')])
+def test_sanitize_character_partitioned(catalog: Catalog) -> None:
+ table_name = "default.test_table_partitioned_sanitized_character"
+ try:
+ catalog.drop_table(table_name)
+ except NoSuchTableError:
+ pass
+
+ tbl = _create_table(
+ session_catalog=catalog,
+ identifier=table_name,
+ schema=Schema(NestedField(field_id=1, name="some.id", type=IntegerType(), required=True)),
+ partition_spec=PartitionSpec(
+ PartitionField(source_id=1, field_id=1000, name="some.id_identity", transform=IdentityTransform())
+ ),
+ data=[pa.Table.from_arrays([range(22)], schema=pa.schema([pa.field("some.id", pa.int32(), nullable=False)]))],
+ )
+
+ assert len(tbl.scan().to_arrow()) == 22
diff --git a/tests/integration/test_writes/utils.py b/tests/integration/test_writes/utils.py
index 742b1e14..9f1f6df0 100644
--- a/tests/integration/test_writes/utils.py
+++ b/tests/integration/test_writes/utils.py
@@ -21,10 +21,10 @@ import pyarrow as pa
from pyiceberg.catalog import Catalog
from pyiceberg.exceptions import NoSuchTableError
-from pyiceberg.partitioning import PartitionSpec
+from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec
from pyiceberg.schema import Schema
from pyiceberg.table import Table
-from pyiceberg.typedef import Properties
+from pyiceberg.typedef import EMPTY_DICT, Properties
from pyiceberg.types import (
BinaryType,
BooleanType,
@@ -62,9 +62,9 @@ TABLE_SCHEMA = Schema(
def _create_table(
session_catalog: Catalog,
identifier: str,
- properties: Properties,
+ properties: Properties = EMPTY_DICT,
data: Optional[List[pa.Table]] = None,
- partition_spec: Optional[PartitionSpec] = None,
+ partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC,
schema: Union[Schema, "pa.Schema"] = TABLE_SCHEMA,
) -> Table:
try:
@@ -72,14 +72,9 @@ def _create_table(
except NoSuchTableError:
pass
- if partition_spec:
- tbl = session_catalog.create_table(
- identifier=identifier, schema=schema, properties=properties, partition_spec=partition_spec
- )
- else:
- tbl = session_catalog.create_table(identifier=identifier, schema=schema, properties=properties)
+ tbl = session_catalog.create_table(identifier=identifier, schema=schema, properties=properties, partition_spec=partition_spec)
- if data:
+ if data is not None:
for d in data:
tbl.append(d)