This is an automated email from the ASF dual-hosted git repository.
honahx pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new 74caa17e Test: Add test to partition on field with a dot (#610)
74caa17e is described below
commit 74caa17e215f7f60ed924c91362f56d9a97e8642
Author: Fokko Driesprong <[email protected]>
AuthorDate: Mon May 6 14:16:30 2024 +0200
Test: Add test to partition on field with a dot (#610)
---
tests/integration/test_writes/test_writes.py | 26 ++++++++++++++++++++++++++
tests/integration/test_writes/utils.py | 17 ++++++-----------
2 files changed, 32 insertions(+), 11 deletions(-)
diff --git a/tests/integration/test_writes/test_writes.py b/tests/integration/test_writes/test_writes.py
index 2cf2c9ef..8bebc53d 100644
--- a/tests/integration/test_writes/test_writes.py
+++ b/tests/integration/test_writes/test_writes.py
@@ -36,7 +36,11 @@ from pyiceberg.catalog import Catalog
from pyiceberg.catalog.hive import HiveCatalog
from pyiceberg.catalog.sql import SqlCatalog
from pyiceberg.exceptions import NoSuchTableError
+from pyiceberg.partitioning import PartitionField, PartitionSpec
+from pyiceberg.schema import Schema
from pyiceberg.table import TableProperties, _dataframe_to_data_files
+from pyiceberg.transforms import IdentityTransform
+from pyiceberg.types import IntegerType, NestedField
from tests.conftest import TEST_DATA_WITH_NULL
from utils import _create_table
@@ -807,3 +811,25 @@ def test_hive_catalog_storage_descriptor(
assert len(tbl.scan().to_arrow()) == 3
# check if spark can read the table
assert spark.sql("SELECT * FROM hive.default.test_storage_descriptor").count() == 3
+
+
[email protected]
[email protected]('catalog',
[pytest.lazy_fixture('session_catalog_hive'),
pytest.lazy_fixture('session_catalog')])
+def test_sanitize_character_partitioned(catalog: Catalog) -> None:
+ table_name = "default.test_table_partitioned_sanitized_character"
+ try:
+ catalog.drop_table(table_name)
+ except NoSuchTableError:
+ pass
+
+ tbl = _create_table(
+ session_catalog=catalog,
+ identifier=table_name,
+ schema=Schema(NestedField(field_id=1, name="some.id", type=IntegerType(), required=True)),
+ partition_spec=PartitionSpec(
+ PartitionField(source_id=1, field_id=1000, name="some.id_identity", transform=IdentityTransform())
+ ),
+ data=[pa.Table.from_arrays([range(22)], schema=pa.schema([pa.field("some.id", pa.int32(), nullable=False)]))],
+ )
+
+ assert len(tbl.scan().to_arrow()) == 22
diff --git a/tests/integration/test_writes/utils.py b/tests/integration/test_writes/utils.py
index 742b1e14..9f1f6df0 100644
--- a/tests/integration/test_writes/utils.py
+++ b/tests/integration/test_writes/utils.py
@@ -21,10 +21,10 @@ import pyarrow as pa
from pyiceberg.catalog import Catalog
from pyiceberg.exceptions import NoSuchTableError
-from pyiceberg.partitioning import PartitionSpec
+from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec
from pyiceberg.schema import Schema
from pyiceberg.table import Table
-from pyiceberg.typedef import Properties
+from pyiceberg.typedef import EMPTY_DICT, Properties
from pyiceberg.types import (
BinaryType,
BooleanType,
@@ -62,9 +62,9 @@ TABLE_SCHEMA = Schema(
def _create_table(
session_catalog: Catalog,
identifier: str,
- properties: Properties,
+ properties: Properties = EMPTY_DICT,
data: Optional[List[pa.Table]] = None,
- partition_spec: Optional[PartitionSpec] = None,
+ partition_spec: PartitionSpec = UNPARTITIONED_PARTITION_SPEC,
schema: Union[Schema, "pa.Schema"] = TABLE_SCHEMA,
) -> Table:
try:
@@ -72,14 +72,9 @@ def _create_table(
except NoSuchTableError:
pass
- if partition_spec:
- tbl = session_catalog.create_table(
- identifier=identifier, schema=schema, properties=properties, partition_spec=partition_spec
- )
- else:
- tbl = session_catalog.create_table(identifier=identifier, schema=schema, properties=properties)
+ tbl = session_catalog.create_table(identifier=identifier, schema=schema, properties=properties, partition_spec=partition_spec)
- if data:
+ if data is not None:
for d in data:
tbl.append(d)