(iceberg-python) branch main updated: Treat warning as error in CI/Dev (#973)

sungwy Wed, 31 Jul 2024 17:39:45 -0700

This is an automated email from the ASF dual-hosted git repository.

sungwy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git



The following commit(s) were added to refs/heads/main by this push:
     new dafcf229 Treat warning as error in CI/Dev (#973)
dafcf229 is described below

commit dafcf22992e40f1c13d13a06efca21ee1263b33c
Author: Andre Luis Anastacio <[email protected]>
AuthorDate: Wed Jul 31 21:39:35 2024 -0300

    Treat warning as error in CI/Dev (#973)
    
    * Treat warning as error in CI/Dev
    
    This will help us avoid propagating warnings to our users, as occurred in 
#971.
    
    * fixup! Treat warning as error in CI/Dev
---
 pyproject.toml                               | 23 +++++++++++++++--------
 tests/catalog/test_sql.py                    |  2 +-
 tests/integration/test_deletes.py            |  4 ++++
 tests/integration/test_inspect_table.py      |  6 +++---
 tests/integration/test_writes/test_writes.py | 16 +++++++++-------
 5 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index efa77cfd..9aa29903 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,7 +29,7 @@ classifiers = [
   "Programming Language :: Python :: 3.8",
   "Programming Language :: Python :: 3.9",
   "Programming Language :: Python :: 3.10",
-  "Programming Language :: Python :: 3.11"
+  "Programming Language :: Python :: 3.11",
 ]
 packages = [
   { include = "pyiceberg" },
@@ -37,7 +37,10 @@ packages = [
   { from = "vendor", include = "hive_metastore" },
   { include = "tests", format = "sdist" },
   { include = "Makefile", format = "sdist" },
-  { include = "NOTICE", format = ["sdist", "wheel"] }
+  { include = "NOTICE", format = [
+    "sdist",
+    "wheel",
+  ] },
 ]
 include = [
   { path = "dev", format = "sdist" },
@@ -62,8 +65,8 @@ pyarrow = { version = ">=9.0.0,<18.0.0", optional = true }
 pandas = { version = ">=1.0.0,<3.0.0", optional = true }
 duckdb = { version = ">=0.5.0,<2.0.0", optional = true }
 ray = [
-  { version = "==2.10.0", python = "<3.9", optional = true},
-  { version = ">=2.10.0,<3.0.0", python = ">=3.9", optional = true}
+  { version = "==2.10.0", python = "<3.9", optional = true },
+  { version = ">=2.10.0,<3.0.0", python = ">=3.9", optional = true },
 ]
 python-snappy = { version = ">=0.6.0,<1.0.0", optional = true }
 thrift = { version = ">=0.13.0,<1.0.0", optional = true }
@@ -599,13 +602,17 @@ markers = [
   "s3: marks a test as requiring access to s3 compliant storage (use with 
--aws-access-key-id, --aws-secret-access-key, and --endpoint args)",
   "adlfs: marks a test as requiring access to adlfs compliant storage (use 
with --adlfs.account-name, --adlfs.account-key, and --adlfs.endpoint args)",
   "integration: marks integration tests against Apache Spark",
-  "gcs: marks a test as requiring access to gcs compliant storage (use with 
--gs.token, --gs.project, and --gs.endpoint)"
+  "gcs: marks a test as requiring access to gcs compliant storage (use with 
--gs.token, --gs.project, and --gs.endpoint)",
 ]
 
 # Turns a warning into an error
-#filterwarnings = [
-#    "error"
-#]
+filterwarnings = [
+  "error",
+  "ignore:A plugin raised an exception during an old-style hookwrapper 
teardown.",
+  "ignore:unclosed <socket.socket",
+  # Remove this in a future release of PySpark.
+  "ignore:distutils Version classes are deprecated. Use packaging.version 
instead.",
+]
 
 [tool.black]
 line-length = 130
diff --git a/tests/catalog/test_sql.py b/tests/catalog/test_sql.py
index 2bee387a..08c318e0 100644
--- a/tests/catalog/test_sql.py
+++ b/tests/catalog/test_sql.py
@@ -355,7 +355,7 @@ def test_write_pyarrow_schema(catalog: SqlCatalog, 
table_identifier: Identifier)
     namespace = Catalog.namespace_from(table_identifier_nocatalog)
     catalog.create_namespace(namespace)
     table = catalog.create_table(table_identifier, pyarrow_table.schema)
-    table.overwrite(pyarrow_table)
+    table.append(pyarrow_table)
 
 
 @pytest.mark.parametrize(
diff --git a/tests/integration/test_deletes.py 
b/tests/integration/test_deletes.py
index 4bddf09b..cf6a1a25 100644
--- a/tests/integration/test_deletes.py
+++ b/tests/integration/test_deletes.py
@@ -145,6 +145,7 @@ def test_rewrite_partitioned_table_with_null(spark: 
SparkSession, session_catalo
 
 @pytest.mark.integration
 @pytest.mark.parametrize("format_version", [1, 2])
+@pytest.mark.filterwarnings("ignore:Delete operation did not match any records")
 def test_partitioned_table_no_match(spark: SparkSession, session_catalog: 
RestCatalog, format_version: int) -> None:
     identifier = "default.table_partitioned_delete"
 
@@ -175,6 +176,7 @@ def test_partitioned_table_no_match(spark: SparkSession, 
session_catalog: RestCa
 
 
 @pytest.mark.integration
+@pytest.mark.filterwarnings("ignore:Merge on read is not yet supported, falling back to copy-on-write")
 def test_delete_partitioned_table_positional_deletes(spark: SparkSession, 
session_catalog: RestCatalog) -> None:
     identifier = "default.table_partitioned_delete"
 
@@ -223,6 +225,7 @@ def test_delete_partitioned_table_positional_deletes(spark: 
SparkSession, sessio
 
 
 @pytest.mark.integration
+@pytest.mark.filterwarnings("ignore:Merge on read is not yet supported, falling back to copy-on-write")
 def test_overwrite_partitioned_table(spark: SparkSession, session_catalog: 
RestCatalog) -> None:
     identifier = "default.table_partitioned_delete"
 
@@ -274,6 +277,7 @@ def test_overwrite_partitioned_table(spark: SparkSession, 
session_catalog: RestC
 
 
 @pytest.mark.integration
+@pytest.mark.filterwarnings("ignore:Merge on read is not yet supported, falling back to copy-on-write")
 def test_partitioned_table_positional_deletes_sequence_number(spark: 
SparkSession, session_catalog: RestCatalog) -> None:
     identifier = "default.table_partitioned_delete_sequence_number"
 
diff --git a/tests/integration/test_inspect_table.py 
b/tests/integration/test_inspect_table.py
index 9415d714..9f632258 100644
--- a/tests/integration/test_inspect_table.py
+++ b/tests/integration/test_inspect_table.py
@@ -79,7 +79,7 @@ def test_inspect_snapshots(
     identifier = "default.table_metadata_snapshots"
     tbl = _create_table(session_catalog, identifier, 
properties={"format-version": format_version})
 
-    tbl.overwrite(arrow_table_with_null)
+    tbl.append(arrow_table_with_null)
     # should produce a DELETE entry
     tbl.overwrite(arrow_table_with_null)
     # Since we don't rewrite, this should produce a new manifest with an ADDED 
entry
@@ -295,7 +295,7 @@ def test_inspect_refs(
     tbl = _create_table(session_catalog, identifier, 
properties={"format-version": format_version})
 
     # write data to create snapshot
-    tbl.overwrite(arrow_table_with_null)
+    tbl.append(arrow_table_with_null)
 
     # create a test branch
     spark.sql(
@@ -667,7 +667,7 @@ def test_inspect_files(
 
     tbl = _create_table(session_catalog, identifier, 
properties={"format-version": format_version})
 
-    tbl.overwrite(arrow_table_with_null)
+    tbl.append(arrow_table_with_null)
 
     # append more data
     tbl.append(arrow_table_with_null)
diff --git a/tests/integration/test_writes/test_writes.py 
b/tests/integration/test_writes/test_writes.py
index 8ea51f4b..3aaafa85 100644
--- a/tests/integration/test_writes/test_writes.py
+++ b/tests/integration/test_writes/test_writes.py
@@ -256,7 +256,7 @@ def test_data_files(spark: SparkSession, session_catalog: 
Catalog, arrow_table_w
     identifier = "default.arrow_data_files"
     tbl = _create_table(session_catalog, identifier, {"format-version": "1"}, 
[])
 
-    tbl.overwrite(arrow_table_with_null)
+    tbl.append(arrow_table_with_null)
     # should produce a DELETE entry
     tbl.overwrite(arrow_table_with_null)
     # Since we don't rewrite, this should produce a new manifest with an ADDED 
entry
@@ -288,7 +288,7 @@ def test_python_writes_with_spark_snapshot_reads(
             .snapshot_id
         )
 
-    tbl.overwrite(arrow_table_with_null)
+    tbl.append(arrow_table_with_null)
     assert tbl.current_snapshot().snapshot_id == 
get_current_snapshot_id(identifier)  # type: ignore
     tbl.overwrite(arrow_table_with_null)
     assert tbl.current_snapshot().snapshot_id == 
get_current_snapshot_id(identifier)  # type: ignore
@@ -330,7 +330,7 @@ def 
test_python_writes_special_character_column_with_spark_reads(
     arrow_table_with_special_character_column = 
pa.Table.from_pydict(TEST_DATA_WITH_SPECIAL_CHARACTER_COLUMN, schema=pa_schema)
     tbl = _create_table(session_catalog, identifier, {"format-version": 
format_version}, schema=pa_schema)
 
-    tbl.overwrite(arrow_table_with_special_character_column)
+    tbl.append(arrow_table_with_special_character_column)
     spark_df = spark.sql(f"SELECT * FROM {identifier}").toPandas()
     pyiceberg_df = tbl.scan().to_pandas()
     assert spark_df.equals(pyiceberg_df)
@@ -354,7 +354,7 @@ def 
test_python_writes_dictionary_encoded_column_with_spark_reads(
 
     tbl = _create_table(session_catalog, identifier, {"format-version": 
format_version}, schema=pa_schema)
 
-    tbl.overwrite(arrow_table)
+    tbl.append(arrow_table)
     spark_df = spark.sql(f"SELECT * FROM {identifier}").toPandas()
     pyiceberg_df = tbl.scan().to_pandas()
     assert spark_df.equals(pyiceberg_df)
@@ -393,7 +393,7 @@ def 
test_python_writes_with_small_and_large_types_spark_reads(
     arrow_table = pa.Table.from_pydict(TEST_DATA, schema=pa_schema)
     tbl = _create_table(session_catalog, identifier, {"format-version": 
format_version}, schema=pa_schema)
 
-    tbl.overwrite(arrow_table)
+    tbl.append(arrow_table)
     spark_df = spark.sql(f"SELECT * FROM {identifier}").toPandas()
     pyiceberg_df = tbl.scan().to_pandas()
     assert spark_df.equals(pyiceberg_df)
@@ -429,7 +429,7 @@ def test_write_bin_pack_data_files(spark: SparkSession, 
session_catalog: Catalog
 
     # writes 1 data file since the table is smaller than default target file 
size
     assert arrow_table_with_null.nbytes < 
TableProperties.WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT
-    tbl.overwrite(arrow_table_with_null)
+    tbl.append(arrow_table_with_null)
     assert get_data_files_count(identifier) == 1
 
     # writes 1 data file as long as table is smaller than default target file 
size
@@ -820,7 +820,7 @@ def test_inspect_snapshots(
     identifier = "default.table_metadata_snapshots"
     tbl = _create_table(session_catalog, identifier, 
properties={"format-version": format_version})
 
-    tbl.overwrite(arrow_table_with_null)
+    tbl.append(arrow_table_with_null)
     # should produce a DELETE entry
     tbl.overwrite(arrow_table_with_null)
     # Since we don't rewrite, this should produce a new manifest with an ADDED 
entry
@@ -979,6 +979,7 @@ def test_table_write_subset_of_schema(session_catalog: 
Catalog, arrow_table_with
 
 @pytest.mark.integration
 @pytest.mark.parametrize("format_version", [1, 2])
+@pytest.mark.filterwarnings("ignore:Delete operation did not match any records")
 def test_table_write_out_of_order_schema(session_catalog: Catalog, 
arrow_table_with_null: pa.Table, format_version: int) -> None:
     identifier = "default.test_table_write_out_of_order_schema"
     # rotate the schema fields by 1
@@ -989,6 +990,7 @@ def test_table_write_out_of_order_schema(session_catalog: 
Catalog, arrow_table_w
     tbl = _create_table(session_catalog, identifier, {"format-version": 
format_version}, schema=rotated_schema)
 
     tbl.overwrite(arrow_table_with_null)
+
     tbl.append(arrow_table_with_null)
     # overwrite and then append should produce twice the data
     assert len(tbl.scan().to_arrow()) == len(arrow_table_with_null) * 2

(iceberg-python) branch main updated: Treat warning as error in CI/Dev (#973)

Reply via email to