This is an automated email from the ASF dual-hosted git repository.

kevinjqliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new 6c0d3070 Use 'strtobool' instead of comparing with a string. (#988)
6c0d3070 is described below

commit 6c0d307032608967ccd00cfe72d8815e6e7e01cc
Author: Andre Luis Anastacio <[email protected]>
AuthorDate: Fri Aug 2 01:43:31 2024 -0300

    Use 'strtobool' instead of comparing with a string. (#988)
    
    * Use 'strtobool' instead of comparing with a string.
    
    * Move the PropertyUtil methods to the properties module as functions
    
    * fixup! Use 'strtobool' instead of comparing with a string.
    
    * fixup! Use 'strtobool' instead of comparing with a string.
---
 pyiceberg/catalog/dynamodb.py      | 13 +++--
 pyiceberg/catalog/glue.py          | 16 +++----
 pyiceberg/catalog/hive.py          | 14 ++----
 pyiceberg/catalog/rest.py          |  3 +-
 pyiceberg/conversions.py           |  3 +-
 pyiceberg/expressions/parser.py    |  3 +-
 pyiceberg/io/fsspec.py             | 15 +++---
 pyiceberg/io/pyarrow.py            | 29 ++++++-----
 pyiceberg/table/__init__.py        | 50 ++++---------------
 pyiceberg/utils/properties.py      | 76 +++++++++++++++++++++++++++++
 tests/expressions/test_literals.py | 20 +++++---
 tests/utils/test_properties.py     | 98 ++++++++++++++++++++++++++++++++++++++
 12 files changed, 242 insertions(+), 98 deletions(-)
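
For context, this commit replaces the PropertyUtil static methods with plain functions in pyiceberg/utils/properties.py. A minimal usage sketch, assuming a PyIceberg build that includes this change (the property keys below are made up for illustration):

    from pyiceberg.utils.properties import (
        get_first_property_value,
        property_as_bool,
        property_as_int,
    )

    # Hypothetical catalog properties, for illustration only.
    properties = {"example.timeout": "30", "example.enabled": "TRUE"}

    # Missing keys fall back to the supplied default.
    assert property_as_int(properties, "example.timeout", default=60) == 30
    assert property_as_int(properties, "example.retries", default=3) == 3

    # Booleans now go through strtobool: only "true"/"false" (any case) parse;
    # other values raise ValueError instead of silently meaning False.
    assert property_as_bool(properties, "example.enabled", False) is True

    # The first property name that has a value wins.
    assert get_first_property_value(properties, "example.timeout", "example.retries") == "30"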

diff --git a/pyiceberg/catalog/dynamodb.py b/pyiceberg/catalog/dynamodb.py
index 7cb5d985..40d873cd 100644
--- a/pyiceberg/catalog/dynamodb.py
+++ b/pyiceberg/catalog/dynamodb.py
@@ -61,6 +61,7 @@ from pyiceberg.table import CommitTableRequest, CommitTableResponse, Table
 from pyiceberg.table.metadata import new_table_metadata
 from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder
 from pyiceberg.typedef import EMPTY_DICT, Identifier, Properties
+from pyiceberg.utils.properties import get_first_property_value
 
 if TYPE_CHECKING:
     import pyarrow as pa
@@ -95,19 +96,17 @@ class DynamoDbCatalog(MetastoreCatalog):
     def __init__(self, name: str, **properties: str):
         super().__init__(name, **properties)
 
-        from pyiceberg.table import PropertyUtil
-
         session = boto3.Session(
-            profile_name=PropertyUtil.get_first_property_value(properties, DYNAMODB_PROFILE_NAME, DEPRECATED_PROFILE_NAME),
-            region_name=PropertyUtil.get_first_property_value(properties, DYNAMODB_REGION, AWS_REGION, DEPRECATED_REGION),
+            profile_name=get_first_property_value(properties, DYNAMODB_PROFILE_NAME, DEPRECATED_PROFILE_NAME),
+            region_name=get_first_property_value(properties, DYNAMODB_REGION, AWS_REGION, DEPRECATED_REGION),
             botocore_session=properties.get(DEPRECATED_BOTOCORE_SESSION),
-            aws_access_key_id=PropertyUtil.get_first_property_value(
+            aws_access_key_id=get_first_property_value(
                 properties, DYNAMODB_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID, DEPRECATED_ACCESS_KEY_ID
             ),
-            aws_secret_access_key=PropertyUtil.get_first_property_value(
+            aws_secret_access_key=get_first_property_value(
                 properties, DYNAMODB_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY, DEPRECATED_SECRET_ACCESS_KEY
             ),
-            aws_session_token=PropertyUtil.get_first_property_value(
+            aws_session_token=get_first_property_value(
                 properties, DYNAMODB_SESSION_TOKEN, AWS_SESSION_TOKEN, DEPRECATED_SESSION_TOKEN
             ),
         )
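
The get_first_property_value calls above preserve the existing lookup order: the DynamoDB-specific key wins over the generic AWS key, which wins over the deprecated key. A small sketch of that precedence (the key strings are assumed for illustration; only the ordering comes from the diff):

    from pyiceberg.utils.properties import get_first_property_value

    DYNAMODB_REGION = "dynamodb.region"      # assumed constant value
    AWS_REGION = "client.region"             # assumed constant value
    DEPRECATED_REGION = "aws.region"         # assumed constant value

    props = {"client.region": "us-east-1", "aws.region": "us-west-2"}
    # The DynamoDB-specific key is absent, so the generic AWS key is used.
    assert get_first_property_value(props, DYNAMODB_REGION, AWS_REGION, DEPRECATED_REGION) == "us-east-1"

    props["dynamodb.region"] = "eu-west-1"
    # Once the specific key is present, it takes precedence.
    assert get_first_property_value(props, DYNAMODB_REGION, AWS_REGION, DEPRECATED_REGION) == "eu-west-1"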
diff --git a/pyiceberg/catalog/glue.py b/pyiceberg/catalog/glue.py
index fa974a6f..f9d84834 100644
--- a/pyiceberg/catalog/glue.py
+++ b/pyiceberg/catalog/glue.py
@@ -71,7 +71,6 @@ from pyiceberg.serializers import FromInputFile
 from pyiceberg.table import (
     CommitTableRequest,
     CommitTableResponse,
-    PropertyUtil,
     Table,
 )
 from pyiceberg.table.metadata import TableMetadata
@@ -98,6 +97,7 @@ from pyiceberg.types import (
     TimeType,
     UUIDType,
 )
+from pyiceberg.utils.properties import get_first_property_value, property_as_bool
 
 if TYPE_CHECKING:
     import pyarrow as pa
@@ -298,19 +298,17 @@ class GlueCatalog(MetastoreCatalog):
     def __init__(self, name: str, **properties: Any):
         super().__init__(name, **properties)
 
-        from pyiceberg.table import PropertyUtil
-
         session = boto3.Session(
-            profile_name=PropertyUtil.get_first_property_value(properties, GLUE_PROFILE_NAME, DEPRECATED_PROFILE_NAME),
-            region_name=PropertyUtil.get_first_property_value(properties, GLUE_REGION, AWS_REGION, DEPRECATED_REGION),
+            profile_name=get_first_property_value(properties, GLUE_PROFILE_NAME, DEPRECATED_PROFILE_NAME),
+            region_name=get_first_property_value(properties, GLUE_REGION, AWS_REGION, DEPRECATED_REGION),
             botocore_session=properties.get(DEPRECATED_BOTOCORE_SESSION),
-            aws_access_key_id=PropertyUtil.get_first_property_value(
+            aws_access_key_id=get_first_property_value(
                 properties, GLUE_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID, DEPRECATED_ACCESS_KEY_ID
             ),
-            aws_secret_access_key=PropertyUtil.get_first_property_value(
+            aws_secret_access_key=get_first_property_value(
                 properties, GLUE_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY, DEPRECATED_SECRET_ACCESS_KEY
             ),
-            aws_session_token=PropertyUtil.get_first_property_value(
+            aws_session_token=get_first_property_value(
                 properties, GLUE_SESSION_TOKEN, AWS_SESSION_TOKEN, DEPRECATED_SESSION_TOKEN
             ),
         )
@@ -368,7 +366,7 @@ class GlueCatalog(MetastoreCatalog):
             self.glue.update_table(
                 DatabaseName=database_name,
                 TableInput=table_input,
-                SkipArchive=PropertyUtil.property_as_bool(self.properties, GLUE_SKIP_ARCHIVE, GLUE_SKIP_ARCHIVE_DEFAULT),
+                SkipArchive=property_as_bool(self.properties, GLUE_SKIP_ARCHIVE, GLUE_SKIP_ARCHIVE_DEFAULT),
                 VersionId=version_id,
             )
         except self.glue.exceptions.EntityNotFoundException as e:
diff --git a/pyiceberg/catalog/hive.py b/pyiceberg/catalog/hive.py
index 83bbd507..2b9c2265 100644
--- a/pyiceberg/catalog/hive.py
+++ b/pyiceberg/catalog/hive.py
@@ -81,7 +81,6 @@ from pyiceberg.serializers import FromInputFile
 from pyiceberg.table import (
     CommitTableRequest,
     CommitTableResponse,
-    PropertyUtil,
     StagedTable,
     Table,
     TableProperties,
@@ -109,6 +108,7 @@ from pyiceberg.types import (
     TimeType,
     UUIDType,
 )
+from pyiceberg.utils.properties import property_as_bool, property_as_float
 
 if TYPE_CHECKING:
     import pyarrow as pa
@@ -259,13 +259,9 @@ class HiveCatalog(MetastoreCatalog):
         super().__init__(name, **properties)
         self._client = _HiveClient(properties["uri"], properties.get("ugi"))
 
-        self._lock_check_min_wait_time = PropertyUtil.property_as_float(
-            properties, LOCK_CHECK_MIN_WAIT_TIME, DEFAULT_LOCK_CHECK_MIN_WAIT_TIME
-        )
-        self._lock_check_max_wait_time = PropertyUtil.property_as_float(
-            properties, LOCK_CHECK_MAX_WAIT_TIME, DEFAULT_LOCK_CHECK_MAX_WAIT_TIME
-        )
-        self._lock_check_retries = PropertyUtil.property_as_float(
+        self._lock_check_min_wait_time = property_as_float(properties, LOCK_CHECK_MIN_WAIT_TIME, DEFAULT_LOCK_CHECK_MIN_WAIT_TIME)
+        self._lock_check_max_wait_time = property_as_float(properties, LOCK_CHECK_MAX_WAIT_TIME, DEFAULT_LOCK_CHECK_MAX_WAIT_TIME)
+        self._lock_check_retries = property_as_float(
             properties,
             LOCK_CHECK_RETRIES,
             DEFAULT_LOCK_CHECK_RETRIES,
@@ -314,7 +310,7 @@ class HiveCatalog(MetastoreCatalog):
             sd=_construct_hive_storage_descriptor(
                 table.schema(),
                 table.location(),
-                PropertyUtil.property_as_bool(self.properties, HIVE2_COMPATIBLE, HIVE2_COMPATIBLE_DEFAULT),
+                property_as_bool(self.properties, HIVE2_COMPATIBLE, HIVE2_COMPATIBLE_DEFAULT),
             ),
             tableType=EXTERNAL_TABLE,
             parameters=_construct_parameters(table.metadata_location),
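
property_as_float follows the same pattern for the lock-wait settings above; a brief sketch, with the property key and values invented for illustration:

    from pyiceberg.utils.properties import property_as_float

    LOCK_CHECK_MIN_WAIT_TIME = "lock-check-min-wait-time"   # assumed key name
    props = {LOCK_CHECK_MIN_WAIT_TIME: "0.1"}

    assert property_as_float(props, LOCK_CHECK_MIN_WAIT_TIME, 0.05) == 0.1
    # Missing keys fall back to the default; unparseable values raise ValueError.
    assert property_as_float(props, "lock-check-max-wait-time", 60.0) == 60.0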
diff --git a/pyiceberg/catalog/rest.py b/pyiceberg/catalog/rest.py
index e6fbabf5..6977dce7 100644
--- a/pyiceberg/catalog/rest.py
+++ b/pyiceberg/catalog/rest.py
@@ -71,6 +71,7 @@ from pyiceberg.table.metadata import TableMetadata
 from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder, assign_fresh_sort_order_ids
 from pyiceberg.typedef import EMPTY_DICT, UTF8, IcebergBaseModel, Identifier, Properties
 from pyiceberg.types import transform_dict_value_to_str
+from pyiceberg.utils.properties import property_as_bool
 
 if TYPE_CHECKING:
     import pyarrow as pa
@@ -257,7 +258,7 @@ class RestCatalog(Catalog):
         self._config_headers(session)
 
         # Configure SigV4 Request Signing
-        if str(self.properties.get(SIGV4, False)).lower() == "true":
+        if property_as_bool(self.properties, SIGV4, False):
             self._init_sigv4(session)
 
         return session
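
One behavioral nuance of the rest.py change: the old check lowercased the raw value and compared it to "true", so an unrecognized setting silently disabled SigV4. With property_as_bool the value is parsed by strtobool, so an invalid string now raises. A hedged sketch (the SIGV4 key name is an assumption):

    from pyiceberg.utils.properties import property_as_bool

    SIGV4 = "rest.sigv4-enabled"   # assumed constant value

    assert property_as_bool({SIGV4: "TRUE"}, SIGV4, False) is True
    assert property_as_bool({}, SIGV4, False) is False

    try:
        property_as_bool({SIGV4: "enabled"}, SIGV4, False)
    except ValueError:
        # Previously "enabled" would have quietly evaluated to False.
        pass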
diff --git a/pyiceberg/conversions.py b/pyiceberg/conversions.py
index 2a03a4de..de67cdff 100644
--- a/pyiceberg/conversions.py
+++ b/pyiceberg/conversions.py
@@ -57,6 +57,7 @@ from pyiceberg.types import (
     TimestamptzType,
     TimeType,
     UUIDType,
+    strtobool,
 )
 from pyiceberg.utils.datetime import date_to_days, datetime_to_micros, time_to_micros
 from pyiceberg.utils.decimal import decimal_to_bytes, unscaled_to_decimal
@@ -99,7 +100,7 @@ def partition_to_py(primitive_type: PrimitiveType, value_str: str) -> Union[int,
 @partition_to_py.register(BooleanType)
 @handle_none
 def _(primitive_type: BooleanType, value_str: str) -> Union[int, float, str, uuid.UUID]:
-    return value_str.lower() == "true"
+    return strtobool(value_str)
 
 
 @partition_to_py.register(IntegerType)
diff --git a/pyiceberg/expressions/parser.py b/pyiceberg/expressions/parser.py
index 107d2349..d99f9227 100644
--- a/pyiceberg/expressions/parser.py
+++ b/pyiceberg/expressions/parser.py
@@ -63,6 +63,7 @@ from pyiceberg.expressions.literals import (
     StringLiteral,
 )
 from pyiceberg.typedef import L
+from pyiceberg.types import strtobool
 
 ParserElement.enablePackrat()
 
@@ -96,7 +97,7 @@ literal_set = Group(DelimitedList(string) | DelimitedList(decimal) | DelimitedLi
 
 @boolean.set_parse_action
 def _(result: ParseResults) -> BooleanExpression:
-    if "true" == result.boolean.lower():
+    if strtobool(result.boolean):
         return AlwaysTrue()
     else:
         return AlwaysFalse()
diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py
index c77a3024..d6e4a32a 100644
--- a/pyiceberg/io/fsspec.py
+++ b/pyiceberg/io/fsspec.py
@@ -76,6 +76,7 @@ from pyiceberg.io import (
     OutputStream,
 )
 from pyiceberg.typedef import Properties
+from pyiceberg.utils.properties import get_first_property_value, property_as_bool
 
 logger = logging.getLogger(__name__)
 
@@ -118,14 +119,12 @@ def _file(_: Properties) -> LocalFileSystem:
 def _s3(properties: Properties) -> AbstractFileSystem:
     from s3fs import S3FileSystem
 
-    from pyiceberg.table import PropertyUtil
-
     client_kwargs = {
         "endpoint_url": properties.get(S3_ENDPOINT),
-        "aws_access_key_id": PropertyUtil.get_first_property_value(properties, S3_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID),
-        "aws_secret_access_key": PropertyUtil.get_first_property_value(properties, S3_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY),
-        "aws_session_token": PropertyUtil.get_first_property_value(properties, S3_SESSION_TOKEN, AWS_SESSION_TOKEN),
-        "region_name": PropertyUtil.get_first_property_value(properties, S3_REGION, AWS_REGION),
+        "aws_access_key_id": get_first_property_value(properties, S3_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID),
+        "aws_secret_access_key": get_first_property_value(properties, S3_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY),
+        "aws_session_token": get_first_property_value(properties, S3_SESSION_TOKEN, AWS_SESSION_TOKEN),
+        "region_name": get_first_property_value(properties, S3_REGION, AWS_REGION),
     }
     config_kwargs = {}
     register_events: Dict[str, Callable[[Properties], None]] = {}
@@ -165,11 +164,11 @@ def _gs(properties: Properties) -> AbstractFileSystem:
         token=properties.get(GCS_TOKEN),
         consistency=properties.get(GCS_CONSISTENCY, "none"),
         cache_timeout=properties.get(GCS_CACHE_TIMEOUT),
-        requester_pays=properties.get(GCS_REQUESTER_PAYS, False),
+        requester_pays=property_as_bool(properties, GCS_REQUESTER_PAYS, False),
         session_kwargs=json.loads(properties.get(GCS_SESSION_KWARGS, "{}")),
         endpoint_url=properties.get(GCS_ENDPOINT),
         default_location=properties.get(GCS_DEFAULT_LOCATION),
-        version_aware=properties.get(GCS_VERSION_AWARE, "false").lower() == "true",
+        version_aware=property_as_bool(properties, GCS_VERSION_AWARE, False),
     )
 
 
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index f3b85eb4..4175f5fe 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -158,6 +158,7 @@ from pyiceberg.utils.concurrent import ExecutorFactory
 from pyiceberg.utils.config import Config
 from pyiceberg.utils.datetime import millis_to_datetime
 from pyiceberg.utils.deprecated import deprecated
+from pyiceberg.utils.properties import get_first_property_value, property_as_int
 from pyiceberg.utils.singleton import Singleton
 from pyiceberg.utils.truncate import truncate_upper_bound_binary_string, truncate_upper_bound_text_string
 
@@ -345,14 +346,12 @@ class PyArrowFileIO(FileIO):
         if scheme in {"s3", "s3a", "s3n"}:
             from pyarrow.fs import S3FileSystem
 
-            from pyiceberg.table import PropertyUtil
-
             client_kwargs: Dict[str, Any] = {
                 "endpoint_override": self.properties.get(S3_ENDPOINT),
-                "access_key": PropertyUtil.get_first_property_value(self.properties, S3_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID),
-                "secret_key": PropertyUtil.get_first_property_value(self.properties, S3_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY),
-                "session_token": PropertyUtil.get_first_property_value(self.properties, S3_SESSION_TOKEN, AWS_SESSION_TOKEN),
-                "region": PropertyUtil.get_first_property_value(self.properties, S3_REGION, AWS_REGION),
+                "access_key": get_first_property_value(self.properties, S3_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID),
+                "secret_key": get_first_property_value(self.properties, S3_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY),
+                "session_token": get_first_property_value(self.properties, S3_SESSION_TOKEN, AWS_SESSION_TOKEN),
+                "region": get_first_property_value(self.properties, S3_REGION, AWS_REGION),
             }
 
             if proxy_uri := self.properties.get(S3_PROXY_URI):
@@ -2132,10 +2131,10 @@ def data_file_statistics_from_parquet_metadata(
 
 
 def write_file(io: FileIO, table_metadata: TableMetadata, tasks: Iterator[WriteTask]) -> Iterator[DataFile]:
-    from pyiceberg.table import DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE, PropertyUtil, TableProperties
+    from pyiceberg.table import DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE, TableProperties
 
     parquet_writer_kwargs = _get_parquet_writer_kwargs(table_metadata.properties)
-    row_group_size = PropertyUtil.property_as_int(
+    row_group_size = property_as_int(
         properties=table_metadata.properties,
         property_name=TableProperties.PARQUET_ROW_GROUP_SIZE_BYTES,
         default=TableProperties.PARQUET_ROW_GROUP_SIZE_BYTES_DEFAULT,
@@ -2278,7 +2277,7 @@ PYARROW_UNCOMPRESSED_CODEC = "none"
 
 
 def _get_parquet_writer_kwargs(table_properties: Properties) -> Dict[str, Any]:
-    from pyiceberg.table import PropertyUtil, TableProperties
+    from pyiceberg.table import TableProperties
 
     for key_pattern in [
         TableProperties.PARQUET_ROW_GROUP_SIZE_BYTES,
@@ -2290,7 +2289,7 @@ def _get_parquet_writer_kwargs(table_properties: Properties) -> Dict[str, Any]:
             raise NotImplementedError(f"Parquet writer option(s) {unsupported_keys} not implemented")
 
     compression_codec = table_properties.get(TableProperties.PARQUET_COMPRESSION, TableProperties.PARQUET_COMPRESSION_DEFAULT)
-    compression_level = PropertyUtil.property_as_int(
+    compression_level = property_as_int(
         properties=table_properties,
         property_name=TableProperties.PARQUET_COMPRESSION_LEVEL,
         default=TableProperties.PARQUET_COMPRESSION_LEVEL_DEFAULT,
@@ -2301,17 +2300,17 @@ def _get_parquet_writer_kwargs(table_properties: Properties) -> Dict[str, Any]:
     return {
         "compression": compression_codec,
         "compression_level": compression_level,
-        "data_page_size": PropertyUtil.property_as_int(
+        "data_page_size": property_as_int(
             properties=table_properties,
             property_name=TableProperties.PARQUET_PAGE_SIZE_BYTES,
             default=TableProperties.PARQUET_PAGE_SIZE_BYTES_DEFAULT,
         ),
-        "dictionary_pagesize_limit": PropertyUtil.property_as_int(
+        "dictionary_pagesize_limit": property_as_int(
             properties=table_properties,
             property_name=TableProperties.PARQUET_DICT_SIZE_BYTES,
             default=TableProperties.PARQUET_DICT_SIZE_BYTES_DEFAULT,
         ),
-        "write_batch_size": PropertyUtil.property_as_int(
+        "write_batch_size": property_as_int(
             properties=table_properties,
             property_name=TableProperties.PARQUET_PAGE_ROW_LIMIT,
             default=TableProperties.PARQUET_PAGE_ROW_LIMIT_DEFAULT,
@@ -2331,11 +2330,11 @@ def _dataframe_to_data_files(
     Returns:
         An iterable that supplies datafiles that represent the table.
     """
-    from pyiceberg.table import DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE, PropertyUtil, TableProperties, WriteTask
+    from pyiceberg.table import DOWNCAST_NS_TIMESTAMP_TO_US_ON_WRITE, TableProperties, WriteTask
 
     counter = counter or itertools.count(0)
     write_uuid = write_uuid or uuid.uuid4()
-    target_file_size: int = PropertyUtil.property_as_int(  # type: ignore  # The property is set with non-None value.
+    target_file_size: int = property_as_int(  # type: ignore  # The property is set with non-None value.
         properties=table_metadata.properties,
         property_name=TableProperties.WRITE_TARGET_FILE_SIZE_BYTES,
         default=TableProperties.WRITE_TARGET_FILE_SIZE_BYTES_DEFAULT,
diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py
index a7a2dec2..873f5abf 100644
--- a/pyiceberg/table/__init__.py
+++ b/pyiceberg/table/__init__.py
@@ -144,6 +144,7 @@ from pyiceberg.types import (
     NestedField,
     PrimitiveType,
     StructType,
+    strtobool,
     transform_dict_value_to_str,
 )
 from pyiceberg.utils.bin_packing import ListPacker
@@ -151,6 +152,7 @@ from pyiceberg.utils.concurrent import ExecutorFactory
 from pyiceberg.utils.config import Config
 from pyiceberg.utils.datetime import datetime_to_millis
 from pyiceberg.utils.deprecated import deprecated
+from pyiceberg.utils.properties import property_as_bool, property_as_int
 from pyiceberg.utils.singleton import _convert_to_hashable_type
 
 if TYPE_CHECKING:
@@ -225,41 +227,6 @@ class TableProperties:
     MANIFEST_MERGE_ENABLED_DEFAULT = False
 
 
-class PropertyUtil:
-    @staticmethod
-    def property_as_int(properties: Dict[str, str], property_name: str, default: Optional[int] = None) -> Optional[int]:
-        if value := properties.get(property_name):
-            try:
-                return int(value)
-            except ValueError as e:
-                raise ValueError(f"Could not parse table property {property_name} to an integer: {value}") from e
-        else:
-            return default
-
-    @staticmethod
-    def property_as_float(properties: Dict[str, str], property_name: str, default: Optional[float] = None) -> Optional[float]:
-        if value := properties.get(property_name):
-            try:
-                return float(value)
-            except ValueError as e:
-                raise ValueError(f"Could not parse table property {property_name} to a float: {value}") from e
-        else:
-            return default
-
-    @staticmethod
-    def property_as_bool(properties: Dict[str, str], property_name: str, default: bool) -> bool:
-        if value := properties.get(property_name):
-            return value.lower() == "true"
-        return default
-
-    @staticmethod
-    def get_first_property_value(properties: Properties, *property_names: str) -> Optional[Any]:
-        for property_name in property_names:
-            if property_value := properties.get(property_name):
-                return property_value
-        return None
-
-
 class Transaction:
     _table: Table
     table_metadata: TableMetadata
@@ -492,7 +459,7 @@ class Transaction:
             self.table_metadata.schema(), provided_schema=df.schema, downcast_ns_timestamp_to_us=downcast_ns_timestamp_to_us
         )
 
-        manifest_merge_enabled = PropertyUtil.property_as_bool(
+        manifest_merge_enabled = property_as_bool(
             self.table_metadata.properties,
             TableProperties.MANIFEST_MERGE_ENABLED,
             TableProperties.MANIFEST_MERGE_ENABLED_DEFAULT,
@@ -1964,7 +1931,10 @@ class DataScan(TableScan):
 
         partition_evaluators: Dict[int, Callable[[DataFile], bool]] = KeyDefaultDict(self._build_partition_evaluator)
         metrics_evaluator = _InclusiveMetricsEvaluator(
-            self.table_metadata.schema(), self.row_filter, self.case_sensitive, self.options.get("include_empty_files") == "true"
+            self.table_metadata.schema(),
+            self.row_filter,
+            self.case_sensitive,
+            strtobool(self.options.get("include_empty_files", "false")),
         ).eval
 
         min_sequence_number = _min_sequence_number(manifests)
@@ -3450,17 +3420,17 @@ class MergeAppendFiles(FastAppendFiles):
         snapshot_properties: Dict[str, str] = EMPTY_DICT,
     ) -> None:
         super().__init__(operation, transaction, io, commit_uuid, snapshot_properties)
-        self._target_size_bytes = PropertyUtil.property_as_int(
+        self._target_size_bytes = property_as_int(
             self._transaction.table_metadata.properties,
             TableProperties.MANIFEST_TARGET_SIZE_BYTES,
             TableProperties.MANIFEST_TARGET_SIZE_BYTES_DEFAULT,
         )  # type: ignore
-        self._min_count_to_merge = PropertyUtil.property_as_int(
+        self._min_count_to_merge = property_as_int(
             self._transaction.table_metadata.properties,
             TableProperties.MANIFEST_MIN_MERGE_COUNT,
             TableProperties.MANIFEST_MIN_MERGE_COUNT_DEFAULT,
         )  # type: ignore
-        self._merge_enabled = PropertyUtil.property_as_bool(
+        self._merge_enabled = property_as_bool(
             self._transaction.table_metadata.properties,
             TableProperties.MANIFEST_MERGE_ENABLED,
             TableProperties.MANIFEST_MERGE_ENABLED_DEFAULT,
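
The DataScan change above has a similar effect: the "include_empty_files" option now defaults to "false" and is parsed with strtobool, so a value that is neither "true" nor "false" fails fast instead of being treated as False. A small sketch, assuming strtobool from pyiceberg.types behaves as the tests below exercise it:

    from pyiceberg.types import strtobool

    options = {}                             # hypothetical scan options
    assert strtobool(options.get("include_empty_files", "false")) is False
    assert strtobool("True") is True

    try:
        strtobool("yes")    # rejected: only "true"/"false" are accepted
    except ValueError:
        pass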
diff --git a/pyiceberg/utils/properties.py b/pyiceberg/utils/properties.py
new file mode 100644
index 00000000..6a0e2072
--- /dev/null
+++ b/pyiceberg/utils/properties.py
@@ -0,0 +1,76 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import (
+    Any,
+    Dict,
+    Optional,
+)
+
+from pyiceberg.typedef import Properties
+from pyiceberg.types import strtobool
+
+
+def property_as_int(
+    properties: Dict[str, str],
+    property_name: str,
+    default: Optional[int] = None,
+) -> Optional[int]:
+    if value := properties.get(property_name):
+        try:
+            return int(value)
+        except ValueError as e:
+            raise ValueError(f"Could not parse table property {property_name} to an integer: {value}") from e
+    else:
+        return default
+
+
+def property_as_float(
+    properties: Dict[str, str],
+    property_name: str,
+    default: Optional[float] = None,
+) -> Optional[float]:
+    if value := properties.get(property_name):
+        try:
+            return float(value)
+        except ValueError as e:
+            raise ValueError(f"Could not parse table property {property_name} to a float: {value}") from e
+    else:
+        return default
+
+
+def property_as_bool(
+    properties: Dict[str, str],
+    property_name: str,
+    default: bool,
+) -> bool:
+    if value := properties.get(property_name):
+        try:
+            return strtobool(value)
+        except ValueError as e:
+            raise ValueError(f"Could not parse table property {property_name} to a boolean: {value}") from e
+    return default
+
+
+def get_first_property_value(
+    properties: Properties,
+    *property_names: str,
+) -> Optional[Any]:
+    for property_name in property_names:
+        if property_value := properties.get(property_name):
+            return property_value
+    return None
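
The new module delegates boolean parsing to strtobool from pyiceberg.types. That helper is not shown in this diff; a rough sketch of its assumed behavior, consistent with the literal tests below (not the actual implementation):

    def strtobool_sketch(value: str) -> bool:
        """Accept only 'true'/'false' in any case; reject everything else."""
        lowered = value.lower()
        if lowered == "true":
            return True
        if lowered == "false":
            return False
        # Mirrors the error message asserted in tests/expressions/test_literals.py.
        raise ValueError(f"Could not convert {value} into a boolean")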
diff --git a/tests/expressions/test_literals.py b/tests/expressions/test_literals.py
index 95da250a..59c2a3de 100644
--- a/tests/expressions/test_literals.py
+++ b/tests/expressions/test_literals.py
@@ -385,17 +385,23 @@ def test_string_to_decimal_literal() -> None:
 
 
 def test_string_to_boolean_literal() -> None:
-    assert literal(True) == literal("true").to(BooleanType())
-    assert literal(True) == literal("True").to(BooleanType())
-    assert literal(False) == literal("false").to(BooleanType())
-    assert literal(False) == literal("False").to(BooleanType())
+    assert literal("true").to(BooleanType()) == literal(True)
+    assert literal("True").to(BooleanType()) == literal(True)
+    assert literal("false").to(BooleanType()) == literal(False)
+    assert literal("False").to(BooleanType()) == literal(False)
+    assert literal("TRUE").to(BooleanType()) == literal(True)
+    assert literal("FALSE").to(BooleanType()) == literal(False)
 
 
-def test_invalid_string_to_boolean_literal() -> None:
-    invalid_boolean_str = literal("unknown")
+@pytest.mark.parametrize(
+    "val",
+    ["unknown", "off", "on", "0", "1", "y", "yes", "n", "no", "t", "f"],
+)
+def test_invalid_string_to_boolean_literal(val: Any) -> None:
+    invalid_boolean_str = literal(val)
     with pytest.raises(ValueError) as e:
         _ = invalid_boolean_str.to(BooleanType())
-    assert "Could not convert unknown into a boolean" in str(e.value)
+    assert f"Could not convert {val} into a boolean" in str(e.value)
 
 
 # MISC
diff --git a/tests/utils/test_properties.py b/tests/utils/test_properties.py
new file mode 100644
index 00000000..2cb4ea5a
--- /dev/null
+++ b/tests/utils/test_properties.py
@@ -0,0 +1,98 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pytest
+
+from pyiceberg.utils.properties import (
+    get_first_property_value,
+    property_as_bool,
+    property_as_float,
+    property_as_int,
+)
+
+
+def test_property_as_int() -> None:
+    properties = {
+        "int": "42",
+    }
+
+    assert property_as_int(properties, "int") == 42
+    assert property_as_int(properties, "missing", default=1) == 1
+    assert property_as_int(properties, "missing") is None
+
+
+def test_property_as_int_with_invalid_value() -> None:
+    properties = {
+        "some_int_prop": "invalid",
+    }
+
+    with pytest.raises(ValueError) as exc:
+        property_as_int(properties, "some_int_prop")
+
+    assert "Could not parse table property some_int_prop to an integer: invalid" in str(exc.value)
+
+
+def test_property_as_float() -> None:
+    properties = {
+        "float": "42.0",
+    }
+
+    assert property_as_float(properties, "float", default=1.0) == 42.0
+    assert property_as_float(properties, "missing", default=1.0) == 1.0
+    assert property_as_float(properties, "missing") is None
+
+
+def test_property_as_float_with_invalid_value() -> None:
+    properties = {
+        "some_float_prop": "invalid",
+    }
+
+    with pytest.raises(ValueError) as exc:
+        property_as_float(properties, "some_float_prop")
+
+    assert "Could not parse table property some_float_prop to a float: invalid" in str(exc.value)
+
+
+def test_property_as_bool() -> None:
+    properties = {
+        "bool": "True",
+    }
+
+    assert property_as_bool(properties, "bool", default=False) is True
+    assert property_as_bool(properties, "missing", default=False) is False
+    assert property_as_float(properties, "missing") is None
+
+
+def test_property_as_bool_with_invalid_value() -> None:
+    properties = {
+        "some_bool_prop": "invalid",
+    }
+
+    with pytest.raises(ValueError) as exc:
+        property_as_bool(properties, "some_bool_prop", True)
+
+    assert "Could not parse table property some_bool_prop to a boolean: invalid" in str(exc.value)
+
+
+def test_get_first_property_value() -> None:
+    properties = {
+        "prop_1": "value_1",
+        "prop_2": "value_2",
+    }
+
+    assert get_first_property_value(properties, "prop_2", "prop_1") == "value_2"
+    assert get_first_property_value(properties, "missing", "prop_1") == "value_1"
