This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 427d23b7ba Python: Add more tests for the Avro writer (#8067)
427d23b7ba is described below
commit 427d23b7ba4d45355e6f7912737efa99a89098d7
Author: Fokko Driesprong <[email protected]>
AuthorDate: Mon Aug 7 21:27:19 2023 +0200
Python: Add more tests for the Avro writer (#8067)
* Python: Add more tests for the Avro writer
* Fix the tests
* WIP
* Update
* Update python/pyiceberg/utils/datetime.py
* Update python/pyiceberg/utils/datetime.py
* Update python/tests/avro/test_encoder.py
* WIP
* Cleanup
* Cleanup
* Cleanup
* Cleanup
* Fix the tests
---
python/dev/provision.py | 42 ++++++++++
python/pyiceberg/avro/decoder.py | 69 +---------------
python/pyiceberg/avro/encoder.py | 120 +++-------------------------
python/pyiceberg/avro/file.py | 2 +-
python/pyiceberg/avro/reader.py | 63 ++++++++++++---
python/pyiceberg/avro/writer.py | 29 +++----
python/pyiceberg/conversions.py | 4 +-
python/pyiceberg/expressions/literals.py | 4 +-
python/pyiceberg/utils/datetime.py | 11 ++-
python/pyiceberg/utils/decimal.py | 43 +++++++++-
python/pyiceberg/utils/schema_conversion.py | 17 ++--
python/tests/avro/test_decoder.py | 65 ---------------
python/tests/avro/test_encoder.py | 87 ++------------------
python/tests/avro/test_file.py | 94 ++++++++++++++++++++++
python/tests/avro/test_writer.py | 17 ++++
python/tests/test_integration.py | 16 ++++
python/tests/test_schema.py | 47 +++++++++++
python/tests/test_transforms.py | 4 +-
python/tests/utils/test_decimal.py | 40 ++++++++++
19 files changed, 405 insertions(+), 369 deletions(-)
diff --git a/python/dev/provision.py b/python/dev/provision.py
index 73ec34fdc1..f62687b746 100644
--- a/python/dev/provision.py
+++ b/python/dev/provision.py
@@ -185,3 +185,45 @@ all_types_dataframe = (
all_types_dataframe.writeTo("default.test_all_types").tableProperty("format-version",
"2").partitionedBy(
"intCol"
).createOrReplace()
+
+for table_name, partition in [
+ ("test_partitioned_by_identity", "ts"),
+ ("test_partitioned_by_years", "years(dt)"),
+ ("test_partitioned_by_months", "months(dt)"),
+ ("test_partitioned_by_days", "days(ts)"),
+ ("test_partitioned_by_hours", "hours(ts)"),
+ ("test_partitioned_by_truncate", "truncate(1, letter)"),
+ ("test_partitioned_by_bucket", "bucket(16, number)"),
+]:
+ spark.sql(
+ f"""
+ CREATE OR REPLACE TABLE default.{table_name} (
+ dt date,
+ ts timestamp,
+ number integer,
+ letter string
+ )
+ USING iceberg;
+ """
+ )
+
+ spark.sql(f"ALTER TABLE default.{table_name} ADD PARTITION FIELD
{partition}")
+
+ spark.sql(
+ f"""
+ INSERT INTO default.{table_name}
+ VALUES
+ (CAST('2022-03-01' AS date), CAST('2022-03-01 01:22:00' AS timestamp),
1, 'a'),
+ (CAST('2022-03-02' AS date), CAST('2022-03-02 02:22:00' AS timestamp),
2, 'b'),
+ (CAST('2022-03-03' AS date), CAST('2022-03-03 03:22:00' AS timestamp),
3, 'c'),
+ (CAST('2022-03-04' AS date), CAST('2022-03-04 04:22:00' AS timestamp),
4, 'd'),
+ (CAST('2023-03-05' AS date), CAST('2023-03-05 05:22:00' AS timestamp),
5, 'e'),
+ (CAST('2023-03-06' AS date), CAST('2023-03-06 06:22:00' AS timestamp),
6, 'f'),
+ (CAST('2023-03-07' AS date), CAST('2023-03-07 07:22:00' AS timestamp),
7, 'g'),
+ (CAST('2023-03-08' AS date), CAST('2023-03-08 08:22:00' AS timestamp),
8, 'h'),
+ (CAST('2023-03-09' AS date), CAST('2023-03-09 09:22:00' AS timestamp),
9, 'i'),
+ (CAST('2023-03-10' AS date), CAST('2023-03-10 10:22:00' AS timestamp),
10, 'j'),
+ (CAST('2023-03-11' AS date), CAST('2023-03-11 11:22:00' AS timestamp),
11, 'k'),
+ (CAST('2023-03-12' AS date), CAST('2023-03-12 12:22:00' AS timestamp),
12, 'l');
+ """
+ )
diff --git a/python/pyiceberg/avro/decoder.py b/python/pyiceberg/avro/decoder.py
index 35a1651192..0b39340b0e 100644
--- a/python/pyiceberg/avro/decoder.py
+++ b/python/pyiceberg/avro/decoder.py
@@ -14,21 +14,16 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-import decimal
from abc import ABC, abstractmethod
-from datetime import datetime, time
from io import SEEK_CUR
from typing import Dict, List
-from uuid import UUID
from pyiceberg.avro import STRUCT_DOUBLE, STRUCT_FLOAT
from pyiceberg.io import InputStream
-from pyiceberg.utils.datetime import micros_to_time, micros_to_timestamp,
micros_to_timestamptz
-from pyiceberg.utils.decimal import unscaled_to_decimal
class BinaryDecoder(ABC):
- """Read leaf values."""
+ """Decodes bytes into Python physical primitives."""
@abstractmethod
def __init__(self, input_stream: InputStream) -> None:
@@ -106,77 +101,19 @@ class BinaryDecoder(ABC):
"""
return float(STRUCT_DOUBLE.unpack(self.read(8))[0])
- def read_decimal_from_bytes(self, precision: int, scale: int) ->
decimal.Decimal:
- """Reads a value from the stream as a decimal.
-
- Decimal bytes are decoded as signed short, int or long depending on the
- size of bytes.
- """
- size = self.read_int()
- return self.read_decimal_from_fixed(precision, scale, size)
-
- def read_decimal_from_fixed(self, _: int, scale: int, size: int) ->
decimal.Decimal:
- """Reads a value from the stream as a decimal.
-
- Decimal is encoded as fixed. Fixed instances are encoded using the
- number of bytes declared in the schema.
- """
- data = self.read(size)
- unscaled_datum = int.from_bytes(data, byteorder="big", signed=True)
- return unscaled_to_decimal(unscaled_datum, scale)
-
def read_bytes(self) -> bytes:
"""Bytes are encoded as a long followed by that many bytes of data."""
num_bytes = self.read_int()
return self.read(num_bytes) if num_bytes > 0 else b""
def read_utf8(self) -> str:
- """Reads a utf-8 encoded string from the stream.
+ """Reads a utf-8 encoded string from the stream.
A string is encoded as a long followed by
that many bytes of UTF-8 encoded character data.
"""
return self.read_bytes().decode("utf-8")
- def read_uuid_from_fixed(self) -> UUID:
- """Reads a UUID as a fixed[16]."""
- return UUID(bytes=self.read(16))
-
- def read_time_millis(self) -> time:
- """Reads a milliseconds granularity time from the stream.
-
- Int is decoded as python time object which represents
- the number of milliseconds after midnight, 00:00:00.000.
- """
- millis = self.read_int()
- return micros_to_time(millis * 1000)
-
- def read_time_micros(self) -> time:
- """Reads a microseconds granularity time from the stream.
-
- Long is decoded as python time object which represents
- the number of microseconds after midnight, 00:00:00.000000.
- """
- return micros_to_time(self.read_int())
-
- def read_timestamp_micros(self) -> datetime:
- """Reads a microsecond granularity timestamp from the stream.
-
- Long is decoded as python datetime object which represents
- the number of microseconds from the unix epoch, 1 January 1970.
- """
- return micros_to_timestamp(self.read_int())
-
- def read_timestamptz_micros(self) -> datetime:
- """Reads a microsecond granularity timestamptz from the stream.
-
- Long is decoded as python datetime object which represents
- the number of microseconds from the unix epoch, 1 January 1970.
-
- Adjusted to UTC.
- """
- return micros_to_timestamptz(self.read_int())
-
def skip_boolean(self) -> None:
self.skip(1)
@@ -199,7 +136,7 @@ class BinaryDecoder(ABC):
class StreamingBinaryDecoder(BinaryDecoder):
- """Read leaf values."""
+ """Decodes bytes into Python physical primitives."""
__slots__ = "_input_stream"
_input_stream: InputStream
diff --git a/python/pyiceberg/avro/encoder.py b/python/pyiceberg/avro/encoder.py
index cf6d601233..a25e4bb66d 100644
--- a/python/pyiceberg/avro/encoder.py
+++ b/python/pyiceberg/avro/encoder.py
@@ -14,17 +14,14 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-import decimal
-import struct
-from datetime import date, datetime, time
+from uuid import UUID
from pyiceberg.avro import STRUCT_DOUBLE, STRUCT_FLOAT
from pyiceberg.io import OutputStream
-from pyiceberg.utils.datetime import date_to_days, datetime_to_micros,
time_object_to_micros
class BinaryEncoder:
- """Write leaf values."""
+ """Encodes Python physical types into bytes."""
_output_stream: OutputStream
@@ -58,118 +55,21 @@ class BinaryEncoder:
"""A double is written as 8 bytes."""
self.write(STRUCT_DOUBLE.pack(f))
- def write_decimal_bytes(self, datum: decimal.Decimal) -> None:
- """
- Decimal in bytes are encoded as long.
-
- Since size of packed value in bytes for signed long is 8, 8 bytes are
written.
- """
- sign, digits, _ = datum.as_tuple()
-
- unscaled_datum = 0
- for digit in digits:
- unscaled_datum = (unscaled_datum * 10) + digit
-
- bits_req = unscaled_datum.bit_length() + 1
- if sign:
- unscaled_datum = (1 << bits_req) - unscaled_datum
-
- bytes_req = bits_req // 8
- padding_bits = ~((1 << bits_req) - 1) if sign else 0
- packed_bits = padding_bits | unscaled_datum
-
- bytes_req += 1 if (bytes_req << 3) < bits_req else 0
- self.write_int(bytes_req)
- for index in range(bytes_req - 1, -1, -1):
- bits_to_write = packed_bits >> (8 * index)
- self.write(bytearray([bits_to_write & 0xFF]))
-
- def write_decimal_fixed(self, datum: decimal.Decimal, size: int) -> None:
- """Decimal in fixed are encoded as size of fixed bytes."""
- sign, digits, _ = datum.as_tuple()
-
- unscaled_datum = 0
- for digit in digits:
- unscaled_datum = (unscaled_datum * 10) + digit
-
- bits_req = unscaled_datum.bit_length() + 1
- size_in_bits = size * 8
- offset_bits = size_in_bits - bits_req
-
- mask = 2**size_in_bits - 1
- bit = 1
- for _ in range(bits_req):
- mask ^= bit
- bit <<= 1
-
- if bits_req < 8:
- bytes_req = 1
- else:
- bytes_req = bits_req // 8
- if bits_req % 8 != 0:
- bytes_req += 1
- if sign:
- unscaled_datum = (1 << bits_req) - unscaled_datum
- unscaled_datum = mask | unscaled_datum
- for index in range(size - 1, -1, -1):
- bits_to_write = unscaled_datum >> (8 * index)
- self.write(bytearray([bits_to_write & 0xFF]))
- else:
- for _ in range(offset_bits // 8):
- self.write(b"\x00")
- for index in range(bytes_req - 1, -1, -1):
- bits_to_write = unscaled_datum >> (8 * index)
- self.write(bytearray([bits_to_write & 0xFF]))
-
def write_bytes(self, b: bytes) -> None:
"""Bytes are encoded as a long followed by that many bytes of data."""
self.write_int(len(b))
- self.write(struct.pack(f"{len(b)}s", b))
-
- def write_bytes_fixed(self, b: bytes) -> None:
- """Writes fixed number of bytes."""
- self.write(struct.pack(f"{len(b)}s", b))
+ self.write(b)
def write_utf8(self, s: str) -> None:
"""A string is encoded as a long followed by that many bytes of UTF-8
encoded character data."""
self.write_bytes(s.encode("utf-8"))
- def write_date_int(self, d: date) -> None:
- """
- Encode python date object as int.
-
- It stores the number of days from the unix epoch, 1 January 1970 (ISO
calendar).
- """
- self.write_int(date_to_days(d))
-
- def write_time_millis_int(self, dt: time) -> None:
- """
- Encode python time object as int.
-
- It stores the number of milliseconds from midnight, 00:00:00.000
- """
- self.write_int(int(time_object_to_micros(dt) / 1000))
-
- def write_time_micros_long(self, dt: time) -> None:
- """
- Encode python time object as long.
-
- It stores the number of microseconds from midnight, 00:00:00.000000
- """
- self.write_int(time_object_to_micros(dt))
-
- def write_timestamp_millis_long(self, dt: datetime) -> None:
- """
- Encode python datetime object as long.
-
- It stores the number of milliseconds from midnight of unix epoch, 1
January 1970.
- """
- self.write_int(int(datetime_to_micros(dt) / 1000))
-
- def write_timestamp_micros_long(self, dt: datetime) -> None:
- """
- Encode python datetime object as long.
+ def write_uuid(self, uuid: UUID) -> None:
+ """Write UUID as a fixed[16].
- It stores the number of microseconds from midnight of unix epoch, 1
January 1970.
+ The uuid logical type represents a randomly generated universally unique
identifier (UUID).
+ A uuid logical type annotates an Avro string. The string has to
conform with RFC-4122.
"""
- self.write_int(datetime_to_micros(dt))
+ if len(uuid.bytes) != 16:
+ raise ValueError(f"Expected UUID to have 16 bytes, got:
{len(uuid.bytes)}")
+ return self.write(uuid.bytes)
diff --git a/python/pyiceberg/avro/file.py b/python/pyiceberg/avro/file.py
index f780a8a30c..3b2f62045e 100644
--- a/python/pyiceberg/avro/file.py
+++ b/python/pyiceberg/avro/file.py
@@ -275,4 +275,4 @@ class AvroOutputFile(Generic[D]):
self.encoder.write_int(len(objects))
self.encoder.write_int(len(block_content))
self.encoder.write(block_content)
- self.encoder.write_bytes_fixed(self.sync_bytes)
+ self.encoder.write(self.sync_bytes)
diff --git a/python/pyiceberg/avro/reader.py b/python/pyiceberg/avro/reader.py
index ba9456e892..8a8f319a90 100644
--- a/python/pyiceberg/avro/reader.py
+++ b/python/pyiceberg/avro/reader.py
@@ -28,7 +28,6 @@ from __future__ import annotations
from abc import abstractmethod
from dataclasses import dataclass
from dataclasses import field as dataclassfield
-from datetime import datetime, time
from decimal import Decimal
from typing import (
Any,
@@ -43,6 +42,7 @@ from uuid import UUID
from pyiceberg.avro.decoder import BinaryDecoder
from pyiceberg.typedef import StructProtocol
from pyiceberg.types import StructType
+from pyiceberg.utils.decimal import bytes_to_decimal, decimal_required_bytes
from pyiceberg.utils.singleton import Singleton
@@ -153,6 +153,11 @@ class DoubleReader(Reader):
class DateReader(Reader):
+ """Reads a day granularity date from the stream.
+
+ The number of days from 1 January 1970.
+ """
+
def read(self, decoder: BinaryDecoder) -> int:
return decoder.read_int()
@@ -161,24 +166,44 @@ class DateReader(Reader):
class TimeReader(Reader):
- def read(self, decoder: BinaryDecoder) -> time:
- return decoder.read_time_micros()
+ """Reads a microsecond granularity timestamp from the stream.
+
+ Long is decoded as an integer which represents
+ the number of microseconds from the unix epoch, 1 January 1970.
+ """
+
+ def read(self, decoder: BinaryDecoder) -> int:
+ return decoder.read_int()
def skip(self, decoder: BinaryDecoder) -> None:
decoder.skip_int()
class TimestampReader(Reader):
- def read(self, decoder: BinaryDecoder) -> datetime:
- return decoder.read_timestamp_micros()
+ """Reads a microsecond granularity timestamp from the stream.
+
+ Long is decoded as python integer which represents
+ the number of microseconds from the unix epoch, 1 January 1970.
+ """
+
+ def read(self, decoder: BinaryDecoder) -> int:
+ return decoder.read_int()
def skip(self, decoder: BinaryDecoder) -> None:
decoder.skip_int()
class TimestamptzReader(Reader):
- def read(self, decoder: BinaryDecoder) -> datetime:
- return decoder.read_timestamptz_micros()
+ """Reads a microsecond granularity timestamptz from the stream.
+
+ Long is decoded as python integer which represents
+ the number of microseconds from the unix epoch, 1 January 1970.
+
+ Adjusted to UTC.
+ """
+
+ def read(self, decoder: BinaryDecoder) -> int:
+ return decoder.read_int()
def skip(self, decoder: BinaryDecoder) -> None:
decoder.skip_int()
@@ -194,7 +219,7 @@ class StringReader(Reader):
class UUIDReader(Reader):
def read(self, decoder: BinaryDecoder) -> UUID:
- return decoder.read_uuid_from_fixed()
+ return UUID(bytes=decoder.read(16))
def skip(self, decoder: BinaryDecoder) -> None:
decoder.skip(16)
@@ -220,6 +245,12 @@ class FixedReader(Reader):
class BinaryReader(Reader):
+ """Read a binary value.
+
+ First reads an integer, to get the length of the binary value,
+ then reads the binary field itself.
+ """
+
def read(self, decoder: BinaryDecoder) -> bytes:
return decoder.read_bytes()
@@ -227,13 +258,25 @@ class BinaryReader(Reader):
decoder.skip_bytes()
-@dataclass(frozen=True)
+@dataclass(frozen=True, init=False)
class DecimalReader(Reader):
+ """Reads a value as a decimal.
+
+ Decimal bytes are decoded as signed short, int or long depending on the
+ size of bytes.
+ """
+
precision: int = dataclassfield()
scale: int = dataclassfield()
+ _length: int
+
+ def __init__(self, precision: int, scale: int):
+ object.__setattr__(self, "precision", precision)
+ object.__setattr__(self, "scale", scale)
+ object.__setattr__(self, "_length", decimal_required_bytes(precision))
def read(self, decoder: BinaryDecoder) -> Decimal:
- return decoder.read_decimal_from_bytes(self.precision, self.scale)
+ return bytes_to_decimal(decoder.read(self._length), self.scale)
def skip(self, decoder: BinaryDecoder) -> None:
decoder.skip_bytes()
diff --git a/python/pyiceberg/avro/writer.py b/python/pyiceberg/avro/writer.py
index 8ac44036e1..2a1dcd5804 100644
--- a/python/pyiceberg/avro/writer.py
+++ b/python/pyiceberg/avro/writer.py
@@ -25,7 +25,6 @@ from __future__ import annotations
from abc import abstractmethod
from dataclasses import dataclass
from dataclasses import field as dataclassfield
-from datetime import datetime, time
from typing import (
Any,
Dict,
@@ -36,6 +35,7 @@ from uuid import UUID
from pyiceberg.avro.encoder import BinaryEncoder
from pyiceberg.types import StructType
+from pyiceberg.utils.decimal import decimal_required_bytes, decimal_to_bytes
from pyiceberg.utils.singleton import Singleton
@@ -77,23 +77,23 @@ class DoubleWriter(Writer):
class DateWriter(Writer):
- def write(self, encoder: BinaryEncoder, val: Any) -> None:
- encoder.write_date_int(val)
+ def write(self, encoder: BinaryEncoder, val: int) -> None:
+ encoder.write_int(val)
class TimeWriter(Writer):
- def write(self, encoder: BinaryEncoder, val: time) -> None:
- encoder.write_time_micros_long(val)
+ def write(self, encoder: BinaryEncoder, val: int) -> None:
+ encoder.write_int(val)
class TimestampWriter(Writer):
- def write(self, encoder: BinaryEncoder, val: datetime) -> None:
- encoder.write_timestamp_micros_long(val)
+ def write(self, encoder: BinaryEncoder, val: int) -> None:
+ encoder.write_int(val)
class TimestamptzWriter(Writer):
- def write(self, encoder: BinaryEncoder, val: datetime) -> None:
- encoder.write_timestamp_micros_long(val)
+ def write(self, encoder: BinaryEncoder, val: int) -> None:
+ encoder.write_int(val)
class StringWriter(Writer):
@@ -103,12 +103,7 @@ class StringWriter(Writer):
class UUIDWriter(Writer):
def write(self, encoder: BinaryEncoder, val: UUID) -> None:
- uuid_bytes = val.bytes
-
- if len(uuid_bytes) != 16:
- raise ValueError(f"Expected UUID to be 16 bytes, got:
{len(uuid_bytes)}")
-
- encoder.write_bytes_fixed(uuid_bytes)
+ encoder.write(val.bytes)
@dataclass(frozen=True)
@@ -116,6 +111,8 @@ class FixedWriter(Writer):
_len: int = dataclassfield()
def write(self, encoder: BinaryEncoder, val: bytes) -> None:
+ if len(val) != self._len:
+ raise ValueError(f"Expected {self._len} bytes, got {len(val)}")
encoder.write(val)
def __len__(self) -> int:
@@ -140,7 +137,7 @@ class DecimalWriter(Writer):
scale: int = dataclassfield()
def write(self, encoder: BinaryEncoder, val: Any) -> None:
- return encoder.write_decimal_bytes(val)
+ return encoder.write(decimal_to_bytes(val,
byte_length=decimal_required_bytes(self.precision)))
def __repr__(self) -> str:
"""Returns string representation of this object."""
diff --git a/python/pyiceberg/conversions.py b/python/pyiceberg/conversions.py
index ebf6cfa666..8f155fce3d 100644
--- a/python/pyiceberg/conversions.py
+++ b/python/pyiceberg/conversions.py
@@ -57,7 +57,7 @@ from pyiceberg.types import (
TimeType,
UUIDType,
)
-from pyiceberg.utils.datetime import date_to_days, datetime_to_micros,
time_object_to_micros
+from pyiceberg.utils.datetime import date_to_days, datetime_to_micros,
time_to_micros
from pyiceberg.utils.decimal import decimal_to_bytes, unscaled_to_decimal
_BOOL_STRUCT = Struct("<?")
@@ -203,7 +203,7 @@ def _(_: DateType, value: Union[date, int]) -> bytes:
@to_bytes.register(TimeType)
def _(_: TimeType, value: Union[time, int]) -> bytes:
if isinstance(value, time):
- value = time_object_to_micros(value)
+ value = time_to_micros(value)
return _LONG_STRUCT.pack(value)
diff --git a/python/pyiceberg/expressions/literals.py
b/python/pyiceberg/expressions/literals.py
index 1e47e87608..f89d0c8331 100644
--- a/python/pyiceberg/expressions/literals.py
+++ b/python/pyiceberg/expressions/literals.py
@@ -50,7 +50,7 @@ from pyiceberg.types import (
from pyiceberg.utils.datetime import (
date_str_to_days,
micros_to_days,
- time_to_micros,
+ time_str_to_micros,
timestamp_to_micros,
timestamptz_to_micros,
)
@@ -558,7 +558,7 @@ class StringLiteral(Literal[str]):
@to.register(TimeType)
def _(self, type_var: TimeType) -> Literal[int]:
try:
- return TimeLiteral(time_to_micros(self.value))
+ return TimeLiteral(time_str_to_micros(self.value))
except (TypeError, ValueError) as e:
raise ValueError(f"Could not convert {self.value} into a
{type_var}") from e
diff --git a/python/pyiceberg/utils/datetime.py
b/python/pyiceberg/utils/datetime.py
index 71a62ca456..f43d0fa84b 100644
--- a/python/pyiceberg/utils/datetime.py
+++ b/python/pyiceberg/utils/datetime.py
@@ -61,15 +61,14 @@ def days_to_date(days: int) -> date:
return EPOCH_DATE + timedelta(days)
-def time_to_micros(time_str: str) -> int:
+def time_str_to_micros(time_str: str) -> int:
"""Converts an ISO-8601 formatted time to microseconds from midnight."""
- t = time.fromisoformat(time_str)
- return (((t.hour * 60 + t.minute) * 60) + t.second) * 1_000_000 +
t.microsecond
+ return time_to_micros(time.fromisoformat(time_str))
-def time_object_to_micros(t: time) -> int:
- """Converts an datetime.time object to microseconds from midnight."""
- return int(t.hour * 60 * 60 * 1e6 + t.minute * 60 * 1e6 + t.second * 1e6 +
t.microsecond)
+def time_to_micros(t: time) -> int:
+ """Converts a datetime.time object to microseconds from midnight."""
+ return (((t.hour * 60 + t.minute) * 60) + t.second) * 1_000_000 +
t.microsecond
def datetime_to_micros(dt: datetime) -> int:
diff --git a/python/pyiceberg/utils/decimal.py
b/python/pyiceberg/utils/decimal.py
index 32f6db6aa9..06b2b945cd 100644
--- a/python/pyiceberg/utils/decimal.py
+++ b/python/pyiceberg/utils/decimal.py
@@ -16,8 +16,9 @@
# under the License.
"""Helper methods for working with Python Decimals."""
+import math
from decimal import Decimal
-from typing import Union
+from typing import Optional, Union
def decimal_to_unscaled(value: Decimal) -> int:
@@ -64,16 +65,33 @@ def bytes_required(value: Union[int, Decimal]) -> int:
raise ValueError(f"Unsupported value: {value}")
-def decimal_to_bytes(value: Decimal) -> bytes:
+def decimal_to_bytes(value: Decimal, byte_length: Optional[int] = None) ->
bytes:
"""Returns a byte representation of a decimal.
Args:
value (Decimal): a decimal value.
+ byte_length (int): The number of bytes.
Returns:
bytes: the unscaled value of the Decimal as bytes.
"""
unscaled_value = decimal_to_unscaled(value)
- return unscaled_value.to_bytes(bytes_required(unscaled_value),
byteorder="big", signed=True)
+ if byte_length is None:
+ byte_length = bytes_required(unscaled_value)
+ return unscaled_value.to_bytes(byte_length, byteorder="big", signed=True)
+
+
+def bytes_to_decimal(value: bytes, scale: int) -> Decimal:
+ """Returns a decimal from the bytes.
+
+ Args:
+ value (bytes): the bytes to be converted into a decimal.
+ scale (int): the scale of the decimal.
+
+ Returns:
+ Decimal: the scaled decimal.
+ """
+ unscaled_datum = int.from_bytes(value, byteorder="big", signed=True)
+ return unscaled_to_decimal(unscaled_datum, scale)
def truncate_decimal(value: Decimal, width: int) -> Decimal:
@@ -88,3 +106,22 @@ def truncate_decimal(value: Decimal, width: int) -> Decimal:
unscaled_value = decimal_to_unscaled(value)
applied_value = unscaled_value - (((unscaled_value % width) + width) %
width)
return unscaled_to_decimal(applied_value,
abs(int(value.as_tuple().exponent)))
+
+
+MAX_PRECISION = tuple(math.floor(math.log10(math.fabs(math.pow(2, 8 * pos - 1)
- 1))) for pos in range(24))
+REQUIRED_LENGTH = tuple(next(pos for pos in range(24) if p <=
MAX_PRECISION[pos]) for p in range(40))
+
+
+def decimal_required_bytes(precision: int) -> int:
+ """Compute the number of bytes required to store a precision.
+
+ Args:
+ precision: The number of digits to store.
+
+ Returns:
+ The number of bytes required to store a decimal with a certain
precision.
+ """
+ if precision <= 0 or precision >= 40:
+ raise ValueError(f"Unsupported precision, outside of (0, 40):
{precision}")
+
+ return REQUIRED_LENGTH[precision]
diff --git a/python/pyiceberg/utils/schema_conversion.py
b/python/pyiceberg/utils/schema_conversion.py
index 4f46668866..0746024e51 100644
--- a/python/pyiceberg/utils/schema_conversion.py
+++ b/python/pyiceberg/utils/schema_conversion.py
@@ -64,9 +64,7 @@ PRIMITIVE_FIELD_TYPE_MAPPING: Dict[str, PrimitiveType] = {
LOGICAL_FIELD_TYPE_MAPPING: Dict[Tuple[str, str], PrimitiveType] = {
("date", "int"): DateType(),
- ("time-millis", "int"): TimeType(),
- ("timestamp-millis", "long"): TimestampType(),
- ("time-micros", "int"): TimeType(),
+ ("time-micros", "long"): TimeType(),
("timestamp-micros", "long"): TimestampType(),
("uuid", "fixed"): UUIDType(),
}
@@ -369,6 +367,11 @@ class AvroSchemaConversion:
return self._convert_logical_decimal_type(avro_logical_type)
elif logical_type == "map":
return self._convert_logical_map_type(avro_logical_type)
+ elif logical_type == "timestamp-micros":
+ if avro_logical_type.get("adjust-to-utc", False) is True:
+ return TimestamptzType()
+ else:
+ return TimestampType()
elif (logical_type, physical_type) in LOGICAL_FIELD_TYPE_MAPPING:
return LOGICAL_FIELD_TYPE_MAPPING[(logical_type, physical_type)]
else:
@@ -542,6 +545,8 @@ class
ConvertSchemaToAvro(SchemaVisitorPerPrimitiveType[AvroType]):
return {
"type": "map",
"values": value_result,
+ "key-id": self.last_map_key_field_id,
+ "value-id": self.last_map_value_field_id,
}
else:
# Creates a logical map that's a list of schema's
@@ -588,17 +593,17 @@ class
ConvertSchemaToAvro(SchemaVisitorPerPrimitiveType[AvroType]):
def visit_timestamp(self, timestamp_type: TimestampType) -> AvroType:
# Iceberg only supports micro's
- return {"type": "long", "logicalType": "timestamp-micros"}
+ return {"type": "long", "logicalType": "timestamp-micros",
"adjust-to-utc": False}
def visit_timestamptz(self, timestamptz_type: TimestamptzType) -> AvroType:
# Iceberg only supports micro's
- return {"type": "long", "logicalType": "timestamp-micros"}
+ return {"type": "long", "logicalType": "timestamp-micros",
"adjust-to-utc": True}
def visit_string(self, string_type: StringType) -> AvroType:
return "string"
def visit_uuid(self, uuid_type: UUIDType) -> AvroType:
- return {"type": "string", "logicalType": "uuid"}
+ return {"type": "fixed", "size": 16, "logicalType": "uuid"}
def visit_binary(self, binary_type: BinaryType) -> AvroType:
return "bytes"
diff --git a/python/tests/avro/test_decoder.py
b/python/tests/avro/test_decoder.py
index b1ab97fb1d..c51f1090fc 100644
--- a/python/tests/avro/test_decoder.py
+++ b/python/tests/avro/test_decoder.py
@@ -17,12 +17,9 @@
from __future__ import annotations
import io
-from datetime import datetime, timezone
-from decimal import Decimal
from io import SEEK_SET
from types import TracebackType
from typing import Optional, Type
-from uuid import UUID
import pytest
@@ -34,15 +31,6 @@ from pyiceberg.types import DoubleType, FloatType
AVAILABLE_DECODERS = [StreamingBinaryDecoder, InMemoryBinaryDecoder]
[email protected]("decoder_class", AVAILABLE_DECODERS)
-def test_read_decimal_from_fixed(decoder_class: Type[BinaryDecoder]) -> None:
- mis = io.BytesIO(b"\x00\x00\x00\x05\x6A\x48\x1C\xFB\x2C\x7C\x50\x00")
- decoder = decoder_class(mis)
- actual = decoder.read_decimal_from_fixed(28, 15, 12)
- expected = Decimal("99892.123400000000000")
- assert actual == expected
-
-
@pytest.mark.parametrize("decoder_class", AVAILABLE_DECODERS)
def test_read_boolean_true(decoder_class: Type[BinaryDecoder]) -> None:
mis = io.BytesIO(b"\x01")
@@ -82,24 +70,6 @@ def test_skip_int(decoder_class: Type[BinaryDecoder]) ->
None:
assert decoder.tell() == 1
[email protected]("decoder_class", AVAILABLE_DECODERS)
-def test_read_decimal(decoder_class: Type[BinaryDecoder]) -> None:
- mis = io.BytesIO(b"\x18\x00\x00\x00\x05\x6A\x48\x1C\xFB\x2C\x7C\x50\x00")
- decoder = decoder_class(mis)
- actual = decoder.read_decimal_from_bytes(28, 15)
- expected = Decimal("99892.123400000000000")
- assert actual == expected
-
-
[email protected]("decoder_class", AVAILABLE_DECODERS)
-def test_decimal_from_fixed_big(decoder_class: Type[BinaryDecoder]) -> None:
- mis = io.BytesIO(b"\x0E\xC2\x02\xE9\x06\x16\x33\x49\x77\x67\xA8\x00")
- decoder = decoder_class(mis)
- actual = decoder.read_decimal_from_fixed(28, 15, 12)
- expected = Decimal("4567335489766.998340000000000")
- assert actual == expected
-
-
@pytest.mark.parametrize("decoder_class", AVAILABLE_DECODERS)
def test_read_negative_bytes(decoder_class: Type[BinaryDecoder]) -> None:
mis = io.BytesIO(b"")
@@ -180,41 +150,6 @@ def test_skip_double(decoder_class: Type[BinaryDecoder])
-> None:
assert decoder.tell() == 8
[email protected]("decoder_class", AVAILABLE_DECODERS)
-def test_read_uuid_from_fixed(decoder_class: Type[BinaryDecoder]) -> None:
- mis = io.BytesIO(b"\x12\x34\x56\x78" * 4)
- decoder = decoder_class(mis)
- assert decoder.read_uuid_from_fixed() ==
UUID("{12345678-1234-5678-1234-567812345678}")
-
-
[email protected]("decoder_class", AVAILABLE_DECODERS)
-def test_read_time_millis(decoder_class: Type[BinaryDecoder]) -> None:
- mis = io.BytesIO(b"\xBC\x7D")
- decoder = decoder_class(mis)
- assert decoder.read_time_millis().microsecond == 30000
-
-
[email protected]("decoder_class", AVAILABLE_DECODERS)
-def test_read_time_micros(decoder_class: Type[BinaryDecoder]) -> None:
- mis = io.BytesIO(b"\xBC\x7D")
- decoder = decoder_class(mis)
- assert decoder.read_time_micros().microsecond == 8030
-
-
[email protected]("decoder_class", AVAILABLE_DECODERS)
-def test_read_timestamp_micros(decoder_class: Type[BinaryDecoder]) -> None:
- mis = io.BytesIO(b"\xBC\x7D")
- decoder = decoder_class(mis)
- assert decoder.read_timestamp_micros() == datetime(1970, 1, 1, 0, 0, 0,
8030)
-
-
[email protected]("decoder_class", AVAILABLE_DECODERS)
-def test_read_timestamptz_micros(decoder_class: Type[BinaryDecoder]) -> None:
- mis = io.BytesIO(b"\xBC\x7D")
- decoder = decoder_class(mis)
- assert decoder.read_timestamptz_micros() == datetime(1970, 1, 1, 0, 0, 0,
8030, tzinfo=timezone.utc)
-
-
@pytest.mark.parametrize("decoder_class", AVAILABLE_DECODERS)
def test_read_bytes(decoder_class: Type[BinaryDecoder]) -> None:
mis = io.BytesIO(b"\x08\x01\x02\x03\x04")
diff --git a/python/tests/avro/test_encoder.py
b/python/tests/avro/test_encoder.py
index 4646e65e6e..5866719434 100644
--- a/python/tests/avro/test_encoder.py
+++ b/python/tests/avro/test_encoder.py
@@ -16,10 +16,9 @@
# under the License.
from __future__ import annotations
-import datetime
import io
import struct
-from decimal import Decimal
+import uuid
from pyiceberg.avro.encoder import BinaryEncoder
@@ -101,28 +100,6 @@ def test_write_double() -> None:
assert output.getbuffer() == struct.pack("<d", _input)
-def test_write_decimal_bytes() -> None:
- output = io.BytesIO()
- encoder = BinaryEncoder(output)
-
- _input = Decimal("3.14159265359")
-
- encoder.write_decimal_bytes(_input)
-
- assert output.getbuffer() == b"\x0a\x49\x25\x59\xf6\x4f"
-
-
-def test_write_decimal_fixed() -> None:
- output = io.BytesIO()
- encoder = BinaryEncoder(output)
-
- _input = Decimal("3.14159265359")
-
- encoder.write_decimal_fixed(_input, 8)
-
- assert output.getbuffer() == b"\x00\x00\x00\x49\x25\x59\xf6\x4f"
-
-
def test_write_bytes() -> None:
output = io.BytesIO()
encoder = BinaryEncoder(output)
@@ -134,17 +111,6 @@ def test_write_bytes() -> None:
assert output.getbuffer() == b"".join([b"\x06", _input])
-def test_write_bytes_fixed() -> None:
- output = io.BytesIO()
- encoder = BinaryEncoder(output)
-
- _input = b"\x12\x34\x56"
-
- encoder.write_bytes_fixed(_input)
-
- assert output.getbuffer() == _input
-
-
def test_write_utf8() -> None:
output = io.BytesIO()
encoder = BinaryEncoder(output)
@@ -156,52 +122,13 @@ def test_write_utf8() -> None:
assert output.getbuffer() == b"".join([b"\x7a", bin_input])
-def test_write_date_int() -> None:
- output = io.BytesIO()
- encoder = BinaryEncoder(output)
-
- _input = datetime.date(1970, 1, 2)
- encoder.write_date_int(_input)
-
- assert output.getbuffer() == b"\x02"
-
-
-def test_write_time_millis_int() -> None:
- output = io.BytesIO()
- encoder = BinaryEncoder(output)
-
- _input = datetime.time(1, 2, 3, 456000)
- encoder.write_time_millis_int(_input)
-
- assert output.getbuffer() == b"\x80\xc3\xc6\x03"
-
-
-def test_write_time_micros_long() -> None:
- output = io.BytesIO()
- encoder = BinaryEncoder(output)
-
- _input = datetime.time(1, 2, 3, 456000)
-
- encoder.write_time_micros_long(_input)
-
- assert output.getbuffer() == b"\x80\xb8\xfb\xde\x1b"
-
-
-def test_write_timestamp_millis_long() -> None:
- output = io.BytesIO()
- encoder = BinaryEncoder(output)
-
- _input = datetime.datetime(2023, 1, 1, 1, 2, 3)
- encoder.write_timestamp_millis_long(_input)
-
- assert output.getbuffer() == b"\xf0\xdb\xcc\xad\xad\x61"
-
-
-def test_write_timestamp_micros_long() -> None:
+def test_write_uuid() -> None:
output = io.BytesIO()
encoder = BinaryEncoder(output)
- _input = datetime.datetime(2023, 1, 1, 1, 2, 3)
- encoder.write_timestamp_micros_long(_input)
+ _input = uuid.UUID("12345678-1234-5678-1234-567812345678")
+ encoder.write_uuid(_input)
- assert output.getbuffer() == b"\x80\xe3\xad\x9f\xac\xca\xf8\x05"
+ buf = output.getbuffer()
+ assert len(buf) == 16
+ assert buf.tobytes() == b"\x124Vx\x124Vx\x124Vx\x124Vx"
diff --git a/python/tests/avro/test_file.py b/python/tests/avro/test_file.py
index 101e676c90..2738770492 100644
--- a/python/tests/avro/test_file.py
+++ b/python/tests/avro/test_file.py
@@ -15,11 +15,14 @@
# specific language governing permissions and limitations
# under the License.
import inspect
+from datetime import date, datetime, time
from enum import Enum
from tempfile import TemporaryDirectory
from typing import Any
+from uuid import UUID
import pytest
+from _decimal import Decimal
from fastavro import reader, writer
import pyiceberg.avro.file as avro
@@ -34,7 +37,24 @@ from pyiceberg.manifest import (
ManifestEntry,
ManifestEntryStatus,
)
+from pyiceberg.schema import Schema
from pyiceberg.typedef import Record
+from pyiceberg.types import (
+ BooleanType,
+ DateType,
+ DecimalType,
+ DoubleType,
+ FixedType,
+ FloatType,
+ IntegerType,
+ LongType,
+ NestedField,
+ StringType,
+ TimestampType,
+ TimestamptzType,
+ TimeType,
+ UUIDType,
+)
from pyiceberg.utils.schema_conversion import AvroSchemaConversion
@@ -193,3 +213,77 @@ def
test_write_manifest_entry_with_fastavro_read_with_iceberg() -> None:
avro_entry = next(it)
assert entry == avro_entry
+
+
[email protected]("is_required", [True, False])
+def test_all_primitive_types(is_required: bool) -> None:
+ all_primitives_schema = Schema(
+ NestedField(field_id=1, name="field_fixed", field_type=FixedType(16),
required=is_required),
+ NestedField(field_id=2, name="field_decimal",
field_type=DecimalType(6, 2), required=is_required),
+ NestedField(field_id=3, name="field_bool", field_type=BooleanType(),
required=is_required),
+ NestedField(field_id=4, name="field_int", field_type=IntegerType(),
required=True),
+ NestedField(field_id=5, name="field_long", field_type=LongType(),
required=is_required),
+ NestedField(field_id=6, name="field_float", field_type=FloatType(),
required=is_required),
+ NestedField(field_id=7, name="field_double", field_type=DoubleType(),
required=is_required),
+ NestedField(field_id=8, name="field_date", field_type=DateType(),
required=is_required),
+ NestedField(field_id=9, name="field_time", field_type=TimeType(),
required=is_required),
+ NestedField(field_id=10, name="field_timestamp",
field_type=TimestampType(), required=is_required),
+ NestedField(field_id=11, name="field_timestamptz",
field_type=TimestamptzType(), required=is_required),
+ NestedField(field_id=12, name="field_string", field_type=StringType(),
required=is_required),
+ NestedField(field_id=13, name="field_uuid", field_type=UUIDType(),
required=is_required),
+ schema_id=1,
+ )
+
+ class AllPrimitivesRecord(Record):
+ field_fixed: bytes
+ field_decimal: Decimal
+ field_bool: bool
+ field_int: int
+ field_long: int
+ field_float: float
+ field_double: float
+ field_date: date
+ field_time: time
+ field_timestamp: datetime
+ field_timestamptz: datetime
+ field_string: str
+ field_uuid: UUID
+
+ def __init__(self, *data: Any, **named_data: Any) -> None:
+ super().__init__(*data, **{"struct":
all_primitives_schema.as_struct(), **named_data})
+
+ record = AllPrimitivesRecord(
+ b"\x124Vx\x124Vx\x124Vx\x124Vx",
+ Decimal("123.45"),
+ True,
+ 123,
+ 429496729622,
+ 123.22000122070312,
+ 429496729622.314,
+ 19052,
+ 69922000000,
+ 1677629965000000,
+ 1677629965000000,
+ "this is a sentence",
+ UUID("12345678-1234-5678-1234-567812345678"),
+ )
+
+ with TemporaryDirectory() as tmpdir:
+ tmp_avro_file = tmpdir + "/all_primitives.avro"
+ # write to disk
+ with avro.AvroOutputFile[AllPrimitivesRecord](
+ PyArrowFileIO().new_output(tmp_avro_file), all_primitives_schema,
"all_primitives_schema"
+ ) as out:
+ out.write_block([record])
+
+ # read from disk
+ with avro.AvroFile[AllPrimitivesRecord](
+ PyArrowFileIO().new_input(tmp_avro_file),
+ all_primitives_schema,
+ {-1: AllPrimitivesRecord},
+ ) as avro_reader:
+ it = iter(avro_reader)
+ avro_entry = next(it)
+
+ for idx, field in enumerate(all_primitives_schema.as_struct()):
+ assert record[idx] == avro_entry[idx], f"Invalid {field}"
diff --git a/python/tests/avro/test_writer.py b/python/tests/avro/test_writer.py
index c517a0cd1c..2cdcd4482a 100644
--- a/python/tests/avro/test_writer.py
+++ b/python/tests/avro/test_writer.py
@@ -21,6 +21,7 @@ import struct
from typing import Dict, List
import pytest
+from _decimal import Decimal
from pyiceberg.avro.encoder import BinaryEncoder
from pyiceberg.avro.resolver import construct_writer
@@ -218,3 +219,19 @@ def test_write_struct_with_list() -> None:
b"\x00",
]
)
+
+
+def test_write_decimal() -> None:
+ output = io.BytesIO()
+ encoder = BinaryEncoder(output)
+
+ schema = StructType(
+ NestedField(1, "decimal", DecimalType(10, 2), required=True),
+ )
+
+ class MyStruct(Record):
+ decimal: Decimal
+
+ construct_writer(schema).write(encoder, MyStruct(Decimal("1000.12")))
+
+ assert output.getvalue() == b"\x00\x00\x01\x86\xac"
diff --git a/python/tests/test_integration.py b/python/tests/test_integration.py
index 3609920819..37ba5b9048 100644
--- a/python/tests/test_integration.py
+++ b/python/tests/test_integration.py
@@ -299,3 +299,19 @@ def
test_pyarrow_deletes_double(test_positional_mor_double_deletes: Table) -> No
# Testing the slicing of indices
arrow_table = test_positional_mor_double_deletes.scan(limit=8).to_arrow()
assert arrow_table["number"].to_pylist() == [1, 2, 3, 4, 5, 7, 8, 10]
+
+
[email protected]
+def test_partitioned_tables(catalog: Catalog) -> None:
+ for table_name, predicate in [
+ ("test_partitioned_by_identity", "ts >= '2023-03-05T00:00:00+00:00'"),
+ ("test_partitioned_by_years", "dt >= '2023-03-05'"),
+ ("test_partitioned_by_months", "dt >= '2023-03-05'"),
+ ("test_partitioned_by_days", "ts >= '2023-03-05T00:00:00+00:00'"),
+ ("test_partitioned_by_hours", "ts >= '2023-03-05T00:00:00+00:00'"),
+ ("test_partitioned_by_truncate", "letter >= 'e'"),
+ ("test_partitioned_by_bucket", "number >= '5'"),
+ ]:
+ table = catalog.load_table(f"default.{table_name}")
+ arrow_table = table.scan(selected_fields=("number",),
row_filter=predicate).to_arrow()
+ assert set(arrow_table["number"].to_pylist()) == {5, 6, 7, 8, 9, 10,
11, 12}, f"Table {table_name}, predicate {predicate}"
diff --git a/python/tests/test_schema.py b/python/tests/test_schema.py
index 4a59631acd..76dddb6486 100644
--- a/python/tests/test_schema.py
+++ b/python/tests/test_schema.py
@@ -154,6 +154,53 @@ def test_schema_index_by_id_visitor(table_schema_nested:
Schema) -> None:
def test_schema_index_by_name_visitor(table_schema_nested: Schema) -> None:
"""Test index_by_name visitor function"""
+ table_schema_nested = schema.Schema(
+ NestedField(field_id=1, name="foo", field_type=StringType(),
required=False),
+ NestedField(field_id=2, name="bar", field_type=IntegerType(),
required=True),
+ NestedField(field_id=3, name="baz", field_type=BooleanType(),
required=False),
+ NestedField(
+ field_id=4,
+ name="qux",
+ field_type=ListType(element_id=5, element_type=StringType(),
element_required=True),
+ required=True,
+ ),
+ NestedField(
+ field_id=6,
+ name="quux",
+ field_type=MapType(
+ key_id=7,
+ key_type=StringType(),
+ value_id=8,
+ value_type=MapType(key_id=9, key_type=StringType(),
value_id=10, value_type=IntegerType(), value_required=True),
+ value_required=True,
+ ),
+ required=True,
+ ),
+ NestedField(
+ field_id=11,
+ name="location",
+ field_type=ListType(
+ element_id=12,
+ element_type=StructType(
+ NestedField(field_id=13, name="latitude",
field_type=FloatType(), required=False),
+ NestedField(field_id=14, name="longitude",
field_type=FloatType(), required=False),
+ ),
+ element_required=True,
+ ),
+ required=True,
+ ),
+ NestedField(
+ field_id=15,
+ name="person",
+ field_type=StructType(
+ NestedField(field_id=16, name="name", field_type=StringType(),
required=False),
+ NestedField(field_id=17, name="age", field_type=IntegerType(),
required=True),
+ ),
+ required=False,
+ ),
+ schema_id=1,
+ identifier_field_ids=[1],
+ )
index = schema.index_by_name(table_schema_nested)
assert index == {
"foo": 1,
diff --git a/python/tests/test_transforms.py b/python/tests/test_transforms.py
index 201dbbbd6d..5c4980d8ac 100644
--- a/python/tests/test_transforms.py
+++ b/python/tests/test_transforms.py
@@ -88,7 +88,7 @@ from pyiceberg.types import (
from pyiceberg.utils.datetime import (
date_str_to_days,
date_to_days,
- time_to_micros,
+ time_str_to_micros,
timestamp_to_micros,
timestamptz_to_micros,
)
@@ -102,7 +102,7 @@ from pyiceberg.utils.datetime import (
(34, LongType(), 2017239379),
(date_to_days(date(2017, 11, 16)), DateType(), -653330422),
(date_str_to_days("2017-11-16"), DateType(), -653330422),
- (time_to_micros("22:31:08"), TimeType(), -662762989),
+ (time_str_to_micros("22:31:08"), TimeType(), -662762989),
(
timestamp_to_micros("2017-11-16T22:31:08"),
TimestampType(),
diff --git a/python/tests/utils/test_decimal.py
b/python/tests/utils/test_decimal.py
new file mode 100644
index 0000000000..683eab93fd
--- /dev/null
+++ b/python/tests/utils/test_decimal.py
@@ -0,0 +1,40 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import pytest
+
+from pyiceberg.utils.decimal import decimal_required_bytes
+
+
+def test_decimal_required_bytes() -> None:
+ assert decimal_required_bytes(precision=1) == 1
+ assert decimal_required_bytes(precision=2) == 1
+ assert decimal_required_bytes(precision=3) == 2
+ assert decimal_required_bytes(precision=4) == 2
+ assert decimal_required_bytes(precision=5) == 3
+ assert decimal_required_bytes(precision=7) == 4
+ assert decimal_required_bytes(precision=8) == 4
+ assert decimal_required_bytes(precision=10) == 5
+ assert decimal_required_bytes(precision=32) == 14
+ assert decimal_required_bytes(precision=38) == 16
+
+ with pytest.raises(ValueError) as exc_info:
+ decimal_required_bytes(precision=40)
+ assert "(0, 40]" in str(exc_info.value)
+
+ with pytest.raises(ValueError) as exc_info:
+ decimal_required_bytes(precision=-1)
+ assert "(0, 40]" in str(exc_info.value)