rdblue commented on code in PR #5447:
URL: https://github.com/apache/iceberg/pull/5447#discussion_r942741371


##########
python/pyiceberg/catalog/hive.py:
##########
@@ -233,24 +299,47 @@ def create_table(
             ValueError: If the identifier is invalid
         """
         database_name, table_name = 
self.identifier_to_database_and_table(identifier)
-        current_time_millis = int(time.time())
+        seconds_since_epoch = int(time.time() * 1000)
+
+        location = self._resolve_table_location(location, database_name, 
table_name)
+
+        metadata_location = f"{location}/metadata/{uuid.uuid4()}.metadata.json"
+        metadata = TableMetadataV2(
+            location=location,
+            schemas=[schema],
+            current_schema_id=schema.schema_id,
+            partition_specs=[partition_spec],
+            default_spec_id=partition_spec.spec_id,
+            sort_orders=[sort_order],
+            default_sort_order_id=sort_order.order_id,
+            properties=properties or {},
+            last_updated_ms=seconds_since_epoch,
+            last_column_id=schema.highest_field_id,
+            last_partition_id=max(field.field_id for field in 
partition_spec.fields)
+            if partition_spec.fields
+            else DEFAULT_LAST_PARTITION_ID,
+        )
+        io = load_file_io({**self.properties})
+        self._write_metadata(metadata, io, metadata_location)
+
         tbl = HiveTable(
             dbName=database_name,
             tableName=table_name,
             owner=properties[OWNER] if properties and OWNER in properties else 
getpass.getuser(),
-            createTime=current_time_millis // 1000,
-            lastAccessTime=current_time_millis // 1000,
+            createTime=seconds_since_epoch // 1000,
+            lastAccessTime=seconds_since_epoch // 1000,
             sd=_construct_hive_storage_descriptor(schema, location),
             tableType="EXTERNAL_TABLE",
-            parameters=_construct_parameters("s3://"),
+            parameters=_construct_parameters(metadata_location),
         )
         try:
             with self._client as open_client:
                 open_client.create_table(tbl)
                 hive_table = open_client.get_table(dbname=database_name, 
tbl_name=table_name)
         except AlreadyExistsException as e:
             raise TableAlreadyExistsError(f"Table {database_name}.{table_name} 
already exists") from e
-        return self._convert_hive_into_iceberg(hive_table)
+
+        return self._convert_hive_into_iceberg(hive_table, io, 
metadata_location)

Review Comment:
   Why pass `metadata_location` here? Since it is set on the table, I think it 
would make sense to always pull it from table parameters. That way we never 
create a situation where we've forgotten to set the table parameter, but 
successfully returned a table instance. Or one where we've forgotten to update 
it and returned an updated table.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to