xloya commented on code in PR #3876:
URL: https://github.com/apache/gravitino/pull/3876#discussion_r1663969322
##########
clients/client-python/tests/integration/hdfs_container.py:
##########
@@ -0,0 +1,143 @@
+"""
+Copyright 2024 Datastrato Pvt Ltd.
+This software is licensed under the Apache License version 2.
+"""
+
+import asyncio
+import logging
+import os
+import time
+
+import docker
+from docker import types as tp
+from docker.errors import NotFound
+
+from gravitino.exceptions.gravitino_runtime_exception import
GravitinoRuntimeException
+
+logger = logging.getLogger(__name__)
+
+
+async def check_hdfs_status(hive_container):
+ retry_limit = 15
+ for _ in range(retry_limit):
+ try:
+ command_and_args = ["bash", "/tmp/check-status.sh"]
+ exec_result = hive_container.exec_run(command_and_args)
+ if exec_result.exit_code != 0:
+ message = (
+ f"Command {command_and_args} exited with
{exec_result.exit_code}"
+ )
+ logger.warning(message)
+ logger.warning("output: %s", exec_result.output)
+ output_status_command = ["hdfs", "dfsadmin", "-report"]
+ exec_result = hive_container.exec_run(output_status_command)
+ logger.info("HDFS report, output: %s", exec_result.output)
+ else:
+ logger.info("HDFS startup successfully!")
+ return True
+ except Exception as e:
+ logger.error(
+ "Exception occurred while checking HDFS container status: %s",
e
+ )
+ time.sleep(10)
+ return False
+
+
+async def check_hdfs_container_status(hive_container):
+ timeout_sec = 150
+ try:
+ result = await asyncio.wait_for(
+ check_hdfs_status(hive_container), timeout=timeout_sec
+ )
+ assert result is True, "HDFS container startup failed!"
+ except asyncio.TimeoutError as e:
+ raise GravitinoRuntimeException(
+ "Timeout occurred while waiting for checking HDFS container
status."
+ ) from e
+
+
+class HDFSContainer:
+ _docker_client = None
+ _container = None
+ _network = None
+ _ip = ""
+ _network_name = "python-net"
+ _container_name = "python-hdfs"
+
+ def __init__(self):
+ self._docker_client = docker.from_env()
+ self._create_networks()
+ try:
+ container =
self._docker_client.containers.get(self._container_name)
+ if container is not None:
+ if container.status == "running":
+ container.kill()
+ container.remove()
Review Comment:
The reason is that, when I was testing locally, the HDFS container sometimes
started successfully while the HDFS service inside it was not yet ready. If
such a container were reused directly, the integration tests might fail, so I
added this logic to ensure that HDFS is restarted successfully. That said, I am
open to changing this: if a local test run hits this problem, it is also fine
to kill the container manually.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]