difin commented on code in PR #6108:
URL: https://github.com/apache/hive/pull/6108#discussion_r2402769246


##########
itests/qtest-iceberg/src/test/java/org/apache/hadoop/hive/cli/TestIcebergRESTCatalogGravitinoLlapLocalCliDriver.java:
##########
@@ -0,0 +1,301 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.cli;
+
+import com.github.dockerjava.api.command.CopyArchiveFromContainerCmd;
+import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
+import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.cli.control.CliAdapter;
+import org.apache.hadoop.hive.cli.control.CliConfigs;
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.iceberg.CatalogUtil;
+import org.apache.iceberg.hive.CatalogUtils;
+import org.apache.iceberg.hive.client.HiveRESTCatalogClient;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestRule;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.testcontainers.containers.GenericContainer;
+import org.testcontainers.containers.output.Slf4jLogConsumer;
+import org.testcontainers.containers.wait.strategy.Wait;
+import org.testcontainers.containers.wait.strategy.WaitAllStrategy;
+import org.testcontainers.utility.DockerImageName;
+import org.testcontainers.utility.MountableFile;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.time.Duration;
+import java.util.Comparator;
+import java.util.List;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+
+@RunWith(Parameterized.class)
+public class TestIcebergRESTCatalogGravitinoLlapLocalCliDriver {
+
+  private static final CliAdapter CLI_ADAPTER =
+      new CliConfigs.TestIcebergRESTCatalogGravitinoLlapLocalCliDriver().getCliAdapter();
+
+  private static final Logger LOG = LoggerFactory.getLogger(TestIcebergRESTCatalogGravitinoLlapLocalCliDriver.class);
+
+  private static final String CATALOG_NAME = "ice01";
+  private static final long GRAVITINO_STARTUP_TIMEOUT_MINUTES = 5L;
+  private static final String GRAVITINO_CONF_FILE_TEMPLATE = "gravitino-h2-test-template.conf";
+  private static final String GRAVITINO_ROOT_DIR = "/root/gravitino-iceberg-rest-server";
+  private static final String GRAVITINO_STARTUP_SCRIPT = GRAVITINO_ROOT_DIR + "/bin/start-iceberg-rest-server.sh";
+  private static final String GRAVITINO_H2_LIB = GRAVITINO_ROOT_DIR + "/libs/h2-driver.jar";
+  private static final String GRAVITINO_CONF_FILE = GRAVITINO_ROOT_DIR + "/conf/gravitino-iceberg-rest-server.conf";
+  private static final DockerImageName GRAVITINO_IMAGE =
+      DockerImageName.parse("apache/gravitino-iceberg-rest:1.0.0");
+
+  private final String name;
+  private final File qfile;
+
+  private GenericContainer<?> gravitinoContainer;
+  private Path warehouseDir;
+  private final ScheduledExecutorService fileSyncExecutor = Executors.newSingleThreadScheduledExecutor();
+
+  @Parameters(name = "{0}")
+  public static List<Object[]> getParameters() throws Exception {
+    return CLI_ADAPTER.getParameters();
+  }
+
+  @ClassRule
+  public static final TestRule CLI_CLASS_RULE = CLI_ADAPTER.buildClassRule();
+
+  @Rule
+  public final TestRule cliTestRule = CLI_ADAPTER.buildTestRule();
+
+  public TestIcebergRESTCatalogGravitinoLlapLocalCliDriver(String name, File qfile) {
+    this.name = name;
+    this.qfile = qfile;
+  }
+
+  @Before
+  public void setup() throws IOException {
+    createWarehouseDir();
+    prepareGravitinoConfig();
+    startGravitinoContainer();
+    startWarehouseDirSync();
+
+    String host = gravitinoContainer.getHost();
+    Integer port = gravitinoContainer.getMappedPort(9001);
+    String restCatalogPrefix = String.format("%s%s.", CatalogUtils.CATALOG_CONFIG_PREFIX, CATALOG_NAME);
+
+    // Suppress IntelliJ warning about using HTTP since this is a local test container connection
+    @SuppressWarnings("HttpUrlsUsage")
+    String restCatalogUri = String.format("http://%s:%d/iceberg", host, port);
+
+    Configuration conf = SessionState.get().getConf();
+    MetastoreConf.setVar(conf, MetastoreConf.ConfVars.METASTORE_CLIENT_IMPL, HiveRESTCatalogClient.class.getName());
+    MetastoreConf.setVar(conf, MetastoreConf.ConfVars.CATALOG_DEFAULT, CATALOG_NAME);
+    conf.set(restCatalogPrefix + "uri", restCatalogUri);
+    conf.set(restCatalogPrefix + "type", CatalogUtil.ICEBERG_CATALOG_TYPE_REST);
+  }
+
+  @After
+  public void teardown() {
+    if (gravitinoContainer != null) {
+      gravitinoContainer.stop();
+    }
+
+    fileSyncExecutor.shutdownNow();
+
+    if (warehouseDir != null && Files.exists(warehouseDir)) {
+      try (var paths = Files.walk(warehouseDir)) {
+        paths.sorted(Comparator.reverseOrder())
+            .forEach(path -> {
+              try {
+                Files.deleteIfExists(path);
+              } catch (IOException e) {
+                LOG.debug("Failed to delete temp file {}", path, e);
+              }
+            });
+      } catch (IOException e) {
+        LOG.debug("Failed to delete temp folder", e);
+      }
+    }
+  }
+
+  /**
+   * Starts a Gravitino container with the Iceberg REST server configured for testing.
+   *
+   * <p>This method configures the container to:
+   * <ul>
+   *   <li>Expose container REST port 9001 and map it to a host port.</li>
+   *   <li>Modify the container entrypoint to create the warehouse directory before startup.</li>
+   *   <li>Copy a dynamically prepared Gravitino configuration file into the container.</li>
+   *   <li>Copy the H2 driver JAR into the server's lib directory.</li>
+   *   <li>Wait for the Gravitino Iceberg REST server to finish starting (based on logs and port checks).</li>
+   *   <li>Stream container logs into the test logger for easier debugging.</li>
+   * </ul>
+   *
+   * <p>Note: The {@code @SuppressWarnings("resource")} annotation is applied because
+   * IntelliJ and some compilers flag {@link org.testcontainers.containers.GenericContainer}
+   * as a resource that should be managed with try-with-resources. In this test setup,
+   * the container lifecycle is managed explicitly: it is started here and stopped in
+   * {@code @After} (via {@code gravitinoContainer.stop()}). Using try-with-resources
+   * would not work in this context, since the container must remain running across
+   * multiple test methods rather than being confined to a single block scope.</p>
+   */
+  @SuppressWarnings("resource")
+  private void startGravitinoContainer() {
+    gravitinoContainer = new GenericContainer<>(GRAVITINO_IMAGE)
+        .withExposedPorts(9001)
+        // Update entrypoint to create the warehouse directory before starting the server
+        .withCreateContainerCmdModifier(cmd -> cmd.withEntrypoint("bash", "-c",
+            String.format("mkdir -p %s && exec %s", warehouseDir.toString(), GRAVITINO_STARTUP_SCRIPT)))
+        // Mount gravitino configuration file
+        .withCopyFileToContainer(
+            MountableFile.forHostPath(Paths.get(warehouseDir.toString(), GRAVITINO_CONF_FILE_TEMPLATE)),
+            GRAVITINO_CONF_FILE
+        )
+        // Mount the H2 driver JAR into the server's lib directory
+        .withCopyFileToContainer(
+            MountableFile.forHostPath(
+                Paths.get("target", "test-dependencies", "h2-driver.jar").toAbsolutePath()
+            ),
+            GRAVITINO_H2_LIB
+        )
+        // Wait for the server to be fully started
+        .waitingFor(
+            new WaitAllStrategy()
+                .withStrategy(Wait.forLogMessage(".*GravitinoIcebergRESTServer is running.*\\n", 1)
+                    .withStartupTimeout(Duration.ofMinutes(GRAVITINO_STARTUP_TIMEOUT_MINUTES)))
+                .withStrategy(Wait.forListeningPort()
+                    .withStartupTimeout(Duration.ofMinutes(GRAVITINO_STARTUP_TIMEOUT_MINUTES)))
+        )
+        .withLogConsumer(new Slf4jLogConsumer(LoggerFactory
+            .getLogger(TestIcebergRESTCatalogGravitinoLlapLocalCliDriver.class)));
+
+    gravitinoContainer.start();
+  }
+
+  /**
+   * Starts a background daemon that continuously synchronizes the Iceberg warehouse
+   * directory from the running Gravitino container to the host file system.
+   *
+   * <p>In CI environments, Testcontainers' {@code .withFileSystemBind()} cannot reliably
+   * bind the same host path to the same path inside the container, especially when
+   * using remote Docker hosts or Docker-in-Docker setups. This causes the container's
+   * writes (e.g., Iceberg metadata files like {@code .metadata.json}) to be invisible
+   * on the host.</p>
+   *
+   * <p>This method works around that limitation by repeatedly copying new files from
+   * the container's warehouse directory to the corresponding host directory. Existing
+   * files on the host are preserved, and only files that do not yet exist are copied.
+   * The sync runs every second while the container is running.</p>
+   *
+   * <p>Each archive copied from the container is extracted with a {@link TarArchiveInputStream},
+   * and directories are created as needed. Files that already exist on the host are skipped
+   * to avoid overwriting container data.</p>
+   */
+  private void startWarehouseDirSync() {
+    fileSyncExecutor.scheduleAtFixedRate(() -> {

Review Comment:
   Done
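
For readers following the thread: the excerpt above cuts off inside startWarehouseDirSync(), so the body of the scheduled task is not shown. Below is a minimal sketch of what such a task could look like, inferred from the file's imports (CopyArchiveFromContainerCmd, TarArchiveInputStream) and the Javadoc; the method name syncWarehouseDirOnce, the path resolution against the warehouse parent directory, and the error handling are illustrative assumptions, not the PR's actual code.

    // Hypothetical sketch of one sync iteration as described in the Javadoc.
    // Assumes the container writes to the same warehouse path that is used on the host.
    private void syncWarehouseDirOnce() {
      try (InputStream archive = gravitinoContainer.getDockerClient()
               .copyArchiveFromContainerCmd(gravitinoContainer.getContainerId(), warehouseDir.toString())
               .exec();
           TarArchiveInputStream tar = new TarArchiveInputStream(archive)) {
        TarArchiveEntry entry;
        while ((entry = tar.getNextTarEntry()) != null) {
          // Tar entries are rooted at the last component of the requested path,
          // so resolving against the warehouse parent reconstructs host paths.
          Path hostPath = warehouseDir.getParent().resolve(entry.getName());
          if (entry.isDirectory()) {
            Files.createDirectories(hostPath);
          } else if (!Files.exists(hostPath)) { // never overwrite files already on the host
            Files.createDirectories(hostPath.getParent());
            Files.copy(tar, hostPath);
          }
        }
      } catch (IOException | RuntimeException e) {
        LOG.debug("Warehouse sync iteration failed", e);
      }
    }

Scheduled from startWarehouseDirSync() as, for example, fileSyncExecutor.scheduleAtFixedRate(this::syncWarehouseDirOnce, 0, 1, TimeUnit.SECONDS), matching the one-second cadence the Javadoc describes.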



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


