jerryshao commented on code in PR #3933: URL: https://github.com/apache/gravitino/pull/3933#discussion_r1698238422
########## spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/sql/CatalogType.java: ########## @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.integration.test.sql; + +public enum CatalogType { + HIVE, + ICEBERG, + UNKNOWN; + + public static CatalogType fromString(String str) { + if (str == null) { + return UNKNOWN; + } + switch (str.toLowerCase()) { Review Comment: Add `Locale` for `toLowerCase`. ########## spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/sql/SparkQueryRunner.java: ########## @@ -0,0 +1,351 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.integration.test.sql; + +import com.google.common.collect.Maps; +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.gravitino.Catalog; +import org.apache.gravitino.client.GravitinoAdminClient; +import org.apache.gravitino.client.GravitinoMetalake; +import org.apache.gravitino.integration.test.container.ContainerSuite; +import org.apache.gravitino.integration.test.container.HiveContainer; +import org.apache.gravitino.integration.test.util.AbstractIT; +import org.apache.gravitino.spark.connector.GravitinoSparkConfig; +import org.apache.gravitino.spark.connector.iceberg.IcebergPropertiesConstants; +import org.apache.gravitino.spark.connector.plugin.GravitinoSparkPlugin; +import org.apache.spark.sql.SparkSession; +import org.junit.jupiter.api.Assertions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** Run and check the correctness of the SparkSQLs */ +public class SparkQueryRunner { + + private static final Logger LOG = LoggerFactory.getLogger(SparkSQLRegressionTest.class); + private static final String HIVE_CATALOG_NAME = "hive"; + private static final String 
ICEBERG_CATALOG_NAME = "iceberg"; + + private SparkSession sparkSession; + private String gravitinoUri; + private String metalakeName; + private String warehouse; + private boolean regenerateGoldenFiles; + // catalogType -> catalogName + private Map<CatalogType, String> catalogs = new HashMap<>(); + private boolean isSetupGravitinoEnv; + private String dataDir; + + private static final ContainerSuite containerSuite = ContainerSuite.getInstance(); + + public SparkQueryRunner(SparkTestConfig sparkTestConfig) { + this.regenerateGoldenFiles = sparkTestConfig.generateGoldenFiles(); + this.isSetupGravitinoEnv = sparkTestConfig.isSetupGravitinoEnv(); + String baseDir = sparkTestConfig.getBaseDir(); + // translate to file:///xx/xx + this.dataDir = Paths.get(baseDir, "data").toUri().toString(); + this.metalakeName = sparkTestConfig.getMetalakeName(); + if (isSetupGravitinoEnv) { + try { + setupGravitinoEnv(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } else { + this.gravitinoUri = sparkTestConfig.getGravitinoUri(); + this.warehouse = sparkTestConfig.getWarehouseLocation(); + } + initSparkEnv(); + + catalogs.put(CatalogType.HIVE, HIVE_CATALOG_NAME); + catalogs.put(CatalogType.ICEBERG, ICEBERG_CATALOG_NAME); + catalogs.put(CatalogType.UNKNOWN, HIVE_CATALOG_NAME); + } + + public void runQuery(TestCaseGroup sqlTestCaseGroup) throws IOException { + useCatalog(sqlTestCaseGroup.getCatalogType()); + try { + if (sqlTestCaseGroup.prepareFile != null) { + runQuery(sqlTestCaseGroup.prepareFile); + } + sqlTestCaseGroup.testCases.forEach( + testCase -> { + try { + runTestCase(testCase); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } finally { + if (sqlTestCaseGroup.cleanupFile != null) { + runQuery(sqlTestCaseGroup.cleanupFile); + } + } + } + + public void close() throws Exception { + if (sparkSession != null) { + sparkSession.close(); + sparkSession = null; + } + if (isSetupGravitinoEnv) { Review Comment: This variable name is
weird, better change to a new name, like `isGravitinoEnvSetUp`. ########## spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/sql/QueryOutput.java: ########## @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.integration.test.sql; + +import lombok.Getter; + +/** The SQL execution output, include schemas and output */ +@Getter +public class QueryOutput { + private String sql; + private String schema; + private String output; Review Comment: Make them final ########## spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/sql/TestCase.java: ########## @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.integration.test.sql; + +import java.nio.file.Path; +import java.nio.file.Paths; +import lombok.Getter; +import lombok.ToString; + +/** A test SQL files which contains multi SQL queries. */ +@Getter +@ToString +public class TestCase { + private Path testFile; Review Comment: You'd better make some variables `final` here and below - if they will not be modified during running. ########## spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/sql/CatalogType.java: ########## @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.gravitino.spark.connector.integration.test.sql; + +public enum CatalogType { + HIVE, + ICEBERG, + UNKNOWN; + + public static CatalogType fromString(String str) { + if (str == null) { + return UNKNOWN; + } + switch (str.toLowerCase()) { + case "hive": + return HIVE; + case "lakehouse-iceberg": + return ICEBERG; + default: + return UNKNOWN; + } + } + + public static CatalogType merge(CatalogType parentCatalogType, CatalogType childCatalogType) { Review Comment: You can simplify to `return parentCatalogType.equals(xxx) ? xxx : xxx`. ########## spark-connector/v3.3/spark/build.gradle.kts: ########## @@ -144,10 +144,14 @@ tasks.test { val skipITs = project.hasProperty("skipITs") val skipSparkITs = project.hasProperty("skipSparkITs") + val skipSparkSQLITs = project.hasProperty("skipSparkSQLITs") Review Comment: I feel like there are too many configurations to control whether to run something or not, do we have better solutions? I would incline to use gradle to control whether to run this or not, by default we would not run these SQL tests. ########## spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/sql/SparkTestConfig.java: ########## @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.integration.test.sql; + +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import org.apache.gravitino.Config; +import org.apache.gravitino.config.ConfigBuilder; +import org.apache.gravitino.config.ConfigEntry; +import org.junit.platform.commons.util.StringUtils; + +public class SparkTestConfig extends Config { + private static final String DEFAULT_BASE_DIR = + Paths.get( + System.getenv("GRAVITINO_ROOT_DIR"), + "spark-connector", + "spark-common", + "src", + "test", + "resources") + .toString(); + + private static final ConfigEntry<String> TEST_BASE_DIR = + new ConfigBuilder("gravitino.test.sql.dir") + .doc("The Spark SQL test base dir") + .version("0.6.0") + .stringConf() + .createWithDefault(DEFAULT_BASE_DIR); + + private static final ConfigEntry<String> TEST_SQLS = + new ConfigBuilder("gravitino.test.sqls") + .doc( + "Specify the test SQLs, using directory to specify group of SQLs like `test-sqls/hive`, using file path to specify one SQL like `test-sqls/hive/basic.sql`, use `,` to split multi part") + .version("0.6.0") + .stringConf() + .create(); + + private static final ConfigEntry<Boolean> GENERATE_GOLDEN_FILES = + new ConfigBuilder("gravitino.test.generate.golden.files") + .doc( + "Whether generate golden files which are used to check the correctness of the SQL result") + .version("0.6.0") + .booleanConf() + .createWithDefault(Boolean.FALSE); + + private static final ConfigEntry<String> GRAVITINO_METALAKE_NAME = + new ConfigBuilder("gravitino.test.gravitino.metalake") + .doc("The metalake name to run the test") + .version("0.6.0") + .stringConf() + .createWithDefault("test"); + + private static final ConfigEntry<Boolean> SETUP_GRAVITINO_ENV = + new ConfigBuilder("gravitino.test.setup.env") + .doc("Whether to setup Gravitino and hive 
environment") + .version("0.6.0") + .booleanConf() + .createWithDefault(Boolean.FALSE); + + private static final ConfigEntry<String> GRAVITINO_URI = + new ConfigBuilder("gravitino.test.gravitino.uri") + .doc("Gravitino uri address, only available when `gravitino.test.setup.env` is false") + .version("0.6.0") + .stringConf() + .createWithDefault("http://127.0.0.1:8090"); + + private static final ConfigEntry<String> WAREHOUSE_DIR = + new ConfigBuilder("gravitino.test.gravitino.warehouse") + .doc("The warehouse location, only available when `gravitino.test.setup.env` is false") + .version("0.6.0") + .stringConf() + .createWithDefault("hdfs://127.0.0.1:9000/user/hive/warehouse-spark-test"); + + public String getBaseDir() { + return get(TEST_BASE_DIR); + } + + public boolean generateGoldenFiles() { + return get(GENERATE_GOLDEN_FILES); + } + + public boolean isSetupGravitinoEnv() { + return get(SETUP_GRAVITINO_ENV); + } + + public List<String> getTestSQLs() { + String testSQLs = get(TEST_SQLS); + if (StringUtils.isNotBlank(testSQLs)) { + return Arrays.asList(testSQLs.split("\\s*,\\s*")); + } + return new ArrayList<>(); + } + + public String getGravitinoUri() { + return get(GRAVITINO_URI); + } + + public String getMetalakeName() { + return get(GRAVITINO_METALAKE_NAME); + } + + public String getWarehouseLocation() { + return get(WAREHOUSE_DIR); + } +} Review Comment: Do we really need configurations for tests? I think we don't have to formalize them just for test, WDYT? ########## spark-connector/spark-common/src/test/java/org/apache/gravitino/spark/connector/integration/test/sql/SparkTestConfig.java: ########## @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.spark.connector.integration.test.sql; + +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import org.apache.gravitino.Config; +import org.apache.gravitino.config.ConfigBuilder; +import org.apache.gravitino.config.ConfigEntry; +import org.junit.platform.commons.util.StringUtils; + +public class SparkTestConfig extends Config { + private static final String DEFAULT_BASE_DIR = + Paths.get( + System.getenv("GRAVITINO_ROOT_DIR"), + "spark-connector", + "spark-common", + "src", + "test", + "resources") + .toString(); + + private static final ConfigEntry<String> TEST_BASE_DIR = + new ConfigBuilder("gravitino.test.sql.dir") + .doc("The Spark SQL test base dir") + .version("0.6.0") + .stringConf() + .createWithDefault(DEFAULT_BASE_DIR); + + private static final ConfigEntry<String> TEST_SQLS = + new ConfigBuilder("gravitino.test.sqls") + .doc( + "Specify the test SQLs, using directory to specify group of SQLs like `test-sqls/hive`, using file path to specify one SQL like `test-sqls/hive/basic.sql`, use `,` to split multi part") Review Comment: This is too long, we should limit the length into 100 chars. 
########## spark-connector/v3.3/spark/build.gradle.kts: ########## @@ -144,10 +144,14 @@ tasks.test { val skipITs = project.hasProperty("skipITs") val skipSparkITs = project.hasProperty("skipSparkITs") + val skipSparkSQLITs = project.hasProperty("skipSparkSQLITs") Review Comment: Do we need a configuration to enable or disable this test? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
