jerryshao commented on code in PR #10191: URL: https://github.com/apache/gravitino/pull/10191#discussion_r2883449375
########## maintenance/optimizer/src/test/java/org/apache/gravitino/maintenance/optimizer/integration/test/UpdaterIT.java: ########## @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.maintenance.optimizer.integration.test; + +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import org.apache.gravitino.NameIdentifier; +import org.apache.gravitino.maintenance.optimizer.api.common.MetricPoint; +import org.apache.gravitino.maintenance.optimizer.api.common.PartitionPath; +import org.apache.gravitino.maintenance.optimizer.api.common.StatisticEntry; +import org.apache.gravitino.maintenance.optimizer.monitor.metrics.GravitinoMetricsProvider; +import org.apache.gravitino.maintenance.optimizer.recommender.statistics.GravitinoStatisticsProvider; +import org.apache.gravitino.maintenance.optimizer.recommender.util.PartitionUtils; +import org.apache.gravitino.maintenance.optimizer.updater.UpdateType; +import org.apache.gravitino.maintenance.optimizer.updater.Updater; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +public class UpdaterIT extends GravitinoOptimizerEnvIT { + + 
private Updater updater; + private GravitinoStatisticsProvider statisticsProvider; + private GravitinoMetricsProvider metricsProvider; + + @Override + protected Map<String, String> getSpecifyConfigs() { + return Map.of(); + } + + @BeforeAll + void init() { + this.updater = new Updater(optimizerEnv); + this.statisticsProvider = new GravitinoStatisticsProvider(); + statisticsProvider.initialize(optimizerEnv); + this.metricsProvider = new GravitinoMetricsProvider(); + metricsProvider.initialize(optimizerEnv); + } + + @Test + void testUpdateTableStatistics() { + String tableName = "update-stats"; + createTable(tableName); + NameIdentifier tableIdentifier = getTableIdentifier(tableName); + updater.update( + DummyTableStatisticsComputer.DUMMY_TABLE_STAT, + Arrays.asList(tableIdentifier), + UpdateType.STATISTICS); + + List<StatisticEntry<?>> tableStats = statisticsProvider.tableStatistics(tableIdentifier); + Assertions.assertEquals(1, tableStats.size()); + Assertions.assertEquals(DummyTableStatisticsComputer.TABLE_STAT_NAME, tableStats.get(0).name()); + Assertions.assertEquals(1L, ((Number) tableStats.get(0).value().value()).longValue()); + + Map<PartitionPath, List<StatisticEntry<?>>> partitionStats = + statisticsProvider.partitionStatistics(tableIdentifier); + Assertions.assertEquals(1, partitionStats.size()); + List<StatisticEntry<?>> partitionEntries = + partitionStats.values().stream().findFirst().orElseThrow(IllegalStateException::new); + Assertions.assertEquals(1, partitionEntries.size()); + Assertions.assertEquals( + DummyTableStatisticsComputer.TABLE_STAT_NAME, partitionEntries.get(0).name()); + Assertions.assertEquals(2L, ((Number) partitionEntries.get(0).value().value()).longValue()); + Assertions.assertEquals( + PartitionUtils.encodePartitionPath( + PartitionPath.of(DummyTableStatisticsComputer.getPartitionName())), + PartitionUtils.encodePartitionPath( + partitionStats.keySet().stream().findFirst().orElseThrow(IllegalStateException::new))); + } + + @Test + 
void testUpdateTableMetrics() { + String tableName = "update-metrics"; + createTable(tableName); + NameIdentifier tableIdentifier = getTableIdentifier(tableName); + updater.update( + DummyTableStatisticsComputer.DUMMY_TABLE_STAT, + Arrays.asList(tableIdentifier), + UpdateType.METRICS); + + List<MetricPoint> tableMetrics = + metricsProvider.tableMetrics(tableIdentifier, 0, Long.MAX_VALUE); + Assertions.assertEquals(1, tableMetrics.size()); + MetricPoint tableMetric = tableMetrics.get(0); + Assertions.assertEquals(DummyTableStatisticsComputer.TABLE_STAT_NAME, tableMetric.metricName()); + long tableDiff = System.currentTimeMillis() / 1000 - tableMetric.timestampSeconds(); + Assertions.assertTrue(tableDiff >= 0 && tableDiff <= 10000); + Assertions.assertEquals(1L, ((Number) tableMetric.value().value()).longValue()); + + PartitionPath expectedPartition = + PartitionPath.of(DummyTableStatisticsComputer.getPartitionName()); + List<MetricPoint> partitionMetrics = + metricsProvider.partitionMetrics(tableIdentifier, expectedPartition, 0, Long.MAX_VALUE); + Assertions.assertEquals(1, partitionMetrics.size()); + MetricPoint partitionMetric = partitionMetrics.get(0); + Assertions.assertEquals( + DummyTableStatisticsComputer.TABLE_STAT_NAME, partitionMetric.metricName()); + long partitionDiff = System.currentTimeMillis() / 1000 - partitionMetric.timestampSeconds(); + Assertions.assertTrue(partitionDiff >= 0 && partitionDiff <= 10000); + Assertions.assertEquals(2L, ((Number) partitionMetric.value().value()).longValue()); + Assertions.assertEquals( + PartitionUtils.encodePartitionPath(expectedPartition), + PartitionUtils.encodePartitionPath(partitionMetric.partitionPath().orElseThrow())); + } + + @Test + void testUpdateJobMetrics() { + String jobName = "update-job-metrics"; + NameIdentifier jobIdentifier = NameIdentifier.of(jobName); + updater.update( + DummyJobMetricsComputer.DUMMY_JOB_METRICS, + Arrays.asList(jobIdentifier), + UpdateType.METRICS); + + List<MetricPoint> 
jobMetrics = metricsProvider.jobMetrics(jobIdentifier, 0, Long.MAX_VALUE); + Assertions.assertEquals(1, jobMetrics.size()); + MetricPoint jobMetric = jobMetrics.get(0); + Assertions.assertEquals(DummyJobMetricsComputer.JOB_STAT_NAME, jobMetric.metricName()); + long diff = System.currentTimeMillis() / 1000 - jobMetric.timestampSeconds(); + Assertions.assertTrue(diff >= 0 && diff <= 10000); + Assertions.assertEquals(1L, ((Number) jobMetric.value().value()).longValue()); + } +} Review Comment: Can we have an IT that simulates the complete table-maintenance workflow and verifies each step? ########## maintenance/optimizer/src/test/java/org/apache/gravitino/maintenance/optimizer/integration/test/GravitinoOptimizerEnvIT.java: ########## @@ -0,0 +1,184 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.gravitino.maintenance.optimizer.integration.test; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import java.util.HashMap; +import java.util.Map; +import org.apache.gravitino.Catalog; +import org.apache.gravitino.MetadataObject; +import org.apache.gravitino.NameIdentifier; +import org.apache.gravitino.Schema; +import org.apache.gravitino.catalog.lakehouse.iceberg.IcebergConstants; +import org.apache.gravitino.client.GravitinoMetalake; +import org.apache.gravitino.integration.test.util.BaseIT; +import org.apache.gravitino.maintenance.optimizer.common.OptimizerEnv; +import org.apache.gravitino.maintenance.optimizer.common.conf.OptimizerConfig; +import org.apache.gravitino.maintenance.optimizer.recommender.strategy.GravitinoStrategy; +import org.apache.gravitino.maintenance.optimizer.updater.metrics.storage.jdbc.GenericJdbcMetricsRepository; +import org.apache.gravitino.policy.PolicyContent; +import org.apache.gravitino.policy.PolicyContents; +import org.apache.gravitino.rel.Column; +import org.apache.gravitino.rel.Table; +import org.apache.gravitino.rel.expressions.transforms.Transform; +import org.apache.gravitino.rel.expressions.transforms.Transforms; +import org.apache.gravitino.rel.types.Types; +import org.junit.jupiter.api.BeforeAll; + +// Set up the Gravitino server, metalake, Iceberg catalogs +public class GravitinoOptimizerEnvIT extends BaseIT { + + protected static final String METALAKE_NAME = "test_metalake"; + protected static final String GRAVITINO_CATALOG_NAME = "iceberg"; + protected static final String TEST_SCHEMA = "test_schema"; + + protected Catalog catalogClient; + protected GravitinoMetalake metalakeClient; + protected OptimizerEnv optimizerEnv; + + @BeforeAll + @Override + public void startIntegrationTest() throws Exception { + super.startIntegrationTest(); + initMetalakeAndCatalog(); + this.optimizerEnv = initOptimizerEnv(); + } + + protected void createTable(String 
tableName) { + catalogClient + .asTableCatalog() + .createTable( + NameIdentifier.of(TEST_SCHEMA, tableName), + new Column[] {Column.of("col_1", Types.IntegerType.get())}, + "comment", + ImmutableMap.of()); + } + + protected NameIdentifier getTableIdentifier(String tableName) { + return NameIdentifier.of(GRAVITINO_CATALOG_NAME, TEST_SCHEMA, tableName); + } + + protected void createPartitionTable(String tableName) { + catalogClient + .asTableCatalog() + .createTable( + NameIdentifier.of(TEST_SCHEMA, tableName), + new Column[] { + Column.of("col1", Types.IntegerType.get(), "col1"), + Column.of("col2", Types.IntegerType.get(), "col2"), + Column.of("col3", Types.IntegerType.get(), "col3") + }, + "comment", + ImmutableMap.of(), + new Transform[] { + Transforms.identity("col1"), Transforms.bucket(8, new String[] {"col2"}) + }); + } + + protected void createPolicy(String policyName, Map<String, Object> rules, String policyType) { + PolicyContent content = + PolicyContents.custom( + rules, + ImmutableSet.of(MetadataObject.Type.TABLE), + Map.of( + GravitinoStrategy.STRATEGY_TYPE_KEY, + policyType, + GravitinoStrategy.JOB_TEMPLATE_NAME_KEY, + "template-name")); + metalakeClient.createPolicy(policyName, "custom", "comment", true, content); + } + + protected void associatePoliciesToTable(String policyName, String tableName) { + Table table = + catalogClient.asTableCatalog().loadTable(NameIdentifier.of(TEST_SCHEMA, tableName)); + table.supportsPolicies().associatePolicies(new String[] {policyName}, new String[] {}); + } + + protected void associatePoliciesToSchema(String policyName, String schemaName) { + Schema schema = catalogClient.asSchemas().loadSchema(schemaName); + schema.supportsPolicies().associatePolicies(new String[] {policyName}, new String[] {}); + } + + protected Map<String, String> getSpecifyConfigs() { + return Map.of(); + } + + protected OptimizerEnv initOptimizerEnv() { + Map<String, String> configs = new HashMap<>(); + configs.putAll(getGravitinoConfigs()); + 
configs.putAll(getJdbcMetricsConfigs()); + configs.putAll(getSpecifyConfigs()); + return new OptimizerEnv(new OptimizerConfig(configs)); + } + + private Map<String, String> getJdbcMetricsConfigs() { + String jdbcUrl = + String.format( + "jdbc:h2:file:/tmp/gravitino-optimizer-it-%d;DB_CLOSE_DELAY=-1;MODE=MYSQL;AUTO_SERVER=TRUE", + System.nanoTime()); + + return Map.of( + OptimizerConfig.OPTIMIZER_PREFIX + + GenericJdbcMetricsRepository.JDBC_METRICS_PREFIX + + GenericJdbcMetricsRepository.JDBC_URL, + jdbcUrl, + OptimizerConfig.OPTIMIZER_PREFIX + + GenericJdbcMetricsRepository.JDBC_METRICS_PREFIX + + GenericJdbcMetricsRepository.JDBC_DRIVER, + "org.h2.Driver"); + } + + private Map<String, String> getGravitinoConfigs() { + int gravitinoPort = getGravitinoServerPort(); + String uri = String.format("http://127.0.0.1:%d", gravitinoPort); + return ImmutableMap.of( + OptimizerConfig.GRAVITINO_URI, + uri, + OptimizerConfig.GRAVITINO_METALAKE, + METALAKE_NAME, + OptimizerConfig.GRAVITINO_DEFAULT_CATALOG, + GRAVITINO_CATALOG_NAME); + } + + private void initMetalakeAndCatalog() { + this.metalakeClient = client.createMetalake(METALAKE_NAME, "", new HashMap<>()); + this.catalogClient = createGravitinoIcebergCatalog(); + + if (!catalogClient.asSchemas().schemaExists(TEST_SCHEMA)) { + catalogClient.asSchemas().createSchema(TEST_SCHEMA, "comment", ImmutableMap.of()); + } + } + + private Catalog createGravitinoIcebergCatalog() { + return metalakeClient.createCatalog( + GRAVITINO_CATALOG_NAME, + Catalog.Type.RELATIONAL, + "lakehouse-iceberg", + "comment", + ImmutableMap.of( + IcebergConstants.URI, + "memory://gravitino-optimizer", + IcebergConstants.CATALOG_BACKEND, + "memory", + IcebergConstants.WAREHOUSE, + "file:///tmp/gravitino-optimizer/")); + } +} Review Comment: You named this class `xxxIT`, but I don't see any tests in it. Is that on purpose? 
########## maintenance/optimizer/src/test/java/org/apache/gravitino/maintenance/optimizer/integration/test/DummyJobMetricsComputer.java: ########## @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.maintenance.optimizer.integration.test; + +import java.util.List; +import org.apache.gravitino.NameIdentifier; +import org.apache.gravitino.maintenance.optimizer.api.common.MetricPoint; +import org.apache.gravitino.maintenance.optimizer.api.updater.SupportsCalculateJobMetrics; +import org.apache.gravitino.maintenance.optimizer.common.OptimizerEnv; +import org.apache.gravitino.stats.StatisticValues; + +public class DummyJobMetricsComputer implements SupportsCalculateJobMetrics { Review Comment: Change to use `xxxCalculator`. ########## maintenance/optimizer/src/test/java/org/apache/gravitino/maintenance/optimizer/integration/test/RecordingJobSubmitterForIT.java: ########## @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.gravitino.maintenance.optimizer.integration.test; + +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.CopyOnWriteArrayList; +import org.apache.gravitino.maintenance.optimizer.api.recommender.JobExecutionContext; +import org.apache.gravitino.maintenance.optimizer.api.recommender.JobSubmitter; +import org.apache.gravitino.maintenance.optimizer.common.OptimizerEnv; +import org.apache.gravitino.maintenance.optimizer.common.conf.OptimizerConfig; + +public class RecordingJobSubmitterForIT implements JobSubmitter { + public static final String NAME = "recording-job-submitter-it"; + public static final String SESSION_ID_KEY = + OptimizerConfig.JOB_SUBMITTER_CONFIG_PREFIX + "recording-session-id"; + + private static final Map<String, List<JobExecutionContext>> SUBMITTED_CONTEXTS_BY_SESSION = + new ConcurrentHashMap<>(); + + private String sessionId; + + public static void reset(String sessionId) { + SUBMITTED_CONTEXTS_BY_SESSION.put(sessionId, new CopyOnWriteArrayList<>()); + } + + public static List<JobExecutionContext> submittedContexts(String sessionId) { + return List.copyOf( + SUBMITTED_CONTEXTS_BY_SESSION.getOrDefault(sessionId, new CopyOnWriteArrayList<>())); + } + + public static void clear(String sessionId) { + SUBMITTED_CONTEXTS_BY_SESSION.remove(sessionId); + } + + @Override + public 
String submitJob(String jobTemplateName, JobExecutionContext jobExecutionContext) { + List<JobExecutionContext> submittedContexts = + SUBMITTED_CONTEXTS_BY_SESSION.computeIfAbsent( + sessionId, key -> new CopyOnWriteArrayList<>()); + submittedContexts.add(jobExecutionContext); + return "it-job-" + submittedContexts.size(); + } + + @Override + public String name() { + return NAME; + } + + @Override + public void initialize(OptimizerEnv optimizerEnv) { + this.sessionId = optimizerEnv.config().getAllConfig().get(SESSION_ID_KEY); + if (sessionId == null || sessionId.isEmpty()) { + throw new IllegalArgumentException( + "Missing test session id config for RecordingJobSubmitterForIT: " + SESSION_ID_KEY); + } + } + + @Override + public void close() throws Exception {} +} Review Comment: Also here, what do you test here? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
