lcspinter commented on a change in pull request #2563:
URL: https://github.com/apache/hive/pull/2563#discussion_r694684449
##########
File path:
itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactionMetricsOnTez.java
##########
@@ -105,12 +118,100 @@ public void testDeltaFilesMetric() throws Exception {
executeStatementOnDriver("select avg(b) from " + tableName, driver);
Thread.sleep(1000);
- Assert.assertTrue(
- equivalent(
- new HashMap<String, String>() {{
+ verifyMetricsMatch(new HashMap<String, String>() {{
put(tableName + Path.SEPARATOR + partitionToday, "1");
- }}, gaugeToMap(MetricsConstants.COMPACTION_NUM_SMALL_DELTAS)));
+ }}, gaugeToMap(MetricsConstants.COMPACTION_NUM_SMALL_DELTAS));
+ }
- DeltaFilesMetricReporter.close();
+ /**
+ * Queries shouldn't fail, but metrics should be 0, if tez.counters.max
limit is passed.
+ * @throws Exception
+ */
+ @Test
+ public void testDeltaFilesMetricTezMaxCounters() throws Exception {
+ HiveConf conf = new HiveConf();
+ conf.setInt(TezConfiguration.TEZ_COUNTERS_MAX, 50);
+ HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_METRICS_ENABLED,
true);
+ configureMetrics(conf);
+ setupWithConf(conf);
+
+ MetricsFactory.close();
+ MetricsFactory.init(conf);
+ DeltaFilesMetricReporter.init(conf);
+
+ String tableName = "test_metrics";
+ CompactorOnTezTest.TestDataProvider testDataProvider = new
CompactorOnTezTest.TestDataProvider();
+ testDataProvider.createFullAcidTable(tableName, true, false);
+ // Create 51 partitions
+ for (int i = 0; i < 51; i++) {
+ executeStatementOnDriver("insert into " + tableName + " values('1', " +
i * i + ", '" + i + "')", driver);
+ }
+
+ // Touch all partitions
+ executeStatementOnDriver("select avg(b) from " + tableName, driver);
+ Thread.sleep(1000);
+
+ Assert.assertEquals(0,
gaugeToMap(MetricsConstants.COMPACTION_NUM_DELTAS).size());
+ Assert.assertEquals(0,
gaugeToMap(MetricsConstants.COMPACTION_NUM_OBSOLETE_DELTAS).size());
+ Assert.assertEquals(0,
gaugeToMap(MetricsConstants.COMPACTION_NUM_SMALL_DELTAS).size());
+ }
+
+ /**
+ * Queries should succeed if additional acid metrics are disabled.
+ * @throws Exception
+ */
+ @Test
+ public void testDeltaFilesMetricWithMetricsDisabled() throws Exception {
+ HiveConf conf = new HiveConf();
+ HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_METRICS_ENABLED,
false);
+ MetastoreConf.setBoolVar(conf,
MetastoreConf.ConfVars.METASTORE_ACIDMETRICS_EXT_ON, true);
+ configureMetrics(conf);
+ super.setupWithConf(conf);
+
+ MetricsFactory.close();
+ MetricsFactory.init(conf);
+
+ String tableName = "test_metrics";
+ CompactorOnTezTest.TestDataProvider testDataProvider = new
CompactorOnTezTest.TestDataProvider();
+ testDataProvider.createFullAcidTable(tableName, true, false);
+ testDataProvider.insertTestDataPartitioned(tableName);
+
+ executeStatementOnDriver("select avg(b) from " + tableName, driver);
+
+ try {
+ Assert.assertEquals(0,
gaugeToMap(MetricsConstants.COMPACTION_NUM_DELTAS).size());
Review comment:
Would it be possible to move this assertion to function level?
`@Test(expected = javax.management.InstanceNotFoundException.class)`
##########
File path:
itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactionMetricsOnTez.java
##########
@@ -17,42 +17,59 @@
*/
package org.apache.hadoop.hive.ql.txn.compactor;
-import com.codahale.metrics.Gauge;
import org.apache.commons.lang3.RandomStringUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.metrics.common.MetricsFactory;
-import org.apache.hadoop.hive.common.metrics.metrics2.CodahaleMetrics;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.CompactionType;
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
import org.apache.hadoop.hive.metastore.metrics.MetricsConstants;
import
org.apache.hadoop.hive.ql.txn.compactor.metrics.DeltaFilesMetricReporter;
+import org.apache.tez.dag.api.TezConfiguration;
+import org.junit.After;
import org.junit.Assert;
import org.junit.Test;
import java.text.MessageFormat;
import java.util.HashMap;
-import java.util.Map;
import java.util.concurrent.TimeUnit;
-import static
org.apache.hadoop.hive.ql.txn.compactor.TestCompactionMetrics.equivalent;
-import static
org.apache.hadoop.hive.ql.txn.compactor.TestCompactionMetrics.gaugeToMap;
+import static
org.apache.hadoop.hive.ql.txn.compactor.TestDeltaFilesMetrics.gaugeToMap;
+import static
org.apache.hadoop.hive.ql.txn.compactor.TestDeltaFilesMetrics.verifyMetricsMatch;
import static
org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.executeStatementOnDriver;
public class TestCompactionMetricsOnTez extends CompactorOnTezTest {
- @Test
- public void testDeltaFilesMetric() throws Exception {
- MetricsFactory.close();
- HiveConf conf = driver.getConf();
+ /**
+ * Use {@link
CompactorOnTezTest#setupWithConf(org.apache.hadoop.hive.conf.HiveConf)} when
HiveConf is
+ * configured to your liking.
+ */
+ @Override
+ public void setup() {
+ }
- HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_METRICS_ENABLED,
true);
- MetricsFactory.init(conf);
+ @After
+ public void tearDown() {
+ DeltaFilesMetricReporter.close();
+ }
+ private void configureMetrics(HiveConf conf) {
HiveConf.setIntVar(conf,
HiveConf.ConfVars.HIVE_TXN_ACID_METRICS_OBSOLETE_DELTA_NUM_THRESHOLD, 0);
HiveConf.setIntVar(conf,
HiveConf.ConfVars.HIVE_TXN_ACID_METRICS_DELTA_NUM_THRESHOLD, 0);
HiveConf.setTimeVar(conf,
HiveConf.ConfVars.HIVE_TXN_ACID_METRICS_REPORTING_INTERVAL, 1,
TimeUnit.SECONDS);
HiveConf.setTimeVar(conf,
HiveConf.ConfVars.HIVE_TXN_ACID_METRICS_DELTA_CHECK_THRESHOLD, 0,
TimeUnit.SECONDS);
HiveConf.setFloatVar(conf,
HiveConf.ConfVars.HIVE_TXN_ACID_METRICS_DELTA_PCT_THRESHOLD, 0.7f);
+ }
+
+ @Test
+ public void testDeltaFilesMetric() throws Exception {
+ HiveConf conf = new HiveConf();
+ HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_METRICS_ENABLED,
true);
+ configureMetrics(conf);
+ setupWithConf(conf);
Review comment:
In my opinion, it is hard to follow what parameters are set in HiveConf?
Would it be possible to move it to setup()? Also, there is a bit of repetition
in this init phase? Can we move it to a separate method?
##########
File path:
ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java
##########
@@ -272,30 +272,34 @@ private void prepare(InputInitializerContext
initializerContext) throws IOExcept
String groupName = null;
String vertexName = null;
if (inputInitializerContext != null) {
- tezCounters = new TezCounters();
- groupName = HiveInputCounters.class.getName();
- vertexName = jobConf.get(Operator.CONTEXT_NAME_KEY, "");
- counterName =
Utilities.getVertexCounterName(HiveInputCounters.RAW_INPUT_SPLITS.name(),
vertexName);
- tezCounters.findCounter(groupName,
counterName).increment(splits.length);
- final List<Path> paths = Utilities.getInputPathsTez(jobConf, work);
- counterName =
Utilities.getVertexCounterName(HiveInputCounters.INPUT_DIRECTORIES.name(),
vertexName);
- tezCounters.findCounter(groupName,
counterName).increment(paths.size());
- final Set<String> files = new HashSet<>();
- for (InputSplit inputSplit : splits) {
- if (inputSplit instanceof FileSplit) {
- final FileSplit fileSplit = (FileSplit) inputSplit;
- final Path path = fileSplit.getPath();
- // The assumption here is the path is a file. Only case this is
different is ACID deltas.
- // The isFile check is avoided here for performance reasons.
- final String fileStr = path.toString();
- if (!files.contains(fileStr)) {
- files.add(fileStr);
+ try {
+ tezCounters = new TezCounters();
+ groupName = HiveInputCounters.class.getName();
+ vertexName = jobConf.get(Operator.CONTEXT_NAME_KEY, "");
+ counterName =
Utilities.getVertexCounterName(HiveInputCounters.RAW_INPUT_SPLITS.name(),
vertexName);
+ tezCounters.findCounter(groupName,
counterName).increment(splits.length);
+ final List<Path> paths = Utilities.getInputPathsTez(jobConf, work);
+ counterName =
Utilities.getVertexCounterName(HiveInputCounters.INPUT_DIRECTORIES.name(),
vertexName);
+ tezCounters.findCounter(groupName,
counterName).increment(paths.size());
+ final Set<String> files = new HashSet<>();
+ for (InputSplit inputSplit : splits) {
+ if (inputSplit instanceof FileSplit) {
+ final FileSplit fileSplit = (FileSplit) inputSplit;
+ final Path path = fileSplit.getPath();
+ // The assumption here is the path is a file. Only case this
is different is ACID deltas.
+ // The isFile check is avoided here for performance reasons.
+ final String fileStr = path.toString();
+ if (!files.contains(fileStr)) {
+ files.add(fileStr);
+ }
}
}
+ counterName =
Utilities.getVertexCounterName(HiveInputCounters.INPUT_FILES.name(),
vertexName);
+ tezCounters.findCounter(groupName,
counterName).increment(files.size());
+ DeltaFilesMetricReporter.createCountersForAcidMetrics(tezCounters,
jobConf);
+ } catch (Exception e) {
Review comment:
Do we want to catch every Throwable or can we narrow it down?
##########
File path:
itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactionMetricsOnTez.java
##########
@@ -105,12 +118,100 @@ public void testDeltaFilesMetric() throws Exception {
executeStatementOnDriver("select avg(b) from " + tableName, driver);
Thread.sleep(1000);
- Assert.assertTrue(
- equivalent(
- new HashMap<String, String>() {{
+ verifyMetricsMatch(new HashMap<String, String>() {{
put(tableName + Path.SEPARATOR + partitionToday, "1");
- }}, gaugeToMap(MetricsConstants.COMPACTION_NUM_SMALL_DELTAS)));
+ }}, gaugeToMap(MetricsConstants.COMPACTION_NUM_SMALL_DELTAS));
+ }
- DeltaFilesMetricReporter.close();
+ /**
+ * Queries shouldn't fail, but metrics should be 0, if tez.counters.max
limit is passed.
+ * @throws Exception
+ */
+ @Test
+ public void testDeltaFilesMetricTezMaxCounters() throws Exception {
+ HiveConf conf = new HiveConf();
+ conf.setInt(TezConfiguration.TEZ_COUNTERS_MAX, 50);
+ HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_METRICS_ENABLED,
true);
+ configureMetrics(conf);
+ setupWithConf(conf);
+
+ MetricsFactory.close();
+ MetricsFactory.init(conf);
+ DeltaFilesMetricReporter.init(conf);
+
+ String tableName = "test_metrics";
+ CompactorOnTezTest.TestDataProvider testDataProvider = new
CompactorOnTezTest.TestDataProvider();
+ testDataProvider.createFullAcidTable(tableName, true, false);
+ // Create 51 partitions
+ for (int i = 0; i < 51; i++) {
+ executeStatementOnDriver("insert into " + tableName + " values('1', " +
i * i + ", '" + i + "')", driver);
+ }
+
+ // Touch all partitions
+ executeStatementOnDriver("select avg(b) from " + tableName, driver);
+ Thread.sleep(1000);
+
+ Assert.assertEquals(0,
gaugeToMap(MetricsConstants.COMPACTION_NUM_DELTAS).size());
+ Assert.assertEquals(0,
gaugeToMap(MetricsConstants.COMPACTION_NUM_OBSOLETE_DELTAS).size());
+ Assert.assertEquals(0,
gaugeToMap(MetricsConstants.COMPACTION_NUM_SMALL_DELTAS).size());
+ }
+
+ /**
+ * Queries should succeed if additional acid metrics are disabled.
+ * @throws Exception
+ */
+ @Test
+ public void testDeltaFilesMetricWithMetricsDisabled() throws Exception {
+ HiveConf conf = new HiveConf();
+ HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_METRICS_ENABLED,
false);
+ MetastoreConf.setBoolVar(conf,
MetastoreConf.ConfVars.METASTORE_ACIDMETRICS_EXT_ON, true);
+ configureMetrics(conf);
+ super.setupWithConf(conf);
+
+ MetricsFactory.close();
+ MetricsFactory.init(conf);
+
+ String tableName = "test_metrics";
+ CompactorOnTezTest.TestDataProvider testDataProvider = new
CompactorOnTezTest.TestDataProvider();
+ testDataProvider.createFullAcidTable(tableName, true, false);
+ testDataProvider.insertTestDataPartitioned(tableName);
+
+ executeStatementOnDriver("select avg(b) from " + tableName, driver);
+
+ try {
+ Assert.assertEquals(0,
gaugeToMap(MetricsConstants.COMPACTION_NUM_DELTAS).size());
+ Assert.fail();
+ } catch (javax.management.InstanceNotFoundException e) {
+ }
+ }
+
+
+ /**
+ * Queries should succeed if extended metrics are disabled.
+ * @throws Exception
+ */
+ @Test
+ public void testDeltaFilesMetricWithExtMetricsDisabled() throws Exception {
Review comment:
Again, exception assertion on function level.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]