veghlaci05 commented on code in PR #4091:
URL: https://github.com/apache/hive/pull/4091#discussion_r1135353739


##########
ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/handler/TxnAbortedCleaner.java:
##########
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.txn.compactor.handler;
+
+import org.apache.hadoop.hive.common.ValidReaderWriteIdList;
+import org.apache.hadoop.hive.common.ValidTxnList;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.metrics.MetricsConstants;
+import org.apache.hadoop.hive.metastore.metrics.PerfLogger;
+import org.apache.hadoop.hive.metastore.txn.CompactionInfo;
+import org.apache.hadoop.hive.metastore.txn.TxnStore;
+import org.apache.hadoop.hive.metastore.txn.TxnUtils;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
+import org.apache.hadoop.hive.ql.txn.compactor.CompactorUtil;
+import org.apache.hadoop.hive.ql.txn.compactor.CompactorUtil.ThrowingRunnable;
+import org.apache.hadoop.hive.ql.txn.compactor.FSRemover;
+import org.apache.hadoop.hive.ql.txn.compactor.MetadataCache;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+
+import static java.util.Objects.isNull;
+
+class TxnAbortedCleaner extends AcidTxnCleaner {

Review Comment:
   I think AbortedTxnCleaner is more meaningful.



##########
ql/src/test/org/apache/hadoop/hive/ql/txn/compactor/TestInitiatorWithAbortCleanupUsingCleaner.java:
##########
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.txn.compactor;
+
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
+
+public class TestInitiatorWithAbortCleanupUsingCleaner extends TestInitiator {
+  @Override
+  public void setup() throws Exception {
+    super.setup();
+    MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.COMPACTOR_CLEAN_ABORTS_USING_CLEANER, true);

Review Comment:
   This class is not a subclass of CompactorTest. It should be done the other
   way around: set the config to false here and rename the class to
   TestInitiatorWithAbortCleanupUsingHandler or something similar; in the base
   class it will take the default value, which is true. I suggest doing the
   same in the other class hierarchy as well (CompactorTest and its
   descendants). To me it is more straightforward for a descendant class whose
   only change is a config value to test the non-default value, rather than
   restore the default after the base class has set it to something else.
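
   A minimal sketch of the suggested shape (only the class name and the config
   key come from this comment and the diff above; the rest of the base-class
   setup is assumed to stay as it is today):

   ```java
   package org.apache.hadoop.hive.ql.txn.compactor;

   import org.apache.hadoop.hive.metastore.conf.MetastoreConf;

   public class TestInitiatorWithAbortCleanupUsingHandler extends TestInitiator {
     @Override
     public void setup() throws Exception {
       super.setup();
       // Override only the config: the descendant exercises the non-default
       // value (false), while the base class runs with the default (true).
       MetastoreConf.setBoolVar(conf,
           MetastoreConf.ConfVars.COMPACTOR_CLEAN_ABORTS_USING_CLEANER, false);
     }
   }
   ```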



##########
ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/handler/TxnAbortedCleaner.java:
##########
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.txn.compactor.handler;
+
+import org.apache.hadoop.hive.common.ValidReaderWriteIdList;
+import org.apache.hadoop.hive.common.ValidTxnList;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.metrics.MetricsConstants;
+import org.apache.hadoop.hive.metastore.metrics.PerfLogger;
+import org.apache.hadoop.hive.metastore.txn.CompactionInfo;
+import org.apache.hadoop.hive.metastore.txn.TxnStore;
+import org.apache.hadoop.hive.metastore.txn.TxnUtils;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
+import org.apache.hadoop.hive.ql.txn.compactor.CompactorUtil;
+import org.apache.hadoop.hive.ql.txn.compactor.CompactorUtil.ThrowingRunnable;
+import org.apache.hadoop.hive.ql.txn.compactor.FSRemover;
+import org.apache.hadoop.hive.ql.txn.compactor.MetadataCache;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+
+import static java.util.Objects.isNull;
+
+class TxnAbortedCleaner extends AcidTxnCleaner {
+
+  private static final Logger LOG = LoggerFactory.getLogger(TxnAbortedCleaner.class.getName());
+
+  public TxnAbortedCleaner(HiveConf conf, TxnStore txnHandler,
+                           MetadataCache metadataCache, boolean metricsEnabled,
+                           FSRemover fsRemover) {
+    super(conf, txnHandler, metadataCache, metricsEnabled, fsRemover);
+  }
+
+  /**
+   The following cleanup is based on the following idea - <br>
+   1. Aborted cleanup is independent of compaction. This is because directories which are written by
+      aborted txns are not visible by any open txns. It is only visible while determining the AcidState (which
+      only sees the aborted deltas and does not read the file).<br><br>
+
+   The following algorithm is used to clean the set of aborted directories - <br>
+      a. Find the list of entries which are suitable for cleanup (This is done in {@link TxnStore#findReadyToCleanForAborts(long, int)}).<br>
+      b. If the table/partition does not exist, then remove the associated aborted entry in TXN_COMPONENTS table. <br>
+      c. Get the AcidState of the table by using the min open txnID, database name, tableName, partition name, highest write ID <br>
+      d. Fetch the aborted directories and delete the directories. <br>
+      e. Fetch the aborted write IDs from the AcidState and use it to delete the associated metadata in the TXN_COMPONENTS table.
+   **/
+  @Override
+  public List<Runnable> getTasks() throws MetaException {
+    int abortedThreshold = HiveConf.getIntVar(conf,
+              HiveConf.ConfVars.HIVE_COMPACTOR_ABORTEDTXN_THRESHOLD);
+    long abortedTimeThreshold = HiveConf
+              .getTimeVar(conf, HiveConf.ConfVars.HIVE_COMPACTOR_ABORTEDTXN_TIME_THRESHOLD,
+                      TimeUnit.MILLISECONDS);
+    List<CompactionInfo> readyToCleanAborts = txnHandler.findReadyToCleanForAborts(abortedTimeThreshold, abortedThreshold);
+
+    if (!readyToCleanAborts.isEmpty()) {
+      return readyToCleanAborts.stream().map(ci -> ThrowingRunnable.unchecked(() ->
+                      clean(ci, ci.txnId > 0 ? ci.txnId : Long.MAX_VALUE, metricsEnabled)))
+              .collect(Collectors.toList());
+    }
+    return Collections.emptyList();
+  }
+
+  private void clean(CompactionInfo ci, long minOpenTxn, boolean metricsEnabled) throws MetaException {

Review Comment:
   This class should have its own DAO/POJO class; we are starting to really
   overuse CompactionInfo.
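
   As an illustration only, a possible shape for such a class (the name
   AbortedTxnCleanupRequest and its fields are hypothetical, not from the PR;
   the fields mirror what the javadoc above lists as inputs to the cleanup:
   database name, table name, partition name, min open txn id, highest write id):

   ```java
   package org.apache.hadoop.hive.ql.txn.compactor.handler;

   /**
    * Hypothetical carrier object for the aborted-txn cleanup path, holding only
    * the fields the handler actually needs instead of reusing CompactionInfo.
    */
   public class AbortedTxnCleanupRequest {
     private final String dbName;
     private final String tableName;
     private final String partName;     // null for unpartitioned tables
     private final long minOpenTxnId;   // lower bound used when resolving the AcidState
     private final long highestWriteId; // upper bound for the write-id list

     public AbortedTxnCleanupRequest(String dbName, String tableName, String partName,
                                     long minOpenTxnId, long highestWriteId) {
       this.dbName = dbName;
       this.tableName = tableName;
       this.partName = partName;
       this.minOpenTxnId = minOpenTxnId;
       this.highestWriteId = highestWriteId;
     }

     public String getDbName() { return dbName; }
     public String getTableName() { return tableName; }
     public String getPartName() { return partName; }
     public long getMinOpenTxnId() { return minOpenTxnId; }
     public long getHighestWriteId() { return highestWriteId; }
   }
   ```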



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

