klcopp commented on a change in pull request #2579:
URL: https://github.com/apache/hive/pull/2579#discussion_r686914879
##########
File path:
itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
##########
@@ -781,6 +782,67 @@ public void
autoCompactOnStreamingIngestWithDynamicPartition() throws Exception
}
}
+ @Test
+ public void testNoDataLossWhenMaxNumDeltaIsUsed() throws Exception {
+ String dbName = "default";
+ String tblName = "cws";
+ executeStatementOnDriver("drop table if exists " + tblName, driver);
+
+ executeStatementOnDriver("CREATE TABLE " + tblName + "(a INT, b STRING) " +
+ " STORED AS ORC TBLPROPERTIES ('transactional'='true')", driver);
+ executeStatementOnDriver("insert into " + tblName + " values (1, 'a')",
driver);
+ executeStatementOnDriver("insert into " + tblName + " values (3, 'b')",
driver);
+
+ runMajorCompaction(dbName, tblName);
+ runCleaner(conf);
+
+ for (int i = 0; i < 3; i++) {
+ executeStatementOnDriver("MERGE INTO " + tblName + " AS T USING (" +
+ "select * from " + tblName + " union all select a+1, b from " +
tblName + ") AS S " +
+ "ON T.a=s.a " +
+ "WHEN MATCHED THEN DELETE " +
+ "WHEN not MATCHED THEN INSERT values (s.a, s.b)", driver);
+ }
+
+ driver.run("select a from " + tblName);
+ List<String> res = new ArrayList<>();
+ driver.getFetchTask().fetch(res);
+ Assert.assertEquals(res, Arrays.asList("4", "6"));
+
+ conf.setIntVar(HiveConf.ConfVars.COMPACTOR_MAX_NUM_DELTA, 5);
+ runMajorCompaction(dbName, tblName);
+
+ List<String> matchesNotFound = new ArrayList<>(5);
+ matchesNotFound.add(AcidUtils.deleteDeltaSubdir(3, 4) +
VISIBILITY_PATTERN);
+ matchesNotFound.add(AcidUtils.deltaSubdir(3, 4) + VISIBILITY_PATTERN);
+ matchesNotFound.add(AcidUtils.deleteDeltaSubdir(5, 5, 0));
+ matchesNotFound.add(AcidUtils.deltaSubdir(5, 5, 1));
+ matchesNotFound.add(AcidUtils.baseDir(5) + VISIBILITY_PATTERN);
+
+ IMetaStoreClient msClient = new HiveMetaStoreClient(conf);
+ Table table = msClient.getTable(dbName, tblName);
+ msClient.close();
+
+ FileSystem fs = FileSystem.get(conf);
+ FileStatus[] stat = fs.listStatus(new Path(table.getSd().getLocation()));
+
+ for (FileStatus f : stat) {
+ for (int j = 0; j < matchesNotFound.size(); j++) {
+ if (f.getPath().getName().matches(matchesNotFound.get(j))) {
+ matchesNotFound.remove(j);
+ break;
+ }
+ }
+ }
+ Assert.assertEquals("Matches Not Found: " + matchesNotFound.toArray(), 0,
matchesNotFound.size());
Review comment:
`matchesNotFound.toArray()` should be
`Arrays.toString(matchesNotFound.toArray())` to display the contents; concatenating the array directly prints only its type and hash code, not its elements.
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
##########
@@ -234,12 +234,24 @@ void run(HiveConf conf, String jobName, Table t,
Partition p, StorageDescriptor
"especially if this message repeats. Check that compaction is running
properly. Check for any " +
"runaway/mis-configured process writing to ACID tables, especially
using Streaming Ingest API.");
int numMinorCompactions = parsedDeltas.size() / maxDeltasToHandle;
+ parsedDeltas.sort(AcidUtils.ParsedDeltaLight::compareTo);
+
+ int start = 0;
+ int end = maxDeltasToHandle;
+
for (int jobSubId = 0; jobSubId < numMinorCompactions; jobSubId++) {
+ while (parsedDeltas.get(end).getMinWriteId() == parsedDeltas.get(end -
1).getMinWriteId() &&
+ parsedDeltas.get(end).getMaxWriteId() == parsedDeltas.get(end -
1).getMaxWriteId()) {
Review comment:
Sorry, I don't get this part. What does it do?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]