[
https://issues.apache.org/jira/browse/HIVE-21960?focusedWorklogId=284357&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-284357
]
ASF GitHub Bot logged work on HIVE-21960:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 29/Jul/19 15:49
Start Date: 29/Jul/19 15:49
Worklog Time Spent: 10m
Work Description: ashutosh-bapat commented on pull request #735:
HIVE-21960 : Avoid running stats updater and partition management task on a
replicated table.
URL: https://github.com/apache/hive/pull/735#discussion_r308304082
##########
File path:
standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestPartitionManagement.java
##########
@@ -563,6 +564,94 @@ public void testPartitionDiscoverySkipInvalidPath()
throws TException, IOException
assertEquals(4, partitions.size());
}
+ @Test
+ public void testNoPartitionDiscoveryForReplTable() throws Exception {
+ String dbName = "db_repl1";
+ String tableName = "tbl_repl1";
+ Map<String, Column> colMap = buildAllColumns();
+ List<String> partKeys = Lists.newArrayList("state", "dt");
+ List<String> partKeyTypes = Lists.newArrayList("string", "date");
+ List<List<String>> partVals = Lists.newArrayList(
+ Lists.newArrayList("__HIVE_DEFAULT_PARTITION__", "1990-01-01"),
+ Lists.newArrayList("CA", "1986-04-28"),
+ Lists.newArrayList("MN", "2018-11-31"));
+ createMetadata(DEFAULT_CATALOG_NAME, dbName, tableName, partKeys,
partKeyTypes, partVals, colMap, false);
+ Table table = client.getTable(dbName, tableName);
+ List<Partition> partitions = client.listPartitions(dbName, tableName,
(short) -1);
+ assertEquals(3, partitions.size());
+ String tableLocation = table.getSd().getLocation();
+ URI location = URI.create(tableLocation);
+ Path tablePath = new Path(location);
+ FileSystem fs = FileSystem.get(location, conf);
+ Path newPart1 = new Path(tablePath, "state=WA/dt=2018-12-01");
+ Path newPart2 = new Path(tablePath, "state=UT/dt=2018-12-02");
+ fs.mkdirs(newPart1);
+ fs.mkdirs(newPart2);
+ assertEquals(5, fs.listStatus(tablePath).length);
+ partitions = client.listPartitions(dbName, tableName, (short) -1);
+ assertEquals(3, partitions.size());
+
+ // table property is set to true, but the table is marked as replication
target. The new
+ // partitions should not be created
+
table.getParameters().put(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY,
"true");
+ table.getParameters().put(ReplConst.REPL_TARGET_PROPERTY, "1");
+ client.alter_table(dbName, tableName, table);
+ runPartitionManagementTask(conf);
+ partitions = client.listPartitions(dbName, tableName, (short) -1);
+ assertEquals(3, partitions.size());
+
+ // change table type to external, delete a partition directory and make
sure partition discovery works
+ table.getParameters().put("EXTERNAL", "true");
+ table.setTableType(TableType.EXTERNAL_TABLE.name());
+ client.alter_table(dbName, tableName, table);
+ // Delete location of one of the partitions. The partition discovery task
should not drop
+ // that partition.
+ boolean deleted = fs.delete((new
Path(URI.create(partitions.get(0).getSd().getLocation()))).getParent(),
+ true);
+ assertTrue(deleted);
+ assertEquals(4, fs.listStatus(tablePath).length);
+ runPartitionManagementTask(conf);
+ partitions = client.listPartitions(dbName, tableName, (short) -1);
+ assertEquals(3, partitions.size());
+ }
+
+ @Test
+ public void testNoPartitionRetentionForReplTarget() throws TException,
InterruptedException {
+ String dbName = "db_repl2";
+ String tableName = "tbl_repl2";
+ Map<String, Column> colMap = buildAllColumns();
+ List<String> partKeys = Lists.newArrayList("state", "dt");
+ List<String> partKeyTypes = Lists.newArrayList("string", "date");
+ List<List<String>> partVals = Lists.newArrayList(
+ Lists.newArrayList("__HIVE_DEFAULT_PARTITION__", "1990-01-01"),
+ Lists.newArrayList("CA", "1986-04-28"),
+ Lists.newArrayList("MN", "2018-11-31"));
+ // Check for the existence of partitions 10 seconds after the partition
retention period has
+ // elapsed. Gives enough time for the partition retention task to work.
+ long partitionRetentionPeriodMs = 20000;
+ long waitingPeriodForTest = partitionRetentionPeriodMs + 10 * 1000;
+ createMetadata(DEFAULT_CATALOG_NAME, dbName, tableName, partKeys,
partKeyTypes, partVals, colMap, false);
+ Table table = client.getTable(dbName, tableName);
+ List<Partition> partitions = client.listPartitions(dbName, tableName,
(short) -1);
+ assertEquals(3, partitions.size());
+
+
table.getParameters().put(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY,
"true");
+
table.getParameters().put(PartitionManagementTask.PARTITION_RETENTION_PERIOD_TBLPROPERTY,
+ partitionRetentionPeriodMs + "ms");
+ table.getParameters().put(ReplConst.REPL_TARGET_PROPERTY, "1");
+ client.alter_table(dbName, tableName, table);
+
+ runPartitionManagementTask(conf);
+ partitions = client.listPartitions(dbName, tableName, (short) -1);
+ assertEquals(3, partitions.size());
+
+ // after 30s all partitions should remain intact for a table which is
target of replication.
+ Thread.sleep(waitingPeriodForTest);
+ runPartitionManagementTask(conf);
+ partitions = client.listPartitions(dbName, tableName, (short) -1);
Review comment:
testPartitionRetention() does that already. Do you want some other scenario
to be tested?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
Issue Time Tracking
-------------------
Worklog Id: (was: 284357)
Time Spent: 1h (was: 50m)
> HMS tasks on replica
> --------------------
>
> Key: HIVE-21960
> URL: https://issues.apache.org/jira/browse/HIVE-21960
> Project: Hive
> Issue Type: Improvement
> Components: HiveServer2, repl
> Affects Versions: 4.0.0
> Reporter: Ashutosh Bapat
> Assignee: Ashutosh Bapat
> Priority: Major
> Labels: pull-request-available
> Attachments: HIVE-21960.01.patch, HIVE-21960.02.patch,
> HIVE-21960.03.patch, Replication and House keeping tasks.pdf
>
> Time Spent: 1h
> Remaining Estimate: 0h
>
> An HMS performs a number of housekeeping tasks. Assess whether
> # They are required to be performed in the replicated data
> # Performing those on replicated data causes any issues and how to fix those.
--
This message was sent by Atlassian JIRA
(v7.6.14#76016)