[
https://issues.apache.org/jira/browse/HIVE-19927?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16576544#comment-16576544
]
Sankar Hariappan edited comment on HIVE-19927 at 8/10/18 4:54 PM:
------------------------------------------------------------------
[~alishap],
It fails because of the assert in the code below.
{code:java}
// Look up the last repl ID in the query conf, passing -1L as the default value.
Long bootDumpBeginReplId =
queryState.getConf().getLong(ReplicationSemanticAnalyzer.LAST_REPL_ID_KEY, -1L);
// A negative value (such as the -1L default) trips this assert when assertions are enabled.
assert (bootDumpBeginReplId >= 0L);{code}
The Driver is expected to set the "hive.repl.last.repl.id" config in
queryState.queryConf before invoking bootstrapDump.
So, this unit test should mock it like this.
{code:java}
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.QueryState;
/**
 * Exercises {@code ReplDumpTask.bootStrapDump} with {@code Utils} statically mocked and the
 * {@code Hive}, {@code HiveConf} and {@code QueryState} collaborators replaced by mocks.
 */
@RunWith(PowerMockRunner.class)
@PrepareForTest({ Utils.class })
@PowerMockIgnore({ "javax.management.*" })
public class ReplDumpTaskTest {

  @Mock
  private Hive hive;

  @Mock
  private HiveConf conf;

  @Mock
  private QueryState qs;

  /**
   * {@code ReplDumpTask} variant whose overridden hooks hand back fixed values or do nothing.
   * It is an inner (non-static) class because {@link #getHive()} returns the enclosing
   * test's mocked {@code hive}.
   */
  class StubReplDumpTask extends ReplDumpTask {

    /** Returns the enclosing test's mocked {@code Hive}. */
    @Override
    protected Hive getHive() {
      return hive;
    }

    /** Always reports {@code Long.MAX_VALUE} as the current notification id. */
    @Override
    long currentNotificationId(Hive hiveDb) {
      return Long.MAX_VALUE;
    }

    /** Always returns an empty valid-txn-list string. */
    @Override
    String getValidTxnListForReplDump(Hive hiveDb) {
      return "";
    }

    /** No-op override. */
    @Override
    void dumpFunctionMetadata(String dbName, Path dumpRoot) {
    }

    /** Returns a fresh mocked {@code Path} instead of dumping anything. */
    @Override
    Path dumpDbMetadata(String dbName, Path dumpRoot, long lastReplId) {
      return Mockito.mock(Path.class);
    }

    /** No-op override. */
    @Override
    void dumpConstraintMetadata(String dbName, String tblName, Path dbRoot) {
    }
  }

  /** Marker exception thrown by the failing table dump so the test can expect it. */
  private static class TestException extends Exception {
  }

  /**
   * Makes the second {@code dumpTable} call throw and checks, via the {@code finally} block,
   * that {@code Utils.resetDbBootstrapDumpState} was still invoked with the key that
   * {@code Utils.setDbBootstrapDumpState} returned. The {@link TestException} itself is
   * expected to propagate out of the test.
   */
  @Test(expected = TestException.class)
  public void removeDBPropertyToPreventRenameWhenBootstrapDumpOfTableFails()
      throws Exception {
    List<String> tableNames = Arrays.asList("a1", "a2");
    String bootstrapStateKey = "akeytoberandom";

    // Static stubs for Utils.
    mockStatic(Utils.class);
    when(Utils.matchesDb(same(hive), eq("default")))
        .thenReturn(Collections.singletonList("default"));
    when(Utils.getAllTables(same(hive), eq("default")))
        .thenReturn(tableNames);
    when(Utils.setDbBootstrapDumpState(same(hive), eq("default")))
        .thenReturn(bootstrapStateKey);
    when(Utils.matchesTbl(same(hive), eq("default"), anyString()))
        .thenReturn(tableNames);

    when(hive.getAllFunctions()).thenReturn(Collections.emptyList());

    // The query conf must carry a non-negative "hive.repl.last.repl.id";
    // stub it through the mocked QueryState/HiveConf pair.
    when(qs.getConf()).thenReturn(conf);
    when(conf.getLong("hive.repl.last.repl.id", -1L)).thenReturn(1L);

    ReplDumpTask task = new StubReplDumpTask() {
      private int dumpCalls = 0;

      @Override
      void dumpTable(String dbName, String tblName, String validTxnList,
          Path dbRoot, long lastReplId) throws Exception {
        // First call succeeds; every later call fails.
        if (++dumpCalls > 1) {
          throw new TestException();
        }
      }
    };
    task.initialize(qs, null, null, null);

    ReplDumpWork work = new ReplDumpWork(
        "default", "", Long.MAX_VALUE, Long.MAX_VALUE, "", Integer.MAX_VALUE, "");
    task.setWork(work);

    try {
      task.bootStrapDump(mock(Path.class), null, mock(Path.class));
    } finally {
      // verifyStatic() must directly precede the static call being verified.
      verifyStatic();
      Utils.resetDbBootstrapDumpState(same(hive), eq("default"), eq(bootstrapStateKey));
    }
  }
}{code}
I think this test was not run by ptest because its name doesn't start with Test*.
Will submit a patch with this fix in HIVE-20361. Thanks for bringing this up!
was (Author: sankarh):
[~alishap],
It fails because of the assert in the code below.
{code:java}
// Look up the last repl ID in the query conf, passing -1L as the default value.
Long bootDumpBeginReplId =
queryState.getConf().getLong(ReplicationSemanticAnalyzer.LAST_REPL_ID_KEY, -1L);
// A negative value (such as the -1L default) trips this assert when assertions are enabled.
assert (bootDumpBeginReplId >= 0L);{code}
The Driver is expected to set the "hive.repl.last.repl.id" config in
queryState.queryConf before invoking bootstrapDump.
So, this unit test should mock it like this.
{code:java}
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.QueryState;
/**
 * Exercises {@code ReplDumpTask.bootStrapDump} with {@code Utils} statically mocked and the
 * {@code Hive}, {@code HiveConf} and {@code QueryState} collaborators replaced by mocks.
 */
@RunWith(PowerMockRunner.class)
@PrepareForTest({ Utils.class })
@PowerMockIgnore({ "javax.management.*" })
public class ReplDumpTaskTest {

  @Mock
  private Hive hive;

  @Mock
  private HiveConf conf;

  @Mock
  private QueryState qs;

  /**
   * {@code ReplDumpTask} variant whose overridden hooks hand back fixed values or do nothing.
   * It is an inner (non-static) class because {@link #getHive()} returns the enclosing
   * test's mocked {@code hive}.
   */
  class StubReplDumpTask extends ReplDumpTask {

    /** Returns the enclosing test's mocked {@code Hive}. */
    @Override
    protected Hive getHive() {
      return hive;
    }

    /** Always reports {@code Long.MAX_VALUE} as the current notification id. */
    @Override
    long currentNotificationId(Hive hiveDb) {
      return Long.MAX_VALUE;
    }

    /** Always returns an empty valid-txn-list string. */
    @Override
    String getValidTxnListForReplDump(Hive hiveDb) {
      return "";
    }

    /** No-op override. */
    @Override
    void dumpFunctionMetadata(String dbName, Path dumpRoot) {
    }

    /** Returns a fresh mocked {@code Path} instead of dumping anything. */
    @Override
    Path dumpDbMetadata(String dbName, Path dumpRoot, long lastReplId) {
      return Mockito.mock(Path.class);
    }

    /** No-op override. */
    @Override
    void dumpConstraintMetadata(String dbName, String tblName, Path dbRoot) {
    }
  }

  /** Marker exception thrown by the failing table dump so the test can expect it. */
  private static class TestException extends Exception {
  }

  /**
   * Makes the second {@code dumpTable} call throw and checks, via the {@code finally} block,
   * that {@code Utils.resetDbBootstrapDumpState} was still invoked with the key that
   * {@code Utils.setDbBootstrapDumpState} returned. The {@link TestException} itself is
   * expected to propagate out of the test.
   */
  @Test(expected = TestException.class)
  public void removeDBPropertyToPreventRenameWhenBootstrapDumpOfTableFails()
      throws Exception {
    List<String> tableNames = Arrays.asList("a1", "a2");
    String bootstrapStateKey = "akeytoberandom";

    // Static stubs for Utils.
    mockStatic(Utils.class);
    when(Utils.matchesDb(same(hive), eq("default")))
        .thenReturn(Collections.singletonList("default"));
    when(Utils.getAllTables(same(hive), eq("default")))
        .thenReturn(tableNames);
    when(Utils.setDbBootstrapDumpState(same(hive), eq("default")))
        .thenReturn(bootstrapStateKey);
    when(Utils.matchesTbl(same(hive), eq("default"), anyString()))
        .thenReturn(tableNames);

    when(hive.getAllFunctions()).thenReturn(Collections.emptyList());

    // The query conf must carry a non-negative "hive.repl.last.repl.id";
    // stub it through the mocked QueryState/HiveConf pair.
    when(qs.getConf()).thenReturn(conf);
    when(conf.getLong("hive.repl.last.repl.id", -1L)).thenReturn(1L);

    ReplDumpTask task = new StubReplDumpTask() {
      private int dumpCalls = 0;

      @Override
      void dumpTable(String dbName, String tblName, String validTxnList,
          Path dbRoot, long lastReplId) throws Exception {
        // First call succeeds; every later call fails.
        if (++dumpCalls > 1) {
          throw new TestException();
        }
      }
    };
    task.initialize(qs, null, null, null);

    ReplDumpWork work = new ReplDumpWork(
        "default", "", Long.MAX_VALUE, Long.MAX_VALUE, "", Integer.MAX_VALUE, "");
    task.setWork(work);

    try {
      task.bootStrapDump(mock(Path.class), null, mock(Path.class));
    } finally {
      // verifyStatic() must directly precede the static call being verified.
      verifyStatic();
      Utils.resetDbBootstrapDumpState(same(hive), eq("default"), eq(bootstrapStateKey));
    }
  }
}{code}
I think this test was not run by ptest because its name doesn't start with Test*.
Will submit a patch with this fix in HIVE-20361. Thanks for bringing this up!
> Last Repl ID set by bootstrap dump is incorrect and may cause data loss if
> have ACID/MM tables.
> -----------------------------------------------------------------------------------------------
>
> Key: HIVE-19927
> URL: https://issues.apache.org/jira/browse/HIVE-19927
> Project: Hive
> Issue Type: Sub-task
> Components: HiveServer2, repl, Transactions
> Affects Versions: 3.1.0
> Reporter: Sankar Hariappan
> Assignee: Sankar Hariappan
> Priority: Major
> Labels: DR, pull-request-available, replication
> Fix For: 4.0.0, 3.2.0
>
> Attachments: HIVE-19927.01-branch-3.patch, HIVE-19927.01.patch,
> HIVE-19927.02.patch, HIVE-19927.03.patch, HIVE-19927.04.patch
>
>
> During bootstrap dump of ACID tables, let's consider the below sequence.
> - Current session (REPL DUMP), Open txn (Txn1) - Event-10
> - Another session (Session-2), Open txn (Txn2) - Event-11
> - Session-2 -> Insert data (T1.D1) to ACID table. - Event-12
> - Get lastReplId = last event ID logged. (Event-12)
> - Session-2 -> Commit Txn (Txn2) - Event-13
> - Dump ACID tables based on validTxnList based on Txn1. --> This step skips
> all the data written by txns > Txn1. So, T1.D1 will be missing.
> - Commit Txn (Txn1)
> - REPL LOAD from bootstrap dump will skip T1.D1.
> - Incremental REPL DUMP will start from Event-13 and hence lose Txn2 which is
> opened after Txn1. So, data T1.D1 will be lost for ever.
> Proposed to capture the lastReplId of bootstrap before opening current txn
> (Txn1) and store it in Driver context and use it for dump.
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)