This is an automated email from the ASF dual-hosted git repository.
alexey pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 94159e5 KUDU-3295: Fix the number of replicas does not equal the
number of servers
94159e5 is described below
commit 94159e5c6713834d86bb89ecb681224cf6eeaf01
Author: Hongjiang Zhang <[email protected]>
AuthorDate: Tue Oct 26 11:07:15 2021 +0800
KUDU-3295: Fix the number of replicas does not equal the number of servers
When a tserver is shut down and its hostname cannot be resolved via DNS,
the client may still receive replicas located on that tserver. As a
result, discoverTablets sees a number of servers that does not equal the
number of replicas, because the replica list is built without verifying
server connectivity. This patch fixes this by ignoring any replica whose
server cannot be resolved.
Change-Id: I7d2e7d49d234e41e66e9c367e061fb7da32e9b47
Reviewed-on: http://gerrit.cloudera.org:8080/17971
Reviewed-by: Andrew Wong <[email protected]>
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin <[email protected]>
---
.../org/apache/kudu/client/AsyncKuduClient.java | 13 ++++++-
.../apache/kudu/client/TestAsyncKuduClient.java | 43 ++++++++++++++++++++++
2 files changed, 55 insertions(+), 1 deletion(-)
diff --git
a/java/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduClient.java
b/java/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduClient.java
index c0b2870..3e2c466 100644
--- a/java/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduClient.java
+++ b/java/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduClient.java
@@ -2360,7 +2360,7 @@ public class AsyncKuduClient implements AutoCloseable {
List<Exception> lookupExceptions = new
ArrayList<>(tabletPb.getInternedReplicasCount());
List<ServerInfo> servers = new
ArrayList<>(tabletPb.getInternedReplicasCount());
-
+ List<String> dnsFailedServers = new
ArrayList<>(tabletPb.getInternedReplicasCount());
// Lambda that does the common handling of a ts info.
Consumer<TSInfoPB> updateServersAndCollectExceptions = tsInfo -> {
try {
@@ -2370,6 +2370,9 @@ public class AsyncKuduClient implements AutoCloseable {
}
} catch (UnknownHostException ex) {
lookupExceptions.add(ex);
+ final List<Common.HostPortPB> addresses =
tsInfo.getRpcAddressesList();
+ // Here only add the first address because resolveTS only resolves
the first one.
+ dnsFailedServers.add(addresses.get(0).getHost());
}
};
@@ -2380,6 +2383,10 @@ public class AsyncKuduClient implements AutoCloseable {
updateServersAndCollectExceptions.accept(tsInfo);
String tsHost = tsInfo.getRpcAddressesList().isEmpty() ?
null : tsInfo.getRpcAddressesList().get(0).getHost();
+ if (tsHost == null || dnsFailedServers.contains(tsHost)) {
+ // skip the DNS failed tserver
+ continue;
+ }
Integer tsPort = tsInfo.getRpcAddressesList().isEmpty() ?
null : tsInfo.getRpcAddressesList().get(0).getPort();
String dimensionLabel = replica.hasDimensionLabel() ?
replica.getDimensionLabel() : null;
@@ -2398,6 +2405,10 @@ public class AsyncKuduClient implements AutoCloseable {
updateServersAndCollectExceptions.accept(tsInfo);
String tsHost = tsInfo.getRpcAddressesList().isEmpty() ?
null : tsInfo.getRpcAddressesList().get(0).getHost();
+ if (tsHost == null || dnsFailedServers.contains(tsHost)) {
+ // skip the DNS failed tserver
+ continue;
+ }
Integer tsPort = tsInfo.getRpcAddressesList().isEmpty() ?
null : tsInfo.getRpcAddressesList().get(0).getPort();
String dimensionLabel = replica.hasDimensionLabel() ?
replica.getDimensionLabel() : null;
diff --git
a/java/kudu-client/src/test/java/org/apache/kudu/client/TestAsyncKuduClient.java
b/java/kudu-client/src/test/java/org/apache/kudu/client/TestAsyncKuduClient.java
index d6fbd0f..1a6baba 100644
---
a/java/kudu-client/src/test/java/org/apache/kudu/client/TestAsyncKuduClient.java
+++
b/java/kudu-client/src/test/java/org/apache/kudu/client/TestAsyncKuduClient.java
@@ -187,6 +187,49 @@ public class TestAsyncKuduClient {
}
@Test
+ public void testDiscoverTabletOnBadHostname() throws Exception {
+ int tserverIdx = 1; // select one tserver for testing
+ final CreateTableOptions options = getBasicCreateTableOptions();
+ final KuduTable table = client.createTable(
+ "testDiscoverTabletOnBadHostname-" + System.currentTimeMillis(),
+ basicSchema,
+ options);
+ // Get the tserver host_port to uuid mapping
+ List<HostAndPort> tservers = harness.getTabletServers();
+
+ // call discoverTablets
+ List<Master.TabletLocationsPB> tabletLocations = new ArrayList<>();
+ List<Master.TSInfoPB> tsInfos = new ArrayList<>();
+
+ // Builder three bad locations.
+ Master.TabletLocationsPB.Builder tabletPb =
Master.TabletLocationsPB.newBuilder();
+ for (int i = 0; i < 3; i++) {
+ Common.PartitionPB.Builder partition = Common.PartitionPB.newBuilder();
+ partition.setPartitionKeyStart(ByteString.copyFrom("a" + i,
UTF_8.name()));
+ partition.setPartitionKeyEnd(ByteString.copyFrom("b" + i, UTF_8.name()));
+ tabletPb.setPartition(partition);
+ tabletPb.setTabletId(ByteString.copyFromUtf8("some id " + i));
+
tabletPb.addInternedReplicas(ProtobufUtils.getFakeTabletInternedReplicaPB(
+ i, Metadata.RaftPeerPB.Role.FOLLOWER));
+ tabletLocations.add(tabletPb.build());
+ String[] hostPort = tservers.get(i).toString().split(":");
+ String tserverHost = hostPort[0];
+ if (i == tserverIdx) {
+ // simulate IP resolve failure by hacking the hostname
+ tserverHost = tserverHost + "xxx";
+ }
+ tsInfos.add(ProtobufUtils.getFakeTSInfoPB("tserver",
+ tserverHost, Integer.parseInt(hostPort[1])).build());
+ }
+ try {
+ asyncClient.discoverTablets(table, new byte[0], 100,
+ tabletLocations, tsInfos, 1000);
+ } catch (Exception ex) {
+ fail("discoverTablets should not complain: " + ex.getMessage());
+ }
+ }
+
+ @Test
public void testNoLeader() throws Exception {
final int requestBatchSize = 10;
final CreateTableOptions options = getBasicCreateTableOptions();