This is an automated email from the ASF dual-hosted git repository.
granthenke pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git
The following commit(s) were added to refs/heads/master by this push:
new 929cb67 KUDU-3205: Fix building scan tokens when tablet not found
errors occur
929cb67 is described below
commit 929cb67da52f9bd30827527bb440b83d2260f608
Author: Grant Henke <[email protected]>
AuthorDate: Mon Jan 11 16:08:34 2021 -0600
KUDU-3205: Fix building scan tokens when tablet not found errors occur
When tablet not found errors occur AsyncKuduClient.invalidateTabletCache
is called which then calls RemoteTablet.removeTabletClient to remove
a server (by uuid) from the RemoteTablet tabletServers list. This means that
the RemoteTablet can have a replica returned from RemoteTablet.getReplicas
which has a server that is not returned by
RemoteTablet.getTabletServersCopy.
This results in a NPE when building a scan token because there is no
matching
server for the replica in the serverIndexMap.
To fix this I simply check if the serverIndex found via the serverIndexMap
is
null and ignore that replica if it is. A better long term fix is likely to
build
the server info from the replicas themself, or to change the RemoteTablet
behavior
to blacklist servers instead of removing them from the actual tabletServers
list, or
to also remove the replica itself when RemoteTablet.removeTabletClient is
called.
I didn’t do any of these options now because they are all larger changes
with a
wider potential impact.
Change-Id: I68679dee1ad7ebca405dd6e086770f3e034e310c
Reviewed-on: http://gerrit.cloudera.org:8080/16941
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin <[email protected]>
---
.../org/apache/kudu/client/AsyncKuduClient.java | 2 ++
.../java/org/apache/kudu/client/KuduScanToken.java | 7 ++++
.../java/org/apache/kudu/client/RemoteTablet.java | 6 ++++
.../java/org/apache/kudu/client/TestScanToken.java | 40 ++++++++++++++++++++++
4 files changed, 55 insertions(+)
diff --git
a/java/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduClient.java
b/java/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduClient.java
index f3a437d..6a9ab5a 100644
--- a/java/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduClient.java
+++ b/java/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduClient.java
@@ -2235,6 +2235,8 @@ public class AsyncKuduClient implements AutoCloseable {
final String uuid = info.getUuid();
LOG.info("Invalidating location {} for tablet {}: {}",
info, tablet.getTabletId(), errorMessage);
+ // TODO(ghenke): Should this also remove the related replica?
+ // As it stands there can be a replica with a missing tablet server.
tablet.removeTabletClient(uuid);
}
diff --git
a/java/kudu-client/src/main/java/org/apache/kudu/client/KuduScanToken.java
b/java/kudu-client/src/main/java/org/apache/kudu/client/KuduScanToken.java
index 2d39c4b..1282a04 100644
--- a/java/kudu-client/src/main/java/org/apache/kudu/client/KuduScanToken.java
+++ b/java/kudu-client/src/main/java/org/apache/kudu/client/KuduScanToken.java
@@ -679,6 +679,13 @@ public class KuduScanToken implements
Comparable<KuduScanToken> {
for (LocatedTablet.Replica replica : remoteTablet.getReplicas())
{
Integer serverIndex = serverIndexMap.get(
new HostAndPort(replica.getRpcHost(),
replica.getRpcPort()));
+ // If the server index is not found it means that
RemoteTablet.removeTabletClient
+ // was called and removed the server likely as a result of a
tablet not found error.
+ // In that case we should remove the replica as it can't be
contacted.
+ if (serverIndex == null) {
+ continue;
+ }
+
Client.TabletMetadataPB.ReplicaMetadataPB.Builder
tabletMetadataBuilder =
Client.TabletMetadataPB.ReplicaMetadataPB.newBuilder()
.setRole(replica.getRoleAsEnum())
diff --git
a/java/kudu-client/src/main/java/org/apache/kudu/client/RemoteTablet.java
b/java/kudu-client/src/main/java/org/apache/kudu/client/RemoteTablet.java
index 29f33d4..6d63b2c 100644
--- a/java/kudu-client/src/main/java/org/apache/kudu/client/RemoteTablet.java
+++ b/java/kudu-client/src/main/java/org/apache/kudu/client/RemoteTablet.java
@@ -130,6 +130,8 @@ public class RemoteTablet implements
Comparable<RemoteTablet> {
if (leaderUuid != null && leaderUuid.equals(uuid)) {
leaderUuid = null;
}
+ // TODO(ghenke): Should this also remove the related replica?
+ // As it stands there can be a replica with a missing tablet server.
if (tabletServers.remove(uuid) != null) {
return true;
}
@@ -260,6 +262,10 @@ public class RemoteTablet implements
Comparable<RemoteTablet> {
/**
* Get replicas of this tablet. The returned list may not be mutated.
*
+ * This list of replicas may include replicas for servers that have been
+ * removed via `removeTabletClient`, therefore won't be returned via
+ * `getTabletServersCopy`.
+ *
* @return the replicas of the tablet
*/
List<LocatedTablet.Replica> getReplicas() {
diff --git
a/java/kudu-client/src/test/java/org/apache/kudu/client/TestScanToken.java
b/java/kudu-client/src/test/java/org/apache/kudu/client/TestScanToken.java
index 490b7dd..87e3128 100644
--- a/java/kudu-client/src/test/java/org/apache/kudu/client/TestScanToken.java
+++ b/java/kudu-client/src/test/java/org/apache/kudu/client/TestScanToken.java
@@ -693,4 +693,44 @@ public class TestScanToken {
assertEquals(1, resultKeys.size());
assertEquals(PREDICATE_VAL,
Iterables.getOnlyElement(resultKeys).intValue());
}
+
+ /**
+ * Regression test for KUDU-3205.
+ */
+ @Test
+ public void testBuildTokensWithDownTabletServer() throws Exception {
+ Schema schema = getBasicSchema();
+ CreateTableOptions createOptions = new CreateTableOptions();
+ createOptions.setRangePartitionColumns(ImmutableList.of());
+ createOptions.setNumReplicas(3);
+ KuduTable table = client.createTable(testTableName, schema, createOptions);
+
+ // Insert a row.
+ KuduSession session = client.newSession();
+ Insert insert = createBasicSchemaInsert(table, 1);
+ session.apply(insert);
+ session.close();
+
+ // Remove a tablet server from the remote tablet by calling
`removeTabletClient`.
+ // This is done in normal applications via
AsyncKuduClient.invalidateTabletCache
+ // when a tablet not found error is handled.
+ TableLocationsCache.Entry entry =
+ asyncClient.getTableLocationEntry(table.getTableId(),
insert.partitionKey());
+ RemoteTablet remoteTablet = entry.getTablet();
+ List<ServerInfo> tabletServers = remoteTablet.getTabletServersCopy();
+ remoteTablet.removeTabletClient(tabletServers.get(0).getUuid());
+
+ // Ensure we can build and use the token without an error.
+ KuduScanToken.KuduScanTokenBuilder tokenBuilder =
client.newScanTokenBuilder(table);
+ tokenBuilder.includeTableMetadata(true);
+ tokenBuilder.includeTabletMetadata(true);
+ List<KuduScanToken> tokens = tokenBuilder.build();
+ assertEquals(1, tokens.size());
+
+ // Use a new client to simulate hydrating in a new process.
+ KuduClient newClient =
+ new
KuduClient.KuduClientBuilder(harness.getMasterAddressesAsString()).build();
+ KuduScanner scanner = tokens.get(0).intoScanner(newClient);
+ assertEquals(1, countRowsInScan(scanner));
+ }
}