This is an automated email from the ASF dual-hosted git repository.

granthenke pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new 929cb67  KUDU-3205: Fix building scan tokens when tablet not found 
errors occur
929cb67 is described below

commit 929cb67da52f9bd30827527bb440b83d2260f608
Author: Grant Henke <[email protected]>
AuthorDate: Mon Jan 11 16:08:34 2021 -0600

    KUDU-3205: Fix building scan tokens when tablet not found errors occur
    
    When tablet not found errors occur AsyncKuduClient.invalidateTabletCache
    is called which then calls RemoteTablet.removeTabletClient to remove
    a server (by uuid) from the RemoteTablet tabletServers list. This means that
    the RemoteTablet can have a replica returned from RemoteTablet.getReplicas
    which has a server that is not returned by 
RemoteTablet.getTabletServersCopy.
    This results in a NPE when building a scan token because there is no 
matching
    server for the replica in the serverIndexMap.
    
    To fix this I simply check if the serverIndex found via the serverIndexMap 
is
    null and ignore that replica if it is. A better long term fix is likely to 
build
    the server info from the replicas themself, or to change the RemoteTablet 
behavior
    to blacklist servers instead of removing them from the actual tabletServers 
list, or
    to also remove the replica itself when RemoteTablet.removeTabletClient is 
called.
    I didn’t do any of these options now because they are all larger changes 
with a
    wider potential impact.
    
    Change-Id: I68679dee1ad7ebca405dd6e086770f3e034e310c
    Reviewed-on: http://gerrit.cloudera.org:8080/16941
    Tested-by: Kudu Jenkins
    Reviewed-by: Alexey Serbin <[email protected]>
---
 .../org/apache/kudu/client/AsyncKuduClient.java    |  2 ++
 .../java/org/apache/kudu/client/KuduScanToken.java |  7 ++++
 .../java/org/apache/kudu/client/RemoteTablet.java  |  6 ++++
 .../java/org/apache/kudu/client/TestScanToken.java | 40 ++++++++++++++++++++++
 4 files changed, 55 insertions(+)

diff --git 
a/java/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduClient.java 
b/java/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduClient.java
index f3a437d..6a9ab5a 100644
--- a/java/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduClient.java
+++ b/java/kudu-client/src/main/java/org/apache/kudu/client/AsyncKuduClient.java
@@ -2235,6 +2235,8 @@ public class AsyncKuduClient implements AutoCloseable {
     final String uuid = info.getUuid();
     LOG.info("Invalidating location {} for tablet {}: {}",
              info, tablet.getTabletId(), errorMessage);
+    // TODO(ghenke): Should this also remove the related replica?
+    //  As it stands there can be a replica with a missing tablet server.
     tablet.removeTabletClient(uuid);
   }
 
diff --git 
a/java/kudu-client/src/main/java/org/apache/kudu/client/KuduScanToken.java 
b/java/kudu-client/src/main/java/org/apache/kudu/client/KuduScanToken.java
index 2d39c4b..1282a04 100644
--- a/java/kudu-client/src/main/java/org/apache/kudu/client/KuduScanToken.java
+++ b/java/kudu-client/src/main/java/org/apache/kudu/client/KuduScanToken.java
@@ -679,6 +679,13 @@ public class KuduScanToken implements 
Comparable<KuduScanToken> {
               for (LocatedTablet.Replica replica : remoteTablet.getReplicas()) 
{
                 Integer serverIndex = serverIndexMap.get(
                     new HostAndPort(replica.getRpcHost(), 
replica.getRpcPort()));
+                // If the server index is not found it means that 
RemoteTablet.removeTabletClient
+                // was called and removed the server likely as a result of a 
tablet not found error.
+                // In that case we should remove the replica as it can't be 
contacted.
+                if (serverIndex == null) {
+                  continue;
+                }
+
                 Client.TabletMetadataPB.ReplicaMetadataPB.Builder 
tabletMetadataBuilder =
                     Client.TabletMetadataPB.ReplicaMetadataPB.newBuilder()
                         .setRole(replica.getRoleAsEnum())
diff --git 
a/java/kudu-client/src/main/java/org/apache/kudu/client/RemoteTablet.java 
b/java/kudu-client/src/main/java/org/apache/kudu/client/RemoteTablet.java
index 29f33d4..6d63b2c 100644
--- a/java/kudu-client/src/main/java/org/apache/kudu/client/RemoteTablet.java
+++ b/java/kudu-client/src/main/java/org/apache/kudu/client/RemoteTablet.java
@@ -130,6 +130,8 @@ public class RemoteTablet implements 
Comparable<RemoteTablet> {
       if (leaderUuid != null && leaderUuid.equals(uuid)) {
         leaderUuid = null;
       }
+      // TODO(ghenke): Should this also remove the related replica?
+      //  As it stands there can be a replica with a missing tablet server.
       if (tabletServers.remove(uuid) != null) {
         return true;
       }
@@ -260,6 +262,10 @@ public class RemoteTablet implements 
Comparable<RemoteTablet> {
   /**
    * Get replicas of this tablet. The returned list may not be mutated.
    *
+   * This list of replicas may include replicas for servers that have been
+   * removed via `removeTabletClient`, therefore won't be returned via
+   * `getTabletServersCopy`.
+   *
    * @return the replicas of the tablet
    */
   List<LocatedTablet.Replica> getReplicas() {
diff --git 
a/java/kudu-client/src/test/java/org/apache/kudu/client/TestScanToken.java 
b/java/kudu-client/src/test/java/org/apache/kudu/client/TestScanToken.java
index 490b7dd..87e3128 100644
--- a/java/kudu-client/src/test/java/org/apache/kudu/client/TestScanToken.java
+++ b/java/kudu-client/src/test/java/org/apache/kudu/client/TestScanToken.java
@@ -693,4 +693,44 @@ public class TestScanToken {
     assertEquals(1, resultKeys.size());
     assertEquals(PREDICATE_VAL, 
Iterables.getOnlyElement(resultKeys).intValue());
   }
+
+  /**
+   * Regression test for KUDU-3205.
+   */
+  @Test
+  public void testBuildTokensWithDownTabletServer() throws Exception {
+    Schema schema = getBasicSchema();
+    CreateTableOptions createOptions = new CreateTableOptions();
+    createOptions.setRangePartitionColumns(ImmutableList.of());
+    createOptions.setNumReplicas(3);
+    KuduTable table = client.createTable(testTableName, schema, createOptions);
+
+    // Insert a row.
+    KuduSession session = client.newSession();
+    Insert insert = createBasicSchemaInsert(table, 1);
+    session.apply(insert);
+    session.close();
+
+    // Remove a tablet server from the remote tablet by calling 
`removeTabletClient`.
+    // This is done in normal applications via 
AsyncKuduClient.invalidateTabletCache
+    // when a tablet not found error is handled.
+    TableLocationsCache.Entry entry =
+        asyncClient.getTableLocationEntry(table.getTableId(), 
insert.partitionKey());
+    RemoteTablet remoteTablet = entry.getTablet();
+    List<ServerInfo> tabletServers = remoteTablet.getTabletServersCopy();
+    remoteTablet.removeTabletClient(tabletServers.get(0).getUuid());
+
+    // Ensure we can build and use the token without an error.
+    KuduScanToken.KuduScanTokenBuilder tokenBuilder = 
client.newScanTokenBuilder(table);
+    tokenBuilder.includeTableMetadata(true);
+    tokenBuilder.includeTabletMetadata(true);
+    List<KuduScanToken> tokens = tokenBuilder.build();
+    assertEquals(1, tokens.size());
+
+    // Use a new client to simulate hydrating in a new process.
+    KuduClient newClient =
+        new 
KuduClient.KuduClientBuilder(harness.getMasterAddressesAsString()).build();
+    KuduScanner scanner = tokens.get(0).intoScanner(newClient);
+    assertEquals(1, countRowsInScan(scanner));
+  }
 }

Reply via email to