EdColeman commented on code in PR #3137:
URL: https://github.com/apache/accumulo/pull/3137#discussion_r1057914369


##########
server/manager/src/main/java/org/apache/accumulo/manager/upgrade/Upgrader10to11.java:
##########
@@ -0,0 +1,270 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.accumulo.manager.upgrade;
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.apache.accumulo.core.Constants.ZNAMESPACES;
+import static org.apache.accumulo.core.Constants.ZTABLES;
+import static org.apache.accumulo.core.Constants.ZTABLE_STATE;
+import static 
org.apache.accumulo.core.metadata.schema.MetadataSchema.RESERVED_PREFIX;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+import org.apache.accumulo.core.Constants;
+import org.apache.accumulo.core.client.BatchDeleter;
+import org.apache.accumulo.core.client.MutationsRejectedException;
+import org.apache.accumulo.core.client.TableNotFoundException;
+import org.apache.accumulo.core.data.InstanceId;
+import org.apache.accumulo.core.data.NamespaceId;
+import org.apache.accumulo.core.data.Range;
+import org.apache.accumulo.core.data.TableId;
+import org.apache.accumulo.core.fate.zookeeper.ZooReaderWriter;
+import org.apache.accumulo.core.fate.zookeeper.ZooUtil;
+import org.apache.accumulo.core.manager.state.tables.TableState;
+import org.apache.accumulo.core.metadata.MetadataTable;
+import org.apache.accumulo.core.security.Authorizations;
+import org.apache.accumulo.core.volume.Volume;
+import org.apache.accumulo.server.ServerContext;
+import org.apache.accumulo.server.conf.store.NamespacePropKey;
+import org.apache.accumulo.server.conf.store.PropStore;
+import org.apache.accumulo.server.conf.store.PropStoreKey;
+import org.apache.accumulo.server.conf.store.SystemPropKey;
+import org.apache.accumulo.server.conf.store.TablePropKey;
+import org.apache.hadoop.fs.Path;
+import org.apache.zookeeper.KeeperException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.annotations.VisibleForTesting;
+
+public class Upgrader10to11 implements Upgrader {
+
+  private static final Logger log = 
LoggerFactory.getLogger(Upgrader10to11.class);
+
+  // Included for upgrade code usage any other usage post 3.0 should not be 
used.
+  private static final TableId REPLICATION_ID = TableId.of("+rep");
+
+  public Upgrader10to11() {
+    super();
+  }
+
+  @Override
+  public void upgradeZookeeper(final ServerContext context) {
+    log.info("upgrade of ZooKeeper entries");
+
+    var zrw = context.getZooReaderWriter();
+    var iid = context.getInstanceID();
+
+    // if the replication base path (../tables/+rep) assume removed or never 
existed.
+    if (!checkReplicationTableInZk(iid, zrw)) {
+      log.debug("replication table root node does not exist in ZooKeeper - 
nothing to do");
+      return;
+    }
+
+    // if the replication table is online - stop. There could be data in 
transit.
+    if (!checkReplicationOffline(iid, zrw)) {
+      throw new IllegalStateException(
+          "Replication table is not offline. Cannot continue with upgrade that 
will remove replication with replication active");
+    }
+
+    deleteReplicationConfigs(zrw, iid, context.getPropStore());
+
+    deleteReplicationTableZkEntries(zrw, iid);
+
+  }
+
+  @Override
+  public void upgradeRoot(final ServerContext context) {
+    log.info("upgrade root - skipping, nothing to do");
+  }
+
+  @Override
+  public void upgradeMetadata(final ServerContext context) {
+    log.info("upgrade metadata entries");
+    deleteReplMetadataEntries(context);
+    deleteReplHdfsFiles(context);
+  }
+
+  /**
+   * remove +rep entries from metadata.
+   */
+  private void deleteReplMetadataEntries(final ServerContext context) {
+    try (BatchDeleter deleter =
+        context.createBatchDeleter(MetadataTable.NAME, Authorizations.EMPTY, 
10)) {
+
+      Range repTableRange =
+          new Range(REPLICATION_ID.canonical() + ";", true, 
REPLICATION_ID.canonical() + "<", true);
+      // copied from MetadataSchema 2.1 (removed in 3.0)
+      Range repWalRange =
+          new Range(RESERVED_PREFIX + "repl", true, RESERVED_PREFIX + "repm", 
false);
+
+      deleter.setRanges(List.of(repTableRange, repWalRange));
+      deleter.delete();
+    } catch (TableNotFoundException | MutationsRejectedException ex) {
+      throw new IllegalStateException("failed to remove replication info from 
metadata table", ex);
+    }
+  }
+
+  @VisibleForTesting
+  void deleteReplHdfsFiles(final ServerContext context) {
+    try {
+      for (Volume volume : context.getVolumeManager().getVolumes()) {
+        String dirUri = volume.getBasePath() + Constants.HDFS_TABLES_DIR + 
Path.SEPARATOR
+            + REPLICATION_ID.canonical();
+        Path replPath = new Path(dirUri);
+        if (volume.getFileSystem().exists(replPath)) {
+          try {
+            log.debug("Removing replication dir and files in hdfs {}", 
replPath);
+            volume.getFileSystem().delete(replPath, true);
+          } catch (IOException ex) {
+            log.error("Unable to remove replication dir and files from " + 
replPath + ": " + ex);
+          }
+        }
+      }
+    } catch (IOException ex) {
+      log.error("Unable to remove replication dir and files: " + ex);
+    }
+  }
+
+  private boolean checkReplicationTableInZk(final InstanceId iid, final 
ZooReaderWriter zrw) {
+    try {
+      String path = buildRepTablePath(iid);
+      return zrw.exists(path);
+    } catch (KeeperException ex) {
+      throw new IllegalStateException("ZooKeeper error - cannot determine 
replication table status",
+          ex);
+    } catch (InterruptedException ex) {
+      Thread.currentThread().interrupt();
+      throw new IllegalStateException("interrupted reading replication state 
from ZooKeeper", ex);
+    }
+  }
+
+  /**
+   * To protect against removing replication information if replication is 
being used and possible
+   * active, check the replication table state in Zookeeper to see if it is 
ONLINE (active) or
+   * OFFLINE (inactive). If the state node does not exist, then the status is 
considered as OFFLINE.
+   *
+   * @return true if the replication table state is OFFLINE, false otherwise
+   */
+  private boolean checkReplicationOffline(final InstanceId iid, final 
ZooReaderWriter zrw) {
+    try {
+      String path = buildRepTablePath(iid) + ZTABLE_STATE;
+      byte[] bytes = zrw.getData(path);
+      if (bytes != null && bytes.length > 0) {
+        String status = new String(bytes, UTF_8);
+        return TableState.OFFLINE.name().equals(status);
+      }
+      return false;
+    } catch (KeeperException ex) {
+      throw new IllegalStateException("ZooKeeper error - cannot determine 
replication table status",
+          ex);
+    } catch (InterruptedException ex) {
+      Thread.currentThread().interrupt();
+      throw new IllegalStateException("interrupted reading replication state 
from ZooKeeper", ex);
+    }
+  }
+
+  /**
+   * Utility method to build the ZooKeeper replication table path. The path 
resolves to
+   * {@code /accumulo/INSTANCE_ID/tables/+rep}
+   */
+  static String buildRepTablePath(final InstanceId iid) {
+    return ZooUtil.getRoot(iid) + ZTABLES + "/" + REPLICATION_ID.canonical();
+  }
+
+  private void deleteReplicationTableZkEntries(ZooReaderWriter zrw, InstanceId 
iid) {
+    String repTablePath = buildRepTablePath(iid);
+    try {
+      zrw.recursiveDelete(repTablePath, ZooUtil.NodeMissingPolicy.SKIP);
+    } catch (KeeperException ex) {
+      throw new IllegalStateException(
+          "ZooKeeper error - failed recursive deletion on " + repTablePath, 
ex);
+    } catch (InterruptedException ex) {
+      Thread.currentThread().interrupt();
+      throw new IllegalStateException("interrupted deleting " + repTablePath + 
" from ZooKeeper",
+          ex);
+    }
+  }
+
+  private void deleteReplicationConfigs(ZooReaderWriter zrw, InstanceId iid, 
PropStore propStore) {
+    List<PropStoreKey<?>> ids = getPropKeysFromZkIds(zrw, iid);

Review Comment:
   No, it is more than removing them from the cache - 
`propStore.removeProperties` updates ZooKeeper.  The issue is when there are 
replication iterator configs specified in the configuration - particularly 
the metadata table. But this is also checking the other ZooKeeper configs to 
make sure that if the replication iterator was set, it is removed.
   
   In 2.1, the ZooKeeper config for the metadata table has these replication 
entries:
   
   ```
   root@uno> config -t accumulo.metadata -f replc
   
-----------+-------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------
   SCOPE      | NAME                                            | VALUE
   
-----------+-------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------
   table      | table.iterator.majc.replcombiner .............. | 
9,org.apache.accumulo.server.replication.StatusCombiner
   table      | table.iterator.majc.replcombiner.opt.columns .. | stat
   table      | table.iterator.minc.replcombiner .............. | 
9,org.apache.accumulo.server.replication.StatusCombiner
   table      | table.iterator.minc.replcombiner.opt.columns .. | stat
   table      | table.iterator.scan.replcombiner .............. | 
9,org.apache.accumulo.server.replication.StatusCombiner
   table      | table.iterator.scan.replcombiner.opt.columns .. | stat
   
-----------+-------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------
   
   ```
   
   And, the StatusCombiner, if present, will cause 3.0 code to fail because 
it has been removed from the code base. 
   
   And at this point, only the metadata and root tables are online.
   
   Not sure what to do if replication iterators have been set in the system 
config file - that's a manual removal - but if present they would likely cause 
the manager to go offline, even if the upgrade sequence could complete.  The easiest 
way is for the upgrade instructions to include removing any replication 
configuration values with 2.1 down, but before starting 3.0.  There will 
likely be manual steps to completely remove replication remnants - but 
functionally they will not prevent Accumulo from running. 
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to