EdColeman commented on code in PR #2569:
URL: https://github.com/apache/accumulo/pull/2569#discussion_r847316140


##########
server/base/src/main/java/org/apache/accumulo/server/conf/store/impl/ZooPropStore.java:
##########
@@ -0,0 +1,494 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.accumulo.server.conf.store.impl;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Map;
+import java.util.Objects;
+import java.util.function.BiFunction;
+
+import org.apache.accumulo.core.data.InstanceId;
+import org.apache.accumulo.core.metrics.MetricsUtil;
+import org.apache.accumulo.fate.zookeeper.ZooReaderWriter;
+import org.apache.accumulo.fate.zookeeper.ZooUtil;
+import org.apache.accumulo.server.ServerContext;
+import org.apache.accumulo.server.conf.codec.VersionedPropCodec;
+import org.apache.accumulo.server.conf.codec.VersionedProperties;
+import org.apache.accumulo.server.conf.store.PropCache;
+import org.apache.accumulo.server.conf.store.PropCacheKey;
+import org.apache.accumulo.server.conf.store.PropChangeListener;
+import org.apache.accumulo.server.conf.store.PropStore;
+import org.apache.accumulo.server.conf.store.PropStoreException;
+import org.apache.accumulo.server.conf.util.ConfigTransformer;
+import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.data.Stat;
+import org.checkerframework.checker.nullness.qual.NonNull;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.github.benmanes.caffeine.cache.Ticker;
+import com.google.common.annotations.VisibleForTesting;
+
+public class ZooPropStore implements PropStore, PropChangeListener {
+
+  private final static Logger log = 
LoggerFactory.getLogger(ZooPropStore.class);
+  private final static VersionedPropCodec codec = 
VersionedPropCodec.getDefault();
+
+  private final ZooReaderWriter zrw;
+  private final PropStoreWatcher propStoreWatcher;
+  private final PropCache cache;
+  private final PropStoreMetrics cacheMetrics = new PropStoreMetrics();
+  private final ReadyMonitor zkReadyMon;
+
+  /**
+   * Create instance using ZooPropStore.Builder
+   *
+   * @param instanceId
+   *          the instance id
+   * @param zrw
+   *          a wrapper set of utilities for accessing ZooKeeper.
+   * @param readyMonitor
+   *          coordination utility for ZooKeeper connection status.
+   * @param propStoreWatcher
+   *          an extended ZooKeeper watcher
+   * @param ticker
+   *          a synthetic clock used for testing.
+   */
+  private ZooPropStore(final InstanceId instanceId, final ZooReaderWriter zrw,
+      final ReadyMonitor readyMonitor, final PropStoreWatcher propStoreWatcher,
+      final Ticker ticker) {
+
+    this.zrw = zrw;
+    this.zkReadyMon = readyMonitor;
+    this.propStoreWatcher = propStoreWatcher;
+
+    MetricsUtil.initializeProducers(cacheMetrics);
+
+    ZooPropLoader propLoader = new ZooPropLoader(zrw, codec, propStoreWatcher, 
cacheMetrics);
+
+    if (ticker == null) {
+      cache = new PropCacheCaffeineImpl.Builder(propLoader, 
cacheMetrics).build();
+    } else {
+      cache =
+          new PropCacheCaffeineImpl.Builder(propLoader, 
cacheMetrics).withTicker(ticker).build();
+    }
+
+    try {
+      var path = ZooUtil.getRoot(instanceId);
+      if (zrw.exists(path, propStoreWatcher)) {
+        log.debug("Have a ZooKeeper connection and found instance node: {}", 
instanceId);
+        zkReadyMon.setReady();
+      } else {
+        throw new IllegalStateException("Instance may not have been 
initialized, root node: " + path
+            + " does not exist in ZooKeeper");
+      }
+    } catch (InterruptedException | KeeperException ex) {
+      throw new IllegalStateException("Failed to read root node " + instanceId 
+ " from ZooKeeper",
+          ex);
+    }
+  }
+
+  public static PropStore initialize(final InstanceId instanceId, final 
ZooReaderWriter zrw) {
+    return new ZooPropStore.Builder(instanceId, zrw, 
zrw.getSessionTimeout()).build();
+  }
+
+  /**
+   * Create the system configuration node and initialize with empty props - 
used when creating a new
+   * Accumulo instance.
+   * <p>
+   * Needs to be called early in the Accumulo ZooKeeper initialization 
sequence so that correct
+   * watchers can be created for the instance when the PropStore is 
instantiated.
+   *
+   * @param instanceId
+   *          the instance uuid.
+   * @param zrw
+   *          a ZooReaderWriter
+   */
+  public static void instancePathInit(final InstanceId instanceId, final 
ZooReaderWriter zrw)
+      throws InterruptedException, KeeperException {
+    var sysPropPath = PropCacheKey.forSystem(instanceId).getPath();
+    VersionedProperties vProps = new VersionedProperties();
+    try {
+      var created =
+          zrw.putPersistentData(sysPropPath, codec.toBytes(vProps), 
ZooUtil.NodeExistsPolicy.FAIL);
+      if (!created) {
+        throw new IllegalStateException(
+            "Failed to create default system props during initialization at: 
{}" + sysPropPath);
+      }
+    } catch (IOException ex) {
+      throw new IllegalStateException(
+          "Failed to create default system props during initialization at: {}" 
+ sysPropPath, ex);
+    }
+  }
+
+  @Override
+  public boolean exists(final PropCacheKey propCacheKey) throws 
PropStoreException {
+    try {
+      if (zrw.exists(propCacheKey.getPath())) {
+        return true;
+      }
+
+    } catch (KeeperException ex) {
+      // ignore Keeper exception on check.
+    } catch (InterruptedException ex) {
+      Thread.currentThread().interrupt();
+      throw new PropStoreException("Interrupted testing if node exists", ex);
+    }
+    return false;
+  }
+
+  /**
+   * Create initial system props for the instance. If the node already exists, 
no action is
+   * performed.
+   *
+   * @param context
+   *          the server context.
+   * @param initProps
+   *          map of k, v pairs of initial properties.
+   */
+  public synchronized static void initSysProps(final ServerContext context,
+      final Map<String,String> initProps) {
+    PropCacheKey sysPropKey = PropCacheKey.forSystem(context.getInstanceID());
+    createInitialProps(context, sysPropKey, initProps);
+  }
+
+  /**
+   * Create initial properties if they do not exist. If the node exists, 
initialization will be
+   * skipped.
+   *
+   * @param context
+   *          the system context
+   * @param propCacheKey
+   *          a prop id
+   * @param props
+   *          initial properties
+   */
+  public static void createInitialProps(final ServerContext context,
+      final PropCacheKey propCacheKey, Map<String,String> props) {
+
+    try {
+      ZooReaderWriter zrw = context.getZooReaderWriter();
+      if (zrw.exists(propCacheKey.getPath())) {
+        return;
+      }
+      VersionedProperties vProps = new VersionedProperties(props);
+      zrw.putPersistentData(propCacheKey.getPath(), codec.toBytes(vProps),
+          ZooUtil.NodeExistsPolicy.FAIL);
+    } catch (InterruptedException ex) {
+      Thread.currentThread().interrupt();
+      throw new PropStoreException("Interrupted creating node " + 
propCacheKey, ex);
+    } catch (Exception ex) {
+      throw new PropStoreException("Failed to create node " + propCacheKey, 
ex);
+    }
+  }
+
/**
 * Get the metrics holder used to record property cache statistics.
 *
 * @return the PropStoreMetrics instance registered with the metrics system in the constructor.
 */
public PropStoreMetrics getMetrics() {
  return cacheMetrics;
}
+
+  @Override
+  public void create(PropCacheKey propCacheKey, Map<String,String> props) {
+
+    try {
+      VersionedProperties vProps = new VersionedProperties(props);
+      String path = propCacheKey.getPath();
+      zrw.putPersistentData(path, codec.toBytes(vProps), 
ZooUtil.NodeExistsPolicy.FAIL);
+    } catch (IOException | KeeperException | InterruptedException ex) {
+      throw new PropStoreException("Failed to serialize properties for " + 
propCacheKey, ex);
+    }
+  }
+
+  /**
+   * get or create properties from the store. If the property node does not 
exist in ZooKeeper,
+   * legacy properties exist, they will be converted to the new storage form 
and naming convention.
+   * The legacy properties are deleted once the new node format is written.
+   *
+   * @param propCacheKey
+   *          the prop cache key
+   * @return The versioned properties or null if the properties do not exist 
for the id.
+   * @throws PropStoreException
+   *           if the updates fails because of an underlying store exception
+   */
+  @Override
+  public @NonNull VersionedProperties get(final PropCacheKey propCacheKey)
+      throws PropStoreException {
+    checkZkConnection(); // if ZK not connected, block, do not just return a 
cached value.
+    propStoreWatcher.registerListener(propCacheKey, this);
+
+    var props = cache.get(propCacheKey);
+    if (props != null) {
+      return props;
+    }
+
+    return new ConfigTransformer(zrw, codec, 
propStoreWatcher).transform(propCacheKey);

Review Comment:
   The transform code should only be called when the encoded props node does 
not exist. It is heavy weight - but should only run once and during 
initialization. In general, I tried to be conservative because the upgrade 
cannot be reversed once the original ZooKeeper properties are removed.
   
   The code can be run as a stand-alone executable before starting 2.1 for the 
first time and then the transform code is never called. It was an expressed 
goal that running that stand-alone utility should be optional and we should 
support just starting a 2.1 instance without a separate step.  
   
   At a minimum, the conversion of the system property node must occur when any 
service starts so that the correct system configuration exists for that 
service. The upgrade code runs in the master and after the system configuration 
is needed - so some provision is necessary to upgrade the system properties 
before the upgrade code can be run.
   
   A primary design point was to allow tservers to be started first, without 
the master - and before the upgrade code could have run. To start a tserver, it 
must be able to read the system properties.  With that, the design attempts to 
allow that without hammering ZooKeeper - as the tservers come on line hopefully 
only a few would even try to create the ephemeral node. Once that ephemeral 
node exists, any other process will wait for the conversion to complete. And 
only the one node with the lock would read individual legacy property nodes (a 
call to getChildren and then individual node reads). And once the encoded node 
exists, all other services will use that without even entering the transform 
path. Code that tried, but failed to get the ephemeral lock should block and 
then continue with the encoded node once it is created.
   
   The uuid in the ephemeral node is to ensure that the node created was 
created by that process and not someone else.  The checking of the uuid later 
double checks that the same node is present - that seemed easier to reason 
about than trying to use events.
   
   The flow is that the code sees the encoded node is missing, tries to create 
the ephemeral node, checks the node exists with the service uuid, reads any 
legacy properties, transforms them and persists the encoded node in ZooKeeper. 
At that point any service that starts later will use that encoded node.  The 
check before the delete just double checks that the same service has the same 
ephemeral lock - if somehow it does not, the delete is not executed - that 
would allow an admin to manually try to correct things - and preserves the 
legacy properties if necessary for reference.  
   
   The properties will have been persisted in the encoded node, so any deletion 
errors will not change that, but once the legacy properties are deleted, it is 
irreversible. Even with a partial delete of the legacy nodes, it should not 
matter - once the encoded node is written, that is the single source of "truth" 
for the system. 
   
   The extra check is used because trying to recover any values could be 
challenging because either you printed / saved the values before the upgrade or 
you need to dig through the logs and try to find the values - and sensitive 
values are not going to be in the logs anyway.  
   
   The code could have treated the system props as a special case, but 
generalizing it to any node eliminates special handling if somehow things 
changed in the future. However, in practice the code should only need to 
execute for the system props. Any service on start will create the system node, 
and then wait for the master to start assignments. The master will start, 
convert all of the other nodes as part of the upgrade and then the transform 
code should never run again. 
   
   The current code uses create calls when a namespace or table is created, 
this also allows setting properties at initialization time. But, if a node does 
not exist, the transform code would run, but should not have any properties to 
convert. This eliminates coding errors if create was not called - but this 
behavior will need to be reviewed for 2.2 to see if create calls should be 
optional.
   
   This code also only needs to exist for 2.1.x releases, a 2.2 or 3.x can 
assume that the encoded properties exist. (if there was a 2.2 release and 
someone wanted to upgrade from a 1.x or 2.0 to 2.2 they would need to first 
upgrade to a 2.1.x instance as an intermediate step)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to