EdColeman commented on code in PR #2569: URL: https://github.com/apache/accumulo/pull/2569#discussion_r847316140
########## server/base/src/main/java/org/apache/accumulo/server/conf/store/impl/ZooPropStore.java: ########## @@ -0,0 +1,494 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.accumulo.server.conf.store.impl; + +import java.io.IOException; +import java.util.Collection; +import java.util.Map; +import java.util.Objects; +import java.util.function.BiFunction; + +import org.apache.accumulo.core.data.InstanceId; +import org.apache.accumulo.core.metrics.MetricsUtil; +import org.apache.accumulo.fate.zookeeper.ZooReaderWriter; +import org.apache.accumulo.fate.zookeeper.ZooUtil; +import org.apache.accumulo.server.ServerContext; +import org.apache.accumulo.server.conf.codec.VersionedPropCodec; +import org.apache.accumulo.server.conf.codec.VersionedProperties; +import org.apache.accumulo.server.conf.store.PropCache; +import org.apache.accumulo.server.conf.store.PropCacheKey; +import org.apache.accumulo.server.conf.store.PropChangeListener; +import org.apache.accumulo.server.conf.store.PropStore; +import org.apache.accumulo.server.conf.store.PropStoreException; +import org.apache.accumulo.server.conf.util.ConfigTransformer; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.data.Stat; +import org.checkerframework.checker.nullness.qual.NonNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.github.benmanes.caffeine.cache.Ticker; +import com.google.common.annotations.VisibleForTesting; + +public class ZooPropStore implements PropStore, PropChangeListener { + + private final static Logger log = LoggerFactory.getLogger(ZooPropStore.class); + private final static VersionedPropCodec codec = VersionedPropCodec.getDefault(); + + private final ZooReaderWriter zrw; + private final PropStoreWatcher propStoreWatcher; + private final PropCache cache; + private final PropStoreMetrics cacheMetrics = new PropStoreMetrics(); + private final ReadyMonitor zkReadyMon; + + /** + * Create instance using ZooPropStore.Builder + * + * @param instanceId + * the instance id + * @param zrw + * a wrapper set of utilities for accessing ZooKeeper. + * @param readyMonitor + * coordination utility for ZooKeeper connection status. + * @param propStoreWatcher + * an extended ZooKeeper watcher + * @param ticker + * a synthetic clock used for testing. + */ + private ZooPropStore(final InstanceId instanceId, final ZooReaderWriter zrw, + final ReadyMonitor readyMonitor, final PropStoreWatcher propStoreWatcher, + final Ticker ticker) { + + this.zrw = zrw; + this.zkReadyMon = readyMonitor; + this.propStoreWatcher = propStoreWatcher; + + MetricsUtil.initializeProducers(cacheMetrics); + + ZooPropLoader propLoader = new ZooPropLoader(zrw, codec, propStoreWatcher, cacheMetrics); + + if (ticker == null) { + cache = new PropCacheCaffeineImpl.Builder(propLoader, cacheMetrics).build(); + } else { + cache = + new PropCacheCaffeineImpl.Builder(propLoader, cacheMetrics).withTicker(ticker).build(); + } + + try { + var path = ZooUtil.getRoot(instanceId); + if (zrw.exists(path, propStoreWatcher)) { + log.debug("Have a ZooKeeper connection and found instance node: {}", instanceId); + zkReadyMon.setReady(); + } else { + throw new IllegalStateException("Instance may not have been initialized, root node: " + path + + " does not exist in ZooKeeper"); + } + } catch (InterruptedException | KeeperException ex) { + throw new IllegalStateException("Failed to read root node " + instanceId + " from ZooKeeper", + ex); + } + } + + public static PropStore initialize(final InstanceId instanceId, final ZooReaderWriter zrw) { + return new ZooPropStore.Builder(instanceId, zrw, zrw.getSessionTimeout()).build(); + } + + /** + * Create the system configuration node and initialize with empty props - used when creating a new + * Accumulo instance. + * <p> + * Needs to be called early in the Accumulo ZooKeeper initialization sequence so that correct + * watchers can be created for the instance when the PropStore is instantiated. + * + * @param instanceId + * the instance uuid. + * @param zrw + * a ZooReaderWriter + */ + public static void instancePathInit(final InstanceId instanceId, final ZooReaderWriter zrw) + throws InterruptedException, KeeperException { + var sysPropPath = PropCacheKey.forSystem(instanceId).getPath(); + VersionedProperties vProps = new VersionedProperties(); + try { + var created = + zrw.putPersistentData(sysPropPath, codec.toBytes(vProps), ZooUtil.NodeExistsPolicy.FAIL); + if (!created) { + throw new IllegalStateException( + "Failed to create default system props during initialization at: {}" + sysPropPath); + } + } catch (IOException ex) { + throw new IllegalStateException( + "Failed to create default system props during initialization at: {}" + sysPropPath, ex); + } + } + + @Override + public boolean exists(final PropCacheKey propCacheKey) throws PropStoreException { + try { + if (zrw.exists(propCacheKey.getPath())) { + return true; + } + + } catch (KeeperException ex) { + // ignore Keeper exception on check. + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + throw new PropStoreException("Interrupted testing if node exists", ex); + } + return false; + } + + /** + * Create initial system props for the instance. If the node already exists, no action is + * performed. + * + * @param context + * the server context. + * @param initProps + * map of k, v pairs of initial properties. + */ + public synchronized static void initSysProps(final ServerContext context, + final Map<String,String> initProps) { + PropCacheKey sysPropKey = PropCacheKey.forSystem(context.getInstanceID()); + createInitialProps(context, sysPropKey, initProps); + } + + /** + * Create initial properties if they do not exist. If the node exists, initialization will be + * skipped. + * + * @param context + * the system context + * @param propCacheKey + * a prop id + * @param props + * initial properties + */ + public static void createInitialProps(final ServerContext context, + final PropCacheKey propCacheKey, Map<String,String> props) { + + try { + ZooReaderWriter zrw = context.getZooReaderWriter(); + if (zrw.exists(propCacheKey.getPath())) { + return; + } + VersionedProperties vProps = new VersionedProperties(props); + zrw.putPersistentData(propCacheKey.getPath(), codec.toBytes(vProps), + ZooUtil.NodeExistsPolicy.FAIL); + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + throw new PropStoreException("Interrupted creating node " + propCacheKey, ex); + } catch (Exception ex) { + throw new PropStoreException("Failed to create node " + propCacheKey, ex); + } + } + + public PropStoreMetrics getMetrics() { + return cacheMetrics; + } + + @Override + public void create(PropCacheKey propCacheKey, Map<String,String> props) { + + try { + VersionedProperties vProps = new VersionedProperties(props); + String path = propCacheKey.getPath(); + zrw.putPersistentData(path, codec.toBytes(vProps), ZooUtil.NodeExistsPolicy.FAIL); + } catch (IOException | KeeperException | InterruptedException ex) { + throw new PropStoreException("Failed to serialize properties for " + propCacheKey, ex); + } + } + + /** + * get or create properties from the store. If the property node does not exist in ZooKeeper, + * legacy properties exist, they will be converted to the new storage form and naming convention. + * The legacy properties are deleted once the new node format is written. + * + * @param propCacheKey + * the prop cache key + * @return The versioned properties or null if the properties do not exist for the id. + * @throws PropStoreException + * if the updates fails because of an underlying store exception + */ + @Override + public @NonNull VersionedProperties get(final PropCacheKey propCacheKey) + throws PropStoreException { + checkZkConnection(); // if ZK not connected, block, do not just return a cached value. + propStoreWatcher.registerListener(propCacheKey, this); + + var props = cache.get(propCacheKey); + if (props != null) { + return props; + } + + return new ConfigTransformer(zrw, codec, propStoreWatcher).transform(propCacheKey); Review Comment: The transform code should only be called when the encoded props node does not exist. It is heavy weight - but should only run once and during initialization. In general, I tried to be conservative because the upgrade cannot be reversed once the original ZooKeeper properties are removed. The code can be run as a stand-alone executable before starting 2.1 for the first time and then the transform code is never called. It was an expressed goal that running that stand-alone utility should be optional and we should support just starting a 2.1 instance without a separate step. At a minimum, the conversion of the system property node must occur when any service starts so that the correct system configuration exists for that service. The upgrade code runs in the master and after the system configuration is needed - so some provision is necessary to upgrade the system properties before the upgrade code can be run. A primary design point was to allow tservers to be started first, without the master - and before the upgrade code could have run. To start a tserver, it must be able to read the system properties. With that, the design attempts to allow that without hammering ZooKeeper - as the tservers come on line hopefully only a few would even try to create the ephemeral node. Once that ephemeral node exists, any other process will wait for the conversion to complete. And only the one node with the lock would read individual legacy property nodes (a call to getChildren and then individual node reads) And once the encoded node exits, all other services will use that without even entering the transform path. Code that tried, but failed to get the ephemeral lock should block and then continue with the encoded node once it is created. The uuid in the ephemeral node is to ensure that the node created was created by that process and not someone else. The checking of the uuid later double checks that the same node is present - that seemed easier to reason about than trying to use events. The flow is that the code sees the encoded nod is missing, tries to create the ephemeral node, checks the node exists with the service uuid, reads any legacy properties, transforms them and persists the encoded node in ZooKeeper. At that point any service that starts later will use that encoded node. The check before the delete just double checks that the same service has the same ephemeral lock - if somehow it does not, the delete is not executed - that would allow an admin to manually try to correct things - and preserves the legacy properties if necessary for reference. The properties will have been persisted in the encoded node, so any deletion errors will not change that, but once the legacy properties are deleted, it is irreversible. Even with a partial delete of the legacy nodes, it should not matter - once the encoded node is written, that is the single source of "truth" for the system. The extra check is used because trying to recover any values could be challenging because either you printed / saved the values before the upgrade or you need to dig through the logs and try to find the values - and sensitive values are not going to be in the logs anyway. The code could have have treated the system props as a special case, but generalizing it to any node eliminates special handling if somehow things changed in the future. However, in practice the code should only need to execute for the system props. Any service on start will create the system node, and then wait for the master to start assignments. The master will start, convert all of the other nodes as part of the upgrade and then the transform code should never run again. The current code uses create calls when a namespace or table is created, this also allows setting properties at initialization time. But, if a node does not exist, the transform code would run, but should not have any properties to convert. This climates coding errors if create was not called - but this behavior will need to be reviewed for 2.2 to see if create calls should be optional. This code also only needs to exist for 2.1.x releases, a 2.2 or 3.x can assume that the encoded properties exist. (if there was a 2.2 release and someone wanted to upgrade from a 1.x or 2.0 to 2.2 they would need to first upgrade to a 2.1.x instance as an intermediate step) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
