EdColeman commented on code in PR #2569: URL: https://github.com/apache/accumulo/pull/2569#discussion_r847836104
########## server/base/src/main/java/org/apache/accumulo/server/conf/util/ConfigTransformer.java: ########## @@ -0,0 +1,366 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.accumulo.server.conf.util; + +import static java.nio.charset.StandardCharsets.UTF_8; +import static java.util.concurrent.TimeUnit.MILLISECONDS; +import static java.util.concurrent.TimeUnit.MINUTES; +import static java.util.concurrent.TimeUnit.SECONDS; + +import java.io.IOException; +import java.time.Duration; +import java.time.Instant; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.TreeSet; + +import org.apache.accumulo.core.conf.DeprecatedPropertyUtil; +import org.apache.accumulo.core.util.DurationFormat; +import org.apache.accumulo.fate.util.Retry; +import org.apache.accumulo.fate.zookeeper.ZooReaderWriter; +import org.apache.accumulo.fate.zookeeper.ZooUtil; +import org.apache.accumulo.server.conf.codec.VersionedPropCodec; +import org.apache.accumulo.server.conf.codec.VersionedProperties; +import org.apache.accumulo.server.conf.store.PropCacheKey; +import org.apache.accumulo.server.conf.store.PropStoreException; +import 
org.apache.accumulo.server.conf.store.impl.PropStoreWatcher; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.data.Stat; +import org.checkerframework.checker.nullness.qual.NonNull; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.annotations.VisibleForTesting; + +/** + * Read legacy properties (pre 2.1) from ZooKeeper and transform them into the single node format. + * The encoded properties are stored in ZooKeeper and then the legacy property ZooKeeper nodes are + * deleted. + */ +public class ConfigTransformer { + + private static final Logger log = LoggerFactory.getLogger(ConfigTransformer.class); + + private final ZooReaderWriter zrw; + private final VersionedPropCodec codec; + private final PropStoreWatcher propStoreWatcher; + private final Retry retry; + + /** + * Instantiate a transformer instance. + * + * @param zrw + * a ZooReaderWriter + * @param codec + * the codec used to encode to the single-node format. + * @param propStoreWatcher + * the watcher registered to receive future notifications of changes to the encoded + * property node. + */ + public ConfigTransformer(final ZooReaderWriter zrw, VersionedPropCodec codec, + final PropStoreWatcher propStoreWatcher) { + this.zrw = zrw; + this.codec = codec; + this.propStoreWatcher = propStoreWatcher; + + // default - allow for a conservative max delay of about a minute + retry = + Retry.builder().maxRetries(15).retryAfter(250, MILLISECONDS).incrementBy(500, MILLISECONDS) + .maxWait(5, SECONDS).backOffFactor(1.75).logInterval(3, MINUTES).createRetry(); + + } + + public ConfigTransformer(final ZooReaderWriter zrw, VersionedPropCodec codec, + final PropStoreWatcher propStoreWatcher, final Retry retry) { + this.zrw = zrw; + this.codec = codec; + this.propStoreWatcher = propStoreWatcher; + this.retry = retry; + } + + /** + * Transform the properties for the provided prop cache key. 
+ * + * @return the encoded properties. + */ + public VersionedProperties transform(final PropCacheKey propCacheKey) { + TransformLock lock = TransformLock.createLock(propCacheKey, zrw); + return transform(propCacheKey, lock); + } + + // Allow external (mocked) TransformLock to be used + @VisibleForTesting + VersionedProperties transform(final PropCacheKey propCacheKey, final TransformLock lock) { + + VersionedProperties results; + Instant start = Instant.now(); + try { + while (!lock.isLocked()) { + try { + retry.useRetry(); + retry.waitForNextAttempt(); + // look and return node if created. + if (zrw.exists(propCacheKey.getPath())) { + Stat stat = new Stat(); + byte[] bytes = zrw.getData(propCacheKey.getPath(), propStoreWatcher, stat); + return codec.fromBytes(stat.getVersion(), bytes); + } + // still does not exist - try again. + lock.lock(); + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + throw new PropStoreException("Failed to get transform lock for " + propCacheKey, ex); + } catch (IllegalStateException ex) { + throw new PropStoreException("Failed to get transform lock for " + propCacheKey, ex); + } + } + + Set<LegacyPropNode> upgradeNodes = readLegacyProps(propCacheKey); + + upgradeNodes = convertDeprecatedProps(propCacheKey, upgradeNodes); + + results = writeConverted(propCacheKey, upgradeNodes); + + if (results == null) { + throw new PropStoreException("Could not create properties for " + propCacheKey, null); + } + + // validate lock still valid before deletion. + if (!lock.validateLock()) { Review Comment: The locking was refactored in fbb86bafe9. The two-level caching was existing behavior and was necessary to support the updateCount scheme used by the derivers - because of the frequency with which the updateCount is checked, I attempted to avoid locking and synchronization except during updates. 
The updateCount deals with the property hierarchy - which seems expensive to compute and is why the two-level caching was used in the first place. The refactored code in fbb86bafe9 should resolve the comments on locking and the multiple calls to set the snapshot to null. The code may have race conditions because getProperties and getDataVersion in the configuration are separate method calls and could therefore read different snapshots - however, I think that any races would be handled because things will be eventually consistent. Access to the property map should always return the most recent values, and if a call gets an earlier data version and then reads a more recent set of properties - those properties should be "more" correct - and then on the next data version check, the count will have advanced to match the properties. It will then re-read and re-build the configuration with the values that it already had, but they are still correct. This should be rare and seems a reasonable tradeoff - otherwise additional synchronization / locking would be needed to make sure they are strictly consistent - and that synchronization / locking would likely impact the deriver performance. (Well, that is the goal) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
