stefan-egli commented on code in PR #635: URL: https://github.com/apache/jackrabbit-oak/pull/635#discussion_r930849882
########## oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/mongo/MongoDocumentStoreThrottlingMetricsUpdater.java: ########## @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.jackrabbit.oak.plugins.document.mongo; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.math.DoubleMath; +import com.google.common.util.concurrent.AtomicDouble; +import com.google.common.util.concurrent.MoreExecutors; +import com.mongodb.client.MongoCollection; +import com.mongodb.client.MongoDatabase; +import org.bson.BsonTimestamp; +import org.bson.Document; +import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Objects; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledThreadPoolExecutor; + +import static java.util.concurrent.TimeUnit.SECONDS; + +/** + * Mongo Document Store throttling metric updater. 
+ * + * This class fetches and updates the mongo oplog window + */ +public class MongoDocumentStoreThrottlingMetricsUpdater { + + private static final Logger LOG = LoggerFactory.getLogger(MongoDocumentStoreThrottlingMetricsUpdater.class); + static final String TS_TIME = "ts"; + private static final String NATURAL = "$natural"; + private static final String MAX_SIZE = "maxSize"; + private static final String OPLOG_RS = "oplog.rs"; + public static final String SIZE = "size"; + private final ScheduledExecutorService throttlingMetricsExecutor; + private final AtomicDouble oplogWindow; + private final MongoDatabase localDb; + + public MongoDocumentStoreThrottlingMetricsUpdater(final @NotNull MongoDatabase localDb, final @NotNull AtomicDouble oplogWindow) { + // exiting scheduled executor, will exit when we call to shut down jvm + this.throttlingMetricsExecutor = MoreExecutors.getExitingScheduledExecutorService( + (ScheduledThreadPoolExecutor) Executors.newScheduledThreadPool(1), 40, SECONDS); + this.oplogWindow = oplogWindow; + this.localDb = localDb; + } + + public void updateMetrics() { Review Comment: Was wondering about the 3 warn cases : while it's unclear why those problems exist, it might be worth setting the `oplogWindow` to max_integer in that case - as basically we don't know how long such error situations last and what the impact is. So the fallback would be to say : "unknown oplog window", thus max_integer (currently it leaves the `oplogWindow` unchanged in warn cases, which might mean throttling even though perhaps in the meantime the oplog window grew - of course the opposite is also true, but given this is an exceptional situation I'd steer to the infinite side) ########## oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/ThrottlingDocumentStoreWrapper.java: ########## @@ -0,0 +1,216 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.oak.plugins.document.util; + +import org.apache.jackrabbit.oak.cache.CacheStats; +import org.apache.jackrabbit.oak.plugins.document.Collection; +import org.apache.jackrabbit.oak.plugins.document.Document; +import org.apache.jackrabbit.oak.plugins.document.Throttler; +import org.apache.jackrabbit.oak.plugins.document.UpdateOp; +import org.apache.jackrabbit.oak.plugins.document.DocumentStore; +import org.apache.jackrabbit.oak.plugins.document.DocumentStoreException; +import org.apache.jackrabbit.oak.plugins.document.cache.CacheInvalidationStats; +import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; +import java.util.Map; + +import static java.lang.Thread.sleep; + +/** + * Wrapper of another DocumentStore that does a throttling check on any method + * invocation (create, update or delete) and throttled the system if under high load. 
+ */ +public class ThrottlingDocumentStoreWrapper implements DocumentStore { + + private static final Logger LOG = LoggerFactory.getLogger(ThrottlingDocumentStoreWrapper.class); + + @NotNull + private final DocumentStore store; + public ThrottlingDocumentStoreWrapper(final @NotNull DocumentStore store) { + this.store = store; + } + + @Override + public <T extends Document> T find(final Collection<T> collection, final String key) { + return store.find(collection, key); + } + + @Override + public <T extends Document> T find(final Collection<T> collection, final String key, + final int maxCacheAge) { + return store.find(collection, key, maxCacheAge); + } + + @NotNull + @Override + public <T extends Document> List<T> query(final Collection<T> collection, final String fromKey, + final String toKey, final int limit) { + return store.query(collection, fromKey, toKey, limit); + } + + @Override + @NotNull + public <T extends Document> List<T> query(final Collection<T> collection, final String fromKey, + final String toKey, final String indexedProperty, + final long startValue, final int limit) { + return store.query(collection, fromKey, toKey, indexedProperty, startValue, limit); + } + + @Override + public <T extends Document> void remove(Collection<T> collection, String key) { + performThrottling(); + store.remove(collection, key); + } + + @Override + public <T extends Document> void remove(Collection<T> collection, List<String> keys) { + performThrottling(); + store.remove(collection, keys); + } + + @Override + public <T extends Document> int remove(final Collection<T> collection, final Map<String, Long> toRemove) { + performThrottling(); + return store.remove(collection, toRemove); + } + + @Override + public <T extends Document> int remove(final Collection<T> collection, final String indexedProperty, + final long startValue, final long endValue) throws DocumentStoreException { + performThrottling(); + return store.remove(collection, indexedProperty, startValue, 
endValue); + } + + @Override + public <T extends Document> boolean create(final Collection<T> collection, final List<UpdateOp> updateOps) { + performThrottling(); + return store.create(collection, updateOps); + } + + @Override + public <T extends Document> T createOrUpdate(final Collection<T> collection, final UpdateOp update) { + performThrottling(); + return store.createOrUpdate(collection, update); + } + + @Override + public <T extends Document> List<T> createOrUpdate(final Collection<T> collection, final List<UpdateOp> updateOps) { + performThrottling(); + return store.createOrUpdate(collection, updateOps); + } + + @Override + public <T extends Document> T findAndUpdate(final Collection<T> collection, final UpdateOp update) { + performThrottling(); + return store.findAndUpdate(collection, update); + } + + @Override + public CacheInvalidationStats invalidateCache() { + return store.invalidateCache(); + } + + @Override + public CacheInvalidationStats invalidateCache(Iterable<String> keys) { + return store.invalidateCache(keys); + } + + @Override + public <T extends Document> void invalidateCache(Collection<T> collection, String key) { + store.invalidateCache(collection, key); + } + + @Override + public void dispose() { + store.dispose(); + } + + @Override + public <T extends Document> T getIfCached(final Collection<T> collection, final String key) { + return store.getIfCached(collection, key); + } + + @Override + public void setReadWriteMode(String readWriteMode) { + store.setReadWriteMode(readWriteMode); + } + + @Override + public Iterable<CacheStats> getCacheStats() { + return store.getCacheStats(); + } + + @Override + public Map<String, String> getMetadata() { + return store.getMetadata(); + } + + @NotNull + @Override + public Map<String, String> getStats() { + return store.getStats(); + } + + @Override + public long determineServerTimeDifferenceMillis() { + return store.determineServerTimeDifferenceMillis(); + } + + /** + * Return the size limit for node name 
based on the document store implementation + * + * @return node name size limit + */ + @Override + public int getNodeNameLimit() { + return store.getNodeNameLimit(); + } + + /** + * Return the {@link Throttler} for the underlying store + * Default is no throttling + * + * @return throttler for document store + */ + @Override + public Throttler throttler() { + return store.throttler(); + } + + // helper methods + + private void performThrottling() { + Review Comment: I think we should not throttle the lease updates - even though currently the throttling times are lowish and probably don't interfere with the 2+ min lease durations. If the lease cannot be maintained, the throttling is equivalent to a shutdown. And that's probably not the intention. So, even though not throttling lease updates increases the use of oplog window while that is low, I'd argue lease updates are a high priority kind of data and could be exempted. One way to do that - with lease line changes - could for example be to change `performThrottling` like so: ``` private void performThrottling(final Collection<T> collection) { if (Collection.CLUSTER_NODES == collection) { // no throttling of CLUSTER_NODES collection return; } ... ``` ########## oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/DocumentNodeStoreBuilder.java: ########## @@ -100,6 +100,20 @@ static final int MANY_CHILDREN_THRESHOLD = Integer.getInteger( "oak.documentMK.manyChildren", 50); + /** + * The threshold value after which the document store should start (if enabled) throttling. + */ + // For mongo based document store this value is threshold for the oplog replication window. + public static final int DEFAULT_THROTTLING_THRESHOLD = Integer.getInteger( + "oak.documentMK.throttlingThreshold", 2); + + /** + * The default throttling time (in millis) when throttling is enabled. This is the time for + * which we block any data modification operation when system has been throttled. 
+ */ + public static final long DEFAULT_THROTTLING_TIME_MS = Long.getLong( + "oak.documentMK.throttlingTime", 20); + Review Comment: I think these could be moved to MongoDocumentStore (and then consequently renamed to `"oak.mongo..."`) as they are now specific to mongo only ########## oak-store-document/src/main/java/org/apache/jackrabbit/oak/plugins/document/util/ThrottlingDocumentStoreWrapper.java: ########## @@ -0,0 +1,216 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.jackrabbit.oak.plugins.document.util; + +import org.apache.jackrabbit.oak.cache.CacheStats; +import org.apache.jackrabbit.oak.plugins.document.Collection; +import org.apache.jackrabbit.oak.plugins.document.Document; +import org.apache.jackrabbit.oak.plugins.document.Throttler; +import org.apache.jackrabbit.oak.plugins.document.UpdateOp; +import org.apache.jackrabbit.oak.plugins.document.DocumentStore; +import org.apache.jackrabbit.oak.plugins.document.DocumentStoreException; +import org.apache.jackrabbit.oak.plugins.document.cache.CacheInvalidationStats; +import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; +import java.util.Map; + +import static java.lang.Thread.sleep; + +/** + * Wrapper of another DocumentStore that does a throttling check on any method + * invocation (create, update or delete) and throttled the system if under high load. + */ +public class ThrottlingDocumentStoreWrapper implements DocumentStore { + + private static final Logger LOG = LoggerFactory.getLogger(ThrottlingDocumentStoreWrapper.class); + + @NotNull + private final DocumentStore store; + public ThrottlingDocumentStoreWrapper(final @NotNull DocumentStore store) { + this.store = store; + } + + @Override + public <T extends Document> T find(final Collection<T> collection, final String key) { + return store.find(collection, key); + } + + @Override + public <T extends Document> T find(final Collection<T> collection, final String key, + final int maxCacheAge) { + return store.find(collection, key, maxCacheAge); + } + + @NotNull + @Override + public <T extends Document> List<T> query(final Collection<T> collection, final String fromKey, + final String toKey, final int limit) { + return store.query(collection, fromKey, toKey, limit); + } + + @Override + @NotNull + public <T extends Document> List<T> query(final Collection<T> collection, final String fromKey, + final String toKey, final String 
indexedProperty, + final long startValue, final int limit) { + return store.query(collection, fromKey, toKey, indexedProperty, startValue, limit); + } + + @Override + public <T extends Document> void remove(Collection<T> collection, String key) { + performThrottling(); + store.remove(collection, key); + } + + @Override + public <T extends Document> void remove(Collection<T> collection, List<String> keys) { + performThrottling(); + store.remove(collection, keys); + } + + @Override + public <T extends Document> int remove(final Collection<T> collection, final Map<String, Long> toRemove) { + performThrottling(); + return store.remove(collection, toRemove); + } + + @Override + public <T extends Document> int remove(final Collection<T> collection, final String indexedProperty, + final long startValue, final long endValue) throws DocumentStoreException { + performThrottling(); + return store.remove(collection, indexedProperty, startValue, endValue); + } + + @Override + public <T extends Document> boolean create(final Collection<T> collection, final List<UpdateOp> updateOps) { + performThrottling(); + return store.create(collection, updateOps); + } + + @Override + public <T extends Document> T createOrUpdate(final Collection<T> collection, final UpdateOp update) { + performThrottling(); + return store.createOrUpdate(collection, update); + } + + @Override + public <T extends Document> List<T> createOrUpdate(final Collection<T> collection, final List<UpdateOp> updateOps) { + performThrottling(); + return store.createOrUpdate(collection, updateOps); + } + + @Override + public <T extends Document> T findAndUpdate(final Collection<T> collection, final UpdateOp update) { + performThrottling(); + return store.findAndUpdate(collection, update); + } + + @Override + public CacheInvalidationStats invalidateCache() { + return store.invalidateCache(); + } + + @Override + public CacheInvalidationStats invalidateCache(Iterable<String> keys) { + return store.invalidateCache(keys); + } + 
+ @Override + public <T extends Document> void invalidateCache(Collection<T> collection, String key) { + store.invalidateCache(collection, key); + } + + @Override + public void dispose() { + store.dispose(); + } + + @Override + public <T extends Document> T getIfCached(final Collection<T> collection, final String key) { + return store.getIfCached(collection, key); + } + + @Override + public void setReadWriteMode(String readWriteMode) { + store.setReadWriteMode(readWriteMode); + } + + @Override + public Iterable<CacheStats> getCacheStats() { + return store.getCacheStats(); + } + + @Override + public Map<String, String> getMetadata() { + return store.getMetadata(); + } + + @NotNull + @Override + public Map<String, String> getStats() { + return store.getStats(); + } + + @Override + public long determineServerTimeDifferenceMillis() { + return store.determineServerTimeDifferenceMillis(); + } + + /** + * Return the size limit for node name based on the document store implementation + * + * @return node name size limit + */ + @Override + public int getNodeNameLimit() { + return store.getNodeNameLimit(); + } + + /** + * Return the {@link Throttler} for the underlying store + * Default is no throttling + * + * @return throttler for document store + */ + @Override + public Throttler throttler() { + return store.throttler(); + } + + // helper methods + + private void performThrottling() { + + final Throttler throttler = throttler(); + long throttleTime = throttler.throttlingTime(); + + if (throttleTime == 0) { + return; // no throttling + } + + try { + LOG.info("Throttling the system for {} ms", throttleTime); Review Comment: I agree that we should log at INFO if throttling is happening. I'm wondering though how noisy this one could become. Could we reduce the frequency of how much we log at INFO, for example only every 10 sec (without causing too much synchronization between the different threads calling `performThrottling`..)? 
-- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
