dlmarion commented on code in PR #2665: URL: https://github.com/apache/accumulo/pull/2665#discussion_r939180215
########## core/src/main/java/org/apache/accumulo/core/spi/scan/DefaultScanServerSelector.java: ########## @@ -0,0 +1,415 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.accumulo.core.spi.scan; + +import static java.nio.charset.StandardCharsets.UTF_8; + +import java.lang.reflect.Type; +import java.security.SecureRandom; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.function.Supplier; + +import org.apache.accumulo.core.conf.ConfigurationTypeHelper; +import org.apache.accumulo.core.data.TabletId; + +import com.google.common.base.Preconditions; +import com.google.common.base.Suppliers; +import com.google.common.collect.Sets; +import com.google.common.hash.HashCode; +import com.google.common.hash.Hashing; +import com.google.gson.Gson; +import com.google.gson.reflect.TypeToken; + +import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; + +/** + * The default Accumulo selector for scan servers. 
This selector will: + * + * <ul> + * <li>Hash each tablet to a per attempt configurable number of scan servers and then randomly + * choose one of those scan servers. Using hashing allows different clients to select the same scan + * servers for a given tablet.</li> + * <li>Use a per attempt configurable busy timeout.</li> + * </ul> + * + * <p> + * This class accepts a single configuration that has a json value. To configure this class set + * {@code scan.server.selector.opts.profiles=<json>} in the accumulo client configuration along with + * the config for the class. The following is the default configuration value. + * </p> + * <p> + * {@value DefaultScanServerSelector#PROFILES_DEFAULT} + * </p> + * + * The json is structured as a list of profiles, with each profile having the following fields. + * + * <ul> + * <li><b>isDefault : </b> A boolean that specifies whether this is the default profile. One and + * only one profile must set this to true.</li> + * <li><b>maxBusyTimeout : </b> The maximum busy timeout to use. The busy timeout from the last + * attempt configuration grows exponentially up to this max.</li> + * <li><b>scanTypeActivations : </b> A list of scan types that will activate this profile. Scan + * types are specified by setting {@code scan_type=<scan_type>} as an execution hint on the scanner. See + * {@link org.apache.accumulo.core.client.ScannerBase#setExecutionHints(Map)}</li> + * <li><b>group : </b> Scan servers can be started with an optional group. If specified, this option + * will limit the scan servers used to those that were started with this group name. If not + * specified, the set of scan servers that did not specify a group will be used. Grouping scan + * servers supports at least two use cases. First, groups can be used to dedicate resources for + * certain scans. Second, groups can be used to have different hardware/VM types for scans; for + * example, some scans could use expensive high memory VMs and others use cheaper burstable VMs.</li> 
+ * <li><b>attemptPlans : </b> A list of configurations to use for each scan attempt. Each list object + * has the following fields: + * <ul> + * <li><b>servers : </b> The number of servers to randomly choose from for this attempt.</li> + * <li><b>busyTimeout : </b> The busy timeout to use for this attempt.</li> + * <li><b>salt : </b> An optional string to append when hashing the tablet. When this is set + * differently for attempts it has the potential to cause the set of servers chosen from to be + * disjoint. When not set or the same, the servers between attempts will be subsets.</li> + * </ul> + * </li> + * </ul> + * + * <p> + * Below is an example configuration with two profiles, one is the default and the other is used + * when the scan execution hint {@code scan_type=slow} is set. + * </p> + * + * <pre> + * [ + * { + * "isDefault":true, + * "maxBusyTimeout":"5m", + * "busyTimeoutMultiplier":4, + * "attemptPlans":[ + * {"servers":"3", "busyTimeout":"33ms"}, + * {"servers":"100%", "busyTimeout":"100ms"} + * ] + * }, + * { + * "scanTypeActivations":["slow"], + * "maxBusyTimeout":"20m", + * "busyTimeoutMultiplier":8, + * "group":"lowcost", + * "attemptPlans":[ + * {"servers":"1", "busyTimeout":"10s"}, + * {"servers":"3", "busyTimeout":"30s","salt":"42"}, + * {"servers":"9", "busyTimeout":"60s","salt":"84"} + * ] + * } + * ] + * </pre> + * + * <p> + * For the default profile in the example it will start off by choosing randomly from 3 scan servers + * based on a hash of the tablet with no salt. For the first attempt it will use a busy timeout of + * 33 milliseconds. If the first attempt returns with busy, then it will randomly choose from 100% + * or all servers for the second attempt and use a busy timeout of 100ms. For subsequent attempts it + * will keep choosing from all servers and start multiplying the busy timeout by 4 until the max + * busy timeout of 5 minutes is reached. 
+ * </p> + * + * <p> + * For the profile activated by {@code scan_type=slow} it will start off by choosing randomly from 1 scan + * server based on a hash of the tablet with no salt and a busy timeout of 10s. The second attempt + * will choose from 3 scan servers based on a hash of the tablet plus the salt {@literal 42}. + * Without the salt, the single scan server from the first attempt would always be included in the + * set of 3. With the salt the single scan server from the first attempt may not be included. The + * third attempt will choose a scan server from 9 using the salt {@literal 84} and a busy timeout of + * 60s. The different salt means the set of servers that attempts 2 and 3 choose from may be + * disjoint. Attempt 4 and greater will continue to choose from the same 9 servers as attempt 3 and + * will keep increasing the busy timeout by multiplying by 8 until the maximum of 20 minutes is + * reached. For this profile it will choose from scan servers in the group {@literal lowcost}. + * </p> + */ +public class DefaultScanServerSelector implements ScanServerSelector { Review Comment: Addressed in 9ec7f5f. Renamed class to ConfigurableScanServerSelector. I wasn't sure what to call the strategy. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
