keith-turner commented on code in PR #3180:
URL: https://github.com/apache/accumulo/pull/3180#discussion_r1096872490


##########
server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManagerImpl.java:
##########
@@ -353,6 +362,38 @@ public short getDefaultReplication(Path path) {
     return getFileSystemByPath(path).getDefaultReplication(path);
   }
 
+  /**
+   * The Hadoop Configuration object does not currently allow for duplicate 
properties to be set in
+   * a single Configuration for different FileSystem URIs. Here we will look 
for properties in the
+   * Accumulo configuration of the form:
+   *
+   * <pre>
+   * general.custom.&lt;volume-uri&gt;.&lt;hdfs-property&gt;
+   * </pre>
+   *
+   * We will use these properties to return a new Configuration object that 
can be used with the
+   * FileSystem URI.
+   *
+   * @param conf AccumuloConfiguration object
+   * @param hadoopConf Hadoop Configuration object
+   * @param filesystemURI Volume Filesystem URI
+   * @return Hadoop Configuration with custom overrides for this FileSystem
+   */
+  private static Configuration 
getVolumeManagerConfiguration(AccumuloConfiguration conf,
+      final Configuration hadoopConf, final String filesystemURI) {
+    final Configuration volumeConfig = new Configuration(hadoopConf);
+    final Map<String,String> customProps =
+        
conf.getAllPropertiesWithPrefixStripped(Property.GENERAL_ARBITRARY_PROP_PREFIX);
+    customProps.forEach((key, value) -> {
+      if (key.startsWith(filesystemURI)) {
+        String property = key.substring(filesystemURI.length() + 1);
+        log.debug("Overriding property {} to {} for volume {}", property, 
value, filesystemURI);

Review Comment:
   Really like the logging.  Does this logging only happen once per volume on a 
server?  If it's only once, maybe it could be promoted to info.



##########
server/base/src/test/java/org/apache/accumulo/server/fs/VolumeManagerImplTest.java:
##########
@@ -104,4 +111,88 @@ public Scope getChooserScope() {
       assertThrows(RuntimeException.class, () -> vm.choose(chooserEnv, 
volumes));
     }
   }
+
+  @Test
+  public void testConfigurationOverrides() throws Exception {
+
+    final String vol1 = "file://127.0.0.1/vol1/";
+    final String vol2 = "file://localhost/vol2/";
+    final String vol3 = "hdfs://127.0.0.1/accumulo";
+    final String vol4 = "hdfs://localhost/accumulo";
+
+    ConfigurationCopy conf = new ConfigurationCopy();
+    conf.set(Property.INSTANCE_VOLUMES, String.join(",", vol1, vol2, vol3, 
vol4));
+    conf.set(Property.GENERAL_VOLUME_CHOOSER, 
Property.GENERAL_VOLUME_CHOOSER.getDefaultValue());
+    conf.set(Property.GENERAL_ARBITRARY_PROP_PREFIX.getKey() + vol1 + "."
+        + HdfsClientConfigKeys.HedgedRead.THREADPOOL_SIZE_KEY, "10");
+    conf.set(Property.GENERAL_ARBITRARY_PROP_PREFIX.getKey() + vol1 + "."

Review Comment:
   Could set a third override on one volume that no other volume sets, to ensure 
there is no interference in that case.  For example, could set a third blocksize 
property for vol1 only.



##########
server/base/src/test/java/org/apache/accumulo/server/fs/VolumeManagerImplTest.java:
##########
@@ -104,4 +111,88 @@ public Scope getChooserScope() {
       assertThrows(RuntimeException.class, () -> vm.choose(chooserEnv, 
volumes));
     }
   }
+
+  @Test
+  public void testConfigurationOverrides() throws Exception {
+
+    final String vol1 = "file://127.0.0.1/vol1/";
+    final String vol2 = "file://localhost/vol2/";
+    final String vol3 = "hdfs://127.0.0.1/accumulo";
+    final String vol4 = "hdfs://localhost/accumulo";
+
+    ConfigurationCopy conf = new ConfigurationCopy();
+    conf.set(Property.INSTANCE_VOLUMES, String.join(",", vol1, vol2, vol3, 
vol4));
+    conf.set(Property.GENERAL_VOLUME_CHOOSER, 
Property.GENERAL_VOLUME_CHOOSER.getDefaultValue());
+    conf.set(Property.GENERAL_ARBITRARY_PROP_PREFIX.getKey() + vol1 + "."
+        + HdfsClientConfigKeys.HedgedRead.THREADPOOL_SIZE_KEY, "10");
+    conf.set(Property.GENERAL_ARBITRARY_PROP_PREFIX.getKey() + vol1 + "."
+        + HdfsClientConfigKeys.DFS_CLIENT_CACHE_DROP_BEHIND_READS, "true");
+    conf.set(Property.GENERAL_ARBITRARY_PROP_PREFIX.getKey() + vol2 + "."
+        + HdfsClientConfigKeys.HedgedRead.THREADPOOL_SIZE_KEY, "20");
+    conf.set(Property.GENERAL_ARBITRARY_PROP_PREFIX.getKey() + vol2 + "."
+        + HdfsClientConfigKeys.DFS_CLIENT_CACHE_DROP_BEHIND_READS, "false");
+    conf.set(Property.GENERAL_ARBITRARY_PROP_PREFIX.getKey() + vol3 + "."
+        + HdfsClientConfigKeys.HedgedRead.THREADPOOL_SIZE_KEY, "30");
+    conf.set(Property.GENERAL_ARBITRARY_PROP_PREFIX.getKey() + vol3 + "."
+        + HdfsClientConfigKeys.DFS_CLIENT_CACHE_DROP_BEHIND_READS, "TRUE");
+    conf.set(Property.GENERAL_ARBITRARY_PROP_PREFIX.getKey() + vol4 + "."
+        + HdfsClientConfigKeys.HedgedRead.THREADPOOL_SIZE_KEY, "40");
+    conf.set(Property.GENERAL_ARBITRARY_PROP_PREFIX.getKey() + vol4 + "."
+        + HdfsClientConfigKeys.DFS_CLIENT_CACHE_DROP_BEHIND_READS, "FALSE");
+
+    VolumeManager vm = VolumeManagerImpl.get(conf, hadoopConf);
+
+    FileSystem fs1 = vm.getFileSystemByPath(new Path(vol1));
+    Configuration conf1 = fs1.getConf();
+
+    FileSystem fs2 = vm.getFileSystemByPath(new Path(vol2));
+    Configuration conf2 = fs2.getConf();
+
+    FileSystem fs3 = vm.getFileSystemByPath(new Path(vol3));
+    Configuration conf3 = fs3.getConf();
+
+    FileSystem fs4 = vm.getFileSystemByPath(new Path(vol4));
+    Configuration conf4 = fs4.getConf();

Review Comment:
   Could add a 5th volume that does not set any overrides to ensure there is no 
interference.



##########
server/base/src/main/java/org/apache/accumulo/server/fs/VolumeManagerImpl.java:
##########
@@ -353,6 +362,38 @@ public short getDefaultReplication(Path path) {
     return getFileSystemByPath(path).getDefaultReplication(path);
   }
 
+  /**
+   * The Hadoop Configuration object does not currently allow for duplicate 
properties to be set in
+   * a single Configuration for different FileSystem URIs. Here we will look 
for properties in the
+   * Accumulo configuration of the form:
+   *
+   * <pre>
+   * general.custom.&lt;volume-uri&gt;.&lt;hdfs-property&gt;
+   * </pre>
+   *
+   * We will use these properties to return a new Configuration object that 
can be used with the
+   * FileSystem URI.
+   *
+   * @param conf AccumuloConfiguration object
+   * @param hadoopConf Hadoop Configuration object
+   * @param filesystemURI Volume Filesystem URI
+   * @return Hadoop Configuration with custom overrides for this FileSystem
+   */
+  private static Configuration 
getVolumeManagerConfiguration(AccumuloConfiguration conf,
+      final Configuration hadoopConf, final String filesystemURI) {
+    final Configuration volumeConfig = new Configuration(hadoopConf);
+    final Map<String,String> customProps =
+        
conf.getAllPropertiesWithPrefixStripped(Property.GENERAL_ARBITRARY_PROP_PREFIX);

Review Comment:
   I would use a different prefix for these properties.  I think of the general 
arbitrary prefix as something plugins use.  This config is not related to a 
plugin; it's for core functionality.  Could do something like the following.
   
   ```
   instance.volumes.dfs.config.<volume-uri>.<property-name>=<property-value>
   ```
   
   This config prefix also makes the config sort together with the 
instance.volumes config.



##########
test/src/main/java/org/apache/accumulo/test/VolumeManagerIT.java:
##########
@@ -0,0 +1,148 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.accumulo.test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.util.Map;
+
+import org.apache.accumulo.core.client.Accumulo;
+import org.apache.accumulo.core.client.AccumuloClient;
+import org.apache.accumulo.core.client.admin.NewTableConfiguration;
+import org.apache.accumulo.core.conf.Property;
+import org.apache.accumulo.miniclusterImpl.MiniAccumuloConfigImpl;
+import org.apache.accumulo.test.functional.ConfigurableMacBase;
+import org.apache.accumulo.test.functional.ReadWriteIT;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.junit.jupiter.api.Test;
+
+public class VolumeManagerIT extends ConfigurableMacBase {
+
+  private String vol1;
+  private String vol2;
+
+  @Override
+  protected void configure(MiniAccumuloConfigImpl cfg, Configuration 
hadoopCoreSite) {
+    cfg.useMiniDFS(true);
+    cfg.setFinalSiteConfigUpdater((config) -> {
+      vol1 = config.getSiteConfig().get(Property.INSTANCE_VOLUMES.getKey());
+      assertTrue(vol1.contains("localhost"));
+      vol2 = vol1.replace("localhost", "127.0.0.1");
+      config.setProperty(Property.INSTANCE_VOLUMES.getKey(), String.join(",", 
vol2, vol1));
+
+      // Set Volume specific HDFS overrides
+      config.setProperty("general.volume.chooser",
+          "org.apache.accumulo.core.spi.fs.PreferredVolumeChooser");
+      config.setProperty("general.custom.volume.preferred.default", vol1);
+      config.setProperty("general.custom.volume.preferred.logger", vol2);
+
+      // Need to escape the colons in the custom property volume because it's 
part of the key. It's
+      // being
+      // written out to a file and read in using the Properties object.
+      String vol1FileOutput = vol1.replaceAll(":", "\\\\:");
+      String vol2FileOutput = vol2.replaceAll(":", "\\\\:");
+      config.setProperty(
+          Property.GENERAL_ARBITRARY_PROP_PREFIX.getKey() + vol1FileOutput + 
".dfs.blocksize",
+          "10485760");
+      config.setProperty(
+          Property.GENERAL_ARBITRARY_PROP_PREFIX.getKey() + vol2FileOutput + 
".dfs.blocksize",
+          "104857600");
+      config.setProperty(Property.GENERAL_ARBITRARY_PROP_PREFIX.getKey() + 
vol1FileOutput
+          + ".dfs.client.use.datanode.hostname", "true");
+      config.setProperty(Property.GENERAL_ARBITRARY_PROP_PREFIX.getKey() + 
vol2FileOutput
+          + ".dfs.client.use.datanode.hostname", "false");
+      config.setProperty(Property.GENERAL_ARBITRARY_PROP_PREFIX.getKey() + 
vol1FileOutput
+          + ".dfs.client.hedged.read.threadpool.size", "0");
+      config.setProperty(Property.GENERAL_ARBITRARY_PROP_PREFIX.getKey() + 
vol2FileOutput
+          + ".dfs.client.hedged.read.threadpool.size", "1");
+    });
+  }
+
+  @Test
+  public void testHdfsConfigOverrides() throws Exception {
+    try (AccumuloClient c = 
Accumulo.newClient().from(getClientProperties()).build()) {
+
+      Map<String,String> siteConfig = 
c.instanceOperations().getSiteConfiguration();
+      assertEquals("10485760", siteConfig
+          .get(Property.GENERAL_ARBITRARY_PROP_PREFIX.getKey() + vol1 + 
".dfs.blocksize"));
+      assertEquals("104857600", siteConfig

Review Comment:
   `104857600` is so similar to `10485760` that at first glance I thought both 
were the same.  Could replace `104857600` with something more visually distinct 
like `51200000`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to