ayushtkn commented on code in PR #379:
URL: https://github.com/apache/tez/pull/379#discussion_r1878576065


##########
tez-api/src/main/java/org/apache/tez/common/counters/FileSystemCounter.java:
##########
@@ -19,16 +19,69 @@
 package org.apache.tez.common.counters;
 
 import org.apache.hadoop.classification.InterfaceAudience.Private;
+import org.apache.hadoop.fs.StorageStatistics.CommonStatisticNames;
 
+/**
+ * FileSystemCounter is an enum for defining which filesystem/storage 
statistics are exposed in Tez.
+ */
 @Private
 public enum FileSystemCounter {
-  BYTES_READ,
-  BYTES_WRITTEN,
-  READ_OPS,
-  LARGE_READ_OPS,
-  WRITE_OPS,
-  HDFS_BYTES_READ,
-  HDFS_BYTES_WRITTEN,
-  FILE_BYTES_READ,
-  FILE_BYTES_WRITTEN
+  BYTES_READ("bytesRead"),
+  BYTES_WRITTEN("bytesWritten"),
+  READ_OPS("readOps"),
+  LARGE_READ_OPS("largeReadOps"),
+  WRITE_OPS("writeOps"),
+
+  // Additional counters from HADOOP-13305
+  // Additional counters from HADOOP-13305

Review Comment:
   Duplicate line



##########
tez-runtime-internals/src/main/java/org/apache/tez/runtime/metrics/TaskCounterUpdater.java:
##########
@@ -48,11 +49,16 @@ public class TaskCounterUpdater {
   private final TezCounters tezCounters;
   private final Configuration conf;
 
+//  /**
+//   * A Map where Key-> URIScheme and value->FileSystemStatisticUpdater
+//   */
+//  private Map<String, FileSystemStatisticUpdater> statisticUpdaters =
+//     new HashMap<>();

Review Comment:
   Why are you commenting this out? Can't we just delete it?



##########
tez-runtime-internals/src/main/java/org/apache/tez/runtime/metrics/TaskCounterUpdater.java:
##########
@@ -69,32 +75,16 @@ public TaskCounterUpdater(TezCounters counters, 
Configuration conf, String pid)
 
   
   public void updateCounters() {
-    // FileSystemStatistics are reset each time a new task is seen by the
-    // container.
-    // This doesn't remove the fileSystem, and does not clear all statistics -
-    // so there is a potential of an unused FileSystem showing up for a
-    // Container, and strange values for READ_OPS etc.
-    Map<String, List<FileSystem.Statistics>> map = new
-        HashMap<String, List<FileSystem.Statistics>>();
-    for(Statistics stat: FileSystem.getAllStatistics()) {
-      String uriScheme = stat.getScheme();
-      if (map.containsKey(uriScheme)) {
-        List<FileSystem.Statistics> list = map.get(uriScheme);
-        list.add(stat);
-      } else {
-        List<FileSystem.Statistics> list = new 
ArrayList<FileSystem.Statistics>();
-        list.add(stat);
-        map.put(uriScheme, list);
-      }
-    }
-    for (Map.Entry<String, List<FileSystem.Statistics>> entry: map.entrySet()) 
{
-      FileSystemStatisticUpdater updater = 
statisticUpdaters.get(entry.getKey());
-      if(updater==null) {//new FileSystem has been found in the cache
-        updater =
-            new FileSystemStatisticUpdater(tezCounters, entry.getValue(),
-                entry.getKey());
-        statisticUpdaters.put(entry.getKey(), updater);
-      }
+    GlobalStorageStatistics globalStorageStatistics = 
FileSystem.getGlobalStorageStatistics();
+    Iterator<StorageStatistics> iter = globalStorageStatistics.iterator();
+    while (iter.hasNext()) {
+      StorageStatistics stats = iter.next();
+      // Fetch or initialize the updater set for the scheme
+      Map<String, FileSystemStatisticUpdater> updaterSet = statisticUpdaters
+          .computeIfAbsent(stats.getScheme(), k -> new TreeMap<>());

Review Comment:
   Why are we using `TreeMap` now? If I read this correctly, it was a `HashMap`
   earlier; wouldn't using `TreeMap` add some extra cost?



##########
tez-runtime-internals/src/main/java/org/apache/tez/runtime/metrics/TaskCounterUpdater.java:
##########
@@ -48,11 +49,16 @@ public class TaskCounterUpdater {
   private final TezCounters tezCounters;
   private final Configuration conf;
 
+//  /**
+//   * A Map where Key-> URIScheme and value->FileSystemStatisticUpdater
+//   */
+//  private Map<String, FileSystemStatisticUpdater> statisticUpdaters =
+//     new HashMap<>();
   /**
-   * A Map where Key-> URIScheme and value->FileSystemStatisticUpdater
+   * A Map where Key-> URIScheme and value->Map<Name, 
FileSystemStatisticUpdater>
    */
-  private Map<String, FileSystemStatisticUpdater> statisticUpdaters =
-     new HashMap<String, FileSystemStatisticUpdater>();
+  private Map<String, Map<String, FileSystemStatisticUpdater>> 
statisticUpdaters =
+      new HashMap<>();

Review Comment:
   We can make this `final`.



##########
tez-runtime-internals/src/test/java/org/apache/tez/runtime/metrics/TestFileSystemStatisticUpdater.java:
##########
@@ -0,0 +1,109 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.runtime.metrics;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.tez.common.counters.FileSystemCounter;
+import org.apache.tez.common.counters.TezCounter;
+import org.apache.tez.common.counters.TezCounters;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class TestFileSystemStatisticUpdater {
+
+  private static final Logger LOG = LoggerFactory.getLogger(
+      TestFileSystemStatisticUpdater.class);
+
+  private static MiniDFSCluster dfsCluster;
+
+  private static final Configuration CONF = new Configuration();
+  private static FileSystem remoteFs;
+
+  private static final String TEST_ROOT_DIR = "target" + Path.SEPARATOR +
+      TestFileSystemStatisticUpdater.class.getName() + "-tmpDir";
+
+  @BeforeClass
+  public static void setup() throws IOException {
+    try {
+      CONF.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, TEST_ROOT_DIR);
+      dfsCluster = new MiniDFSCluster.Builder(CONF).numDataNodes(2).build();
+      remoteFs = dfsCluster.getFileSystem();
+    } catch (IOException io) {
+      throw new RuntimeException("problem starting mini dfs cluster", io);
+    }
+  }
+
+  @AfterClass
+  public static void tearDown() {
+    if (dfsCluster != null) {
+      dfsCluster.shutdown();
+      dfsCluster = null;
+    }
+  }
+
+  @Test
+  public void basicTest() throws IOException {
+    TezCounters counters = new TezCounters();
+    TaskCounterUpdater updater = new TaskCounterUpdater(counters, CONF, "pid");
+
+    DFSTestUtil.writeFile(remoteFs, new Path("/tmp/foo/abc.txt"), "xyz");
+
+    updater.updateCounters();

Review Comment:
   Shouldn't we first do
   ```
       FileSystem.clearStatistics();
   ```
   If a test that performs FS operations is added in the future, I believe this
   test will break. So it is better to reset everything to 0 before we start
   testing.
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to