Repository: systemml
Updated Branches:
  refs/heads/master 8fb74b158 -> 628ffad1b


http://git-wip-us.apache.org/repos/asf/systemml/blob/628ffad1/src/main/java/org/apache/sysml/utils/GPUStatistics.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/utils/GPUStatistics.java b/src/main/java/org/apache/sysml/utils/GPUStatistics.java
index c0f1d5e..e6ea4a7 100644
--- a/src/main/java/org/apache/sysml/utils/GPUStatistics.java
+++ b/src/main/java/org/apache/sysml/utils/GPUStatistics.java
@@ -26,7 +26,7 @@ import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
-import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.LongAdder;
 
 import org.apache.sysml.api.DMLScript;
 
@@ -35,190 +35,186 @@ import org.apache.sysml.api.DMLScript;
  * Printed as part of {@link Statistics}.
  */
 public class GPUStatistics {
-       //TODO fix formatting 
-       //TODO replace AtomicLong with LongAdder
-       
-       
-  // Whether or not extra per-instruction statistics will be recorded and shown for the GPU
-  public static boolean DISPLAY_STATISTICS = false;
-
-  private static int iNoOfExecutedGPUInst = 0;
-
-  public static long cudaInitTime = 0;
-  public static long cudaLibrariesInitTime = 0;
-  public static AtomicLong cudaSparseToDenseTime = new AtomicLong(0);          // time spent in converting sparse matrix block to dense
-  public static AtomicLong cudaDenseToSparseTime = new AtomicLong(0);          // time spent in converting dense matrix block to sparse
-  public static AtomicLong cudaSparseConversionTime = new AtomicLong(0);       // time spent in converting between sparse block types
-  public static AtomicLong cudaSparseToDenseCount = new AtomicLong(0);
-  public static AtomicLong cudaDenseToSparseCount = new AtomicLong(0);
-  public static AtomicLong cudaSparseConversionCount = new AtomicLong(0);
-
-  public static AtomicLong cudaAllocTime = new AtomicLong(0);             // time spent in allocating memory on the GPU
-  public static AtomicLong cudaDeAllocTime = new AtomicLong(0);           // time spent in deallocating memory on the GPU
-  public static AtomicLong cudaMemSet0Time = new AtomicLong(0);           // time spent in setting memory to 0 on the GPU (part of reusing and for new allocates)
-  public static AtomicLong cudaToDevTime = new AtomicLong(0);             // time spent in copying data from host (CPU) to device (GPU) memory
-  public static AtomicLong cudaFromDevTime = new AtomicLong(0);           // time spent in copying data from device to host
-  public static AtomicLong cudaAllocCount = new AtomicLong(0);
-  public static AtomicLong cudaDeAllocCount = new AtomicLong(0);
-  public static AtomicLong cudaMemSet0Count = new AtomicLong(0);
-  public static AtomicLong cudaToDevCount = new AtomicLong(0);
-  public static AtomicLong cudaFromDevCount = new AtomicLong(0);
-  public static AtomicLong cudaEvictionCount = new AtomicLong(0);
-
-  // Per instruction miscellaneous timers.
-  // Used to record events in a CP Heavy Hitter instruction and
-  // provide a breakdown of how time was spent in that instruction
-  private static HashMap<String, HashMap<String, Long>> _cpInstMiscTime = new HashMap<String, HashMap<String, Long>> ();
-  private static HashMap<String, HashMap<String, Long>> _cpInstMiscCount = new HashMap<String, HashMap<String, Long>> ();
-
-  /**
-   * Resets the miscellaneous timers & counters
-   */
-  public static void resetMiscTimers(){
-    _cpInstMiscTime.clear();
-    _cpInstMiscCount.clear();
-  }
-
-  /**
-   * Resets all the cuda counters and timers, including the misc timers & counters
-   */
-  public static void reset(){
-    cudaInitTime = 0;
-    cudaLibrariesInitTime = 0;
-    cudaAllocTime.set(0);
-    cudaDeAllocTime.set(0);
-    cudaMemSet0Time.set(0);
-    cudaMemSet0Count.set(0);
-    cudaToDevTime.set(0);
-    cudaFromDevTime.set(0);
-    cudaAllocCount.set(0);
-    cudaDeAllocCount.set(0);
-    cudaToDevCount.set(0);
-    cudaFromDevCount.set(0);
-    cudaEvictionCount.set(0);
-    resetMiscTimers();
-  }
-
-
-  public static synchronized void setNoOfExecutedGPUInst(int numJobs) {
-    iNoOfExecutedGPUInst = numJobs;
-  }
-
-  public static synchronized void incrementNoOfExecutedGPUInst() {
-    iNoOfExecutedGPUInst ++;
-  }
-
-  public static synchronized int getNoOfExecutedGPUInst() {
-    return iNoOfExecutedGPUInst;
-  }
-
-  /**
-   * "Maintains" or adds time to miscellaneous timers per instruction/op, also increments associated count
-   * @param instructionName    name of the instruction/op
-   * @param miscTimer                          name of the miscellaneous timer
-   * @param timeNanos                          time in nano seconds
-   * @param incrementCount     how much to increment the count of the miscTimer by
-   */
-  public synchronized static void maintainCPMiscTimes( String instructionName, String miscTimer, long timeNanos, long incrementCount)
-  {
-    if (!(DISPLAY_STATISTICS || DMLScript.FINEGRAINED_STATISTICS))
-      return;
-
-    HashMap<String, Long> miscTimesMap = _cpInstMiscTime.get(instructionName);
-    if (miscTimesMap == null) {
-      miscTimesMap = new HashMap<String, Long>();
-      _cpInstMiscTime.put(instructionName, miscTimesMap);
-    }
-    Long oldVal = miscTimesMap.get(miscTimer);
-    Long newVal = timeNanos + ((oldVal!=null) ? oldVal : 0);
-    miscTimesMap.put(miscTimer, newVal);
-
-    HashMap<String, Long> miscCountMap = _cpInstMiscCount.get(instructionName);
-    if (miscCountMap == null){
-      miscCountMap = new HashMap<String, Long>();
-      _cpInstMiscCount.put(instructionName, miscCountMap);
-    }
-    Long oldCnt = miscCountMap.get(miscTimer);
-    Long newCnt = incrementCount + ((oldCnt!=null) ? oldCnt : 0);
-    miscCountMap.put(miscTimer, newCnt);
-  }
-
-  /**
-   * "Maintains" or adds time to miscellaneous timers per instruction/op, also increments associated count by 1
-   * @param instructionName    name of the instruction/op
-   * @param miscTimer                          name of the miscellaneous timer
-   * @param timeNanos                          time in nano seconds
-   */
-  public synchronized static void maintainCPMiscTimes( String instructionName, String miscTimer, long timeNanos){
-    maintainCPMiscTimes(instructionName, miscTimer, timeNanos, 1);
-  }
-
-  /**
-   * Used to print misc timers (and their counts) for a given instruction/op
-   * @param instructionName name of the instruction/op
-   * @return  a formatted string of misc timers for a given instruction/op
-   */
-  public static String getStringForCPMiscTimesPerInstruction(String instructionName) {
-    StringBuffer sb = new StringBuffer();
-    HashMap<String, Long> miscTimerMap = _cpInstMiscTime.get(instructionName);
-    if (miscTimerMap != null) {
-      List<Map.Entry<String, Long>> sortedList = new ArrayList<Map.Entry<String, Long>>(miscTimerMap.entrySet());
-      // Sort the times to display by the most expensive first
-      Collections.sort(sortedList, new Comparator<Map.Entry<String, Long>>() {
-        @Override
-        public int compare(Map.Entry<String, Long> o1, Map.Entry<String, Long> o2) {
-          return (int) (o1.getValue() - o2.getValue());
-        }
-      });
-      Iterator<Map.Entry<String, Long>> miscTimeIter = sortedList.iterator();
-      HashMap<String, Long> miscCountMap = _cpInstMiscCount.get(instructionName);
-      while (miscTimeIter.hasNext()) {
-        Map.Entry<String, Long> e = miscTimeIter.next();
-        String miscTimerName = e.getKey();
-        Long miscTimerTime = e.getValue();
-        Long miscCount = miscCountMap.get(miscTimerName);
-        sb.append(miscTimerName + "[" + String.format("%.3f", (double) miscTimerTime / 1000000000.0) + "s," + miscCount + "]");
-        if (miscTimeIter.hasNext())
-          sb.append(", ");
-      }
-    }
-    return sb.toString();
-  }
-
-  /**
-   * Used to print out cuda timers & counters
-   * @return a formatted string of cuda timers & counters
-   */
-  public static String getStringForCudaTimers() {
-    StringBuffer sb = new StringBuffer();
-    sb.append("CUDA/CuLibraries init time:\t" + String.format("%.3f", cudaInitTime*1e-9) + "/"
-            + String.format("%.3f", cudaLibrariesInitTime*1e-9) + " sec.\n");
-    sb.append("Number of executed GPU inst:\t" + getNoOfExecutedGPUInst() + ".\n");
-    sb.append("GPU mem tx time  (alloc/dealloc/set0/toDev/fromDev):\t"
-            + String.format("%.3f", cudaAllocTime.get()*1e-9) + "/"
-            + String.format("%.3f", cudaDeAllocTime.get()*1e-9) + "/"
-            + String.format("%.3f", cudaMemSet0Time.get()*1e-9) + "/"
-            + String.format("%.3f", cudaToDevTime.get()*1e-9) + "/"
-            + String.format("%.3f", cudaFromDevTime.get()*1e-9)  + " sec.\n");
-    sb.append("GPU mem tx count (alloc/dealloc/set0/toDev/fromDev/evict):\t"
-            + cudaAllocCount.get() + "/"
-            + cudaDeAllocCount.get() + "/"
-            + cudaMemSet0Count.get() + "/"
-            + cudaSparseConversionCount.get() + "/"
-            + cudaToDevCount.get() + "/"
-            + cudaFromDevCount.get() + "/"
-            + cudaEvictionCount.get() + ".\n");
-    sb.append("GPU conversion time  (sparseConv/sp2dense/dense2sp):\t"
-            + String.format("%.3f", cudaSparseConversionTime.get()*1e-9) + "/"
-            + String.format("%.3f", cudaSparseToDenseTime.get()*1e-9) + "/"
-            + String.format("%.3f", cudaDenseToSparseTime.get()*1e-9) + " sec.\n");
-    sb.append("GPU conversion count (sparseConv/sp2dense/dense2sp):\t"
-            + cudaSparseConversionCount.get() + "/"
-            + cudaSparseToDenseCount.get() + "/"
-            + cudaDenseToSparseCount.get() + ".\n");
-
-    return sb.toString();
-  }
+       // Whether or not extra per-instruction statistics will be recorded and shown for the GPU
+       public static boolean DISPLAY_STATISTICS = false;
+
+       private static int iNoOfExecutedGPUInst = 0;
+
+       public static long cudaInitTime = 0;
+       public static long cudaLibrariesInitTime = 0;
+       public static LongAdder cudaSparseToDenseTime = new LongAdder();                // time spent in converting sparse matrix block to dense
+       public static LongAdder cudaDenseToSparseTime = new LongAdder();                // time spent in converting dense matrix block to sparse
+       public static LongAdder cudaSparseConversionTime = new LongAdder();     // time spent in converting between sparse block types
+       public static LongAdder cudaSparseToDenseCount = new LongAdder();
+       public static LongAdder cudaDenseToSparseCount = new LongAdder();
+       public static LongAdder cudaSparseConversionCount = new LongAdder();
+
+       public static LongAdder cudaAllocTime = new LongAdder();             // time spent in allocating memory on the GPU
+       public static LongAdder cudaDeAllocTime = new LongAdder();           // time spent in deallocating memory on the GPU
+       public static LongAdder cudaMemSet0Time = new LongAdder();           // time spent in setting memory to 0 on the GPU (part of reusing and for new allocates)
+       public static LongAdder cudaToDevTime = new LongAdder();             // time spent in copying data from host (CPU) to device (GPU) memory
+       public static LongAdder cudaFromDevTime = new LongAdder();           // time spent in copying data from device to host
+       public static LongAdder cudaAllocCount = new LongAdder();
+       public static LongAdder cudaDeAllocCount = new LongAdder();
+       public static LongAdder cudaMemSet0Count = new LongAdder();
+       public static LongAdder cudaToDevCount = new LongAdder();
+       public static LongAdder cudaFromDevCount = new LongAdder();
+       public static LongAdder cudaEvictionCount = new LongAdder();
+
+       // Per instruction miscellaneous timers.
+       // Used to record events in a CP Heavy Hitter instruction and
+       // provide a breakdown of how time was spent in that instruction
+       private static HashMap<String, HashMap<String, Long>> _cpInstMiscTime = new HashMap<String, HashMap<String, Long>> ();
+       private static HashMap<String, HashMap<String, Long>> _cpInstMiscCount = new HashMap<String, HashMap<String, Long>> ();
+
+       /**
+        * Resets the miscellaneous timers & counters
+        */
+       public static void resetMiscTimers(){
+               _cpInstMiscTime.clear();
+               _cpInstMiscCount.clear();
+       }
+
+       /**
+        * Resets all the cuda counters and timers, including the misc timers & counters
+        */
+       public static void reset(){
+               cudaInitTime = 0;
+               cudaLibrariesInitTime = 0;
+               cudaAllocTime.reset();
+               cudaDeAllocTime.reset();
+               cudaMemSet0Time.reset();
+               cudaMemSet0Count.reset();
+               cudaToDevTime.reset();
+               cudaFromDevTime.reset();
+               cudaAllocCount.reset();
+               cudaDeAllocCount.reset();
+               cudaToDevCount.reset();
+               cudaFromDevCount.reset();
+               cudaEvictionCount.reset();
+               resetMiscTimers();
+       }
+
+
+       public static synchronized void setNoOfExecutedGPUInst(int numJobs) {
+               iNoOfExecutedGPUInst = numJobs;
+       }
+
+       public static synchronized void incrementNoOfExecutedGPUInst() {
+               iNoOfExecutedGPUInst ++;
+       }
+
+       public static synchronized int getNoOfExecutedGPUInst() {
+               return iNoOfExecutedGPUInst;
+       }
+
+       /**
+        * "Maintains" or adds time to miscellaneous timers per instruction/op, also increments associated count
+        * @param instructionName       name of the instruction/op
+        * @param miscTimer                             name of the miscellaneous timer
+        * @param timeNanos                             time in nano seconds
+        * @param incrementCount        how much to increment the count of the miscTimer by
+        */
+       public synchronized static void maintainCPMiscTimes( String instructionName, String miscTimer, long timeNanos, long incrementCount)
+       {
+               if (!(DISPLAY_STATISTICS || DMLScript.FINEGRAINED_STATISTICS))
+                       return;
+
+               HashMap<String, Long> miscTimesMap = _cpInstMiscTime.get(instructionName);
+               if (miscTimesMap == null) {
+                       miscTimesMap = new HashMap<String, Long>();
+                       _cpInstMiscTime.put(instructionName, miscTimesMap);
+               }
+               Long oldVal = miscTimesMap.get(miscTimer);
+               Long newVal = timeNanos + ((oldVal!=null) ? oldVal : 0);
+               miscTimesMap.put(miscTimer, newVal);
+
+               HashMap<String, Long> miscCountMap = _cpInstMiscCount.get(instructionName);
+               if (miscCountMap == null){
+                       miscCountMap = new HashMap<String, Long>();
+                       _cpInstMiscCount.put(instructionName, miscCountMap);
+               }
+               Long oldCnt = miscCountMap.get(miscTimer);
+               Long newCnt = incrementCount + ((oldCnt!=null) ? oldCnt : 0);
+               miscCountMap.put(miscTimer, newCnt);
+       }
+
+       /**
+        * "Maintains" or adds time to miscellaneous timers per instruction/op, also increments associated count by 1
+        * @param instructionName       name of the instruction/op
+        * @param miscTimer                             name of the miscellaneous timer
+        * @param timeNanos                             time in nano seconds
+        */
+       public synchronized static void maintainCPMiscTimes( String instructionName, String miscTimer, long timeNanos){
+               maintainCPMiscTimes(instructionName, miscTimer, timeNanos, 1);
+       }
+
+       /**
+        * Used to print misc timers (and their counts) for a given instruction/op
+        * @param instructionName name of the instruction/op
+        * @return  a formatted string of misc timers for a given instruction/op
+        */
+       public static String getStringForCPMiscTimesPerInstruction(String instructionName) {
+               StringBuffer sb = new StringBuffer();
+               HashMap<String, Long> miscTimerMap = _cpInstMiscTime.get(instructionName);
+               if (miscTimerMap != null) {
+                       List<Map.Entry<String, Long>> sortedList = new ArrayList<Map.Entry<String, Long>>(miscTimerMap.entrySet());
+                       // Sort the times to display by the most expensive first
+                       Collections.sort(sortedList, new Comparator<Map.Entry<String, Long>>() {
+                               @Override
+                               public int compare(Map.Entry<String, Long> o1, Map.Entry<String, Long> o2) {
+                                       return (int) (o1.getValue() - o2.getValue());
+                               }
+                       });
+                       Iterator<Map.Entry<String, Long>> miscTimeIter = sortedList.iterator();
+                       HashMap<String, Long> miscCountMap = _cpInstMiscCount.get(instructionName);
+                       while (miscTimeIter.hasNext()) {
+                               Map.Entry<String, Long> e = miscTimeIter.next();
+                               String miscTimerName = e.getKey();
+                               Long miscTimerTime = e.getValue();
+                               Long miscCount = miscCountMap.get(miscTimerName);
+                               sb.append(miscTimerName + "[" + String.format("%.3f", (double) miscTimerTime / 1000000000.0) + "s," + miscCount + "]");
+                               if (miscTimeIter.hasNext())
+                                       sb.append(", ");
+                       }
+               }
+               return sb.toString();
+       }
+
+       /**
+        * Used to print out cuda timers & counters
+        * @return a formatted string of cuda timers & counters
+        */
+       public static String getStringForCudaTimers() {
+               StringBuffer sb = new StringBuffer();
+               sb.append("CUDA/CuLibraries init time:\t" + String.format("%.3f", cudaInitTime*1e-9) + "/"
+                               + String.format("%.3f", cudaLibrariesInitTime*1e-9) + " sec.\n");
+               sb.append("Number of executed GPU inst:\t" + getNoOfExecutedGPUInst() + ".\n");
+               sb.append("GPU mem tx time  (alloc/dealloc/set0/toDev/fromDev):\t"
+                               + String.format("%.3f", cudaAllocTime.longValue()*1e-9) + "/"
+                               + String.format("%.3f", cudaDeAllocTime.longValue()*1e-9) + "/"
+                               + String.format("%.3f", cudaMemSet0Time.longValue()*1e-9) + "/"
+                               + String.format("%.3f", cudaToDevTime.longValue()*1e-9) + "/"
+                               + String.format("%.3f", cudaFromDevTime.longValue()*1e-9)  + " sec.\n");
+               sb.append("GPU mem tx count (alloc/dealloc/set0/toDev/fromDev/evict):\t"
+                               + cudaAllocCount.longValue() + "/"
+                               + cudaDeAllocCount.longValue() + "/"
+                               + cudaMemSet0Count.longValue() + "/"
+                               + cudaSparseConversionCount.longValue() + "/"
+                               + cudaToDevCount.longValue() + "/"
+                               + cudaFromDevCount.longValue() + "/"
+                               + cudaEvictionCount.longValue() + ".\n");
+               sb.append("GPU conversion time  (sparseConv/sp2dense/dense2sp):\t"
+                               + String.format("%.3f", cudaSparseConversionTime.longValue()*1e-9) + "/"
+                               + String.format("%.3f", cudaSparseToDenseTime.longValue()*1e-9) + "/"
+                               + String.format("%.3f", cudaDenseToSparseTime.longValue()*1e-9) + " sec.\n");
+               sb.append("GPU conversion count (sparseConv/sp2dense/dense2sp):\t"
+                               + cudaSparseConversionCount.longValue() + "/"
+                               + cudaSparseToDenseCount.longValue() + "/"
+                               + cudaDenseToSparseCount.longValue() + ".\n");
+
+               return sb.toString();
+       }
 
 
 }

http://git-wip-us.apache.org/repos/asf/systemml/blob/628ffad1/src/test/java/org/apache/sysml/test/gpu/RightIndexingTests.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/gpu/RightIndexingTests.java b/src/test/java/org/apache/sysml/test/gpu/RightIndexingTests.java
new file mode 100644
index 0000000..e891b7f
--- /dev/null
+++ b/src/test/java/org/apache/sysml/test/gpu/RightIndexingTests.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sysml.test.gpu;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.sysml.api.mlcontext.Matrix;
+import org.apache.sysml.test.utils.TestUtils;
+import org.junit.Test;
+
+/**
+ * This test uses the script: O = X[rl:ru,cl:cu]
+ */
+public class RightIndexingTests extends GPUTests {
+       
+       private final static String TEST_NAME = "RightIndexingTests";
+       private final int [] indexes1 = new int[] {1, 5, 10, 100};
+       private final int [] indexes2 = new int[] {1, 5, 10, 100};
+       private final double[] sparsities = new double[] { 0.0, 0.03, 0.3, 0.9 };
+       private final int seed = 42;
+
+       @Override
+       public void setUp() {
+               TestUtils.clearAssertionInformation();
+               addTestConfiguration(TEST_DIR, TEST_NAME);
+               getAndLoadTestConfiguration(TEST_NAME);
+       }
+
+       @Test
+       public void runRightIndexingTest() {
+               int dim1 = Arrays.stream(indexes1).max().getAsInt();
+               int dim2 = Arrays.stream(indexes2).max().getAsInt();
+               for(int i1 = 0; i1 < indexes1.length; i1++) {
+                       for(int i2 = i1; i2 < indexes1.length; i2++) {
+                               for(int j1 = 0; j1 < indexes2.length; j1++) {
+                                       for(int j2 = j1; j2 < indexes2.length; j2++) {
+                                               int rl = indexes1[i1]; int ru = indexes1[i2];
+                                               int cl = indexes2[j1]; int cu = indexes2[j2];
+                                               for (int k = 0; k < sparsities.length; k++) {
+                                                       double sparsity = sparsities[k];
+                                                       Matrix X = generateInputMatrix(spark, dim1, dim2, sparsity, seed);
+                                                       Matrix Y = generateInputMatrix(spark, dim1, dim2, sparsity, seed);
+                                                       HashMap<String, Object> inputs = new HashMap<>();
+                                                       inputs.put("X", X);
+                                                       String scriptStr = "O = X[" + rl + ":" + ru + "," +  cl + ":" + cu + "];";
+                                                       System.out.println("Executing the script: " + scriptStr);
+                                                       List<Object> cpuOut = runOnCPU(spark, scriptStr, inputs, Arrays.asList("O"));
+                                                       List<Object> gpuOut = runOnGPU(spark, scriptStr, inputs, Arrays.asList("O"));
+                                                       assertEqualObjects(cpuOut.get(0), gpuOut.get(0));
+                                               }
+                                       }
+                               }
+                       }
+               }
+       }
+}

Reply via email to