Repository: systemml Updated Branches: refs/heads/master 8fb74b158 -> 628ffad1b
http://git-wip-us.apache.org/repos/asf/systemml/blob/628ffad1/src/main/java/org/apache/sysml/utils/GPUStatistics.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/utils/GPUStatistics.java b/src/main/java/org/apache/sysml/utils/GPUStatistics.java index c0f1d5e..e6ea4a7 100644 --- a/src/main/java/org/apache/sysml/utils/GPUStatistics.java +++ b/src/main/java/org/apache/sysml/utils/GPUStatistics.java @@ -26,7 +26,7 @@ import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.LongAdder; import org.apache.sysml.api.DMLScript; @@ -35,190 +35,186 @@ import org.apache.sysml.api.DMLScript; * Printed as part of {@link Statistics}. */ public class GPUStatistics { - //TODO fix formatting - //TODO replace AtomicLong with LongAdder - - - // Whether or not extra per-instruction statistics will be recorded and shown for the GPU - public static boolean DISPLAY_STATISTICS = false; - - private static int iNoOfExecutedGPUInst = 0; - - public static long cudaInitTime = 0; - public static long cudaLibrariesInitTime = 0; - public static AtomicLong cudaSparseToDenseTime = new AtomicLong(0); // time spent in converting sparse matrix block to dense - public static AtomicLong cudaDenseToSparseTime = new AtomicLong(0); // time spent in converting dense matrix block to sparse - public static AtomicLong cudaSparseConversionTime = new AtomicLong(0); // time spent in converting between sparse block types - public static AtomicLong cudaSparseToDenseCount = new AtomicLong(0); - public static AtomicLong cudaDenseToSparseCount = new AtomicLong(0); - public static AtomicLong cudaSparseConversionCount = new AtomicLong(0); - - public static AtomicLong cudaAllocTime = new AtomicLong(0); // time spent in allocating memory on the GPU - public static AtomicLong cudaDeAllocTime = new AtomicLong(0); // time spent in 
deallocating memory on the GPU - public static AtomicLong cudaMemSet0Time = new AtomicLong(0); // time spent in setting memory to 0 on the GPU (part of reusing and for new allocates) - public static AtomicLong cudaToDevTime = new AtomicLong(0); // time spent in copying data from host (CPU) to device (GPU) memory - public static AtomicLong cudaFromDevTime = new AtomicLong(0); // time spent in copying data from device to host - public static AtomicLong cudaAllocCount = new AtomicLong(0); - public static AtomicLong cudaDeAllocCount = new AtomicLong(0); - public static AtomicLong cudaMemSet0Count = new AtomicLong(0); - public static AtomicLong cudaToDevCount = new AtomicLong(0); - public static AtomicLong cudaFromDevCount = new AtomicLong(0); - public static AtomicLong cudaEvictionCount = new AtomicLong(0); - - // Per instruction miscellaneous timers. - // Used to record events in a CP Heavy Hitter instruction and - // provide a breakdown of how time was spent in that instruction - private static HashMap<String, HashMap<String, Long>> _cpInstMiscTime = new HashMap<String, HashMap<String, Long>> (); - private static HashMap<String, HashMap<String, Long>> _cpInstMiscCount = new HashMap<String, HashMap<String, Long>> (); - - /** - * Resets the miscellaneous timers & counters - */ - public static void resetMiscTimers(){ - _cpInstMiscTime.clear(); - _cpInstMiscCount.clear(); - } - - /** - * Resets all the cuda counters and timers, including the misc timers & counters - */ - public static void reset(){ - cudaInitTime = 0; - cudaLibrariesInitTime = 0; - cudaAllocTime.set(0); - cudaDeAllocTime.set(0); - cudaMemSet0Time.set(0); - cudaMemSet0Count.set(0); - cudaToDevTime.set(0); - cudaFromDevTime.set(0); - cudaAllocCount.set(0); - cudaDeAllocCount.set(0); - cudaToDevCount.set(0); - cudaFromDevCount.set(0); - cudaEvictionCount.set(0); - resetMiscTimers(); - } - - - public static synchronized void setNoOfExecutedGPUInst(int numJobs) { - iNoOfExecutedGPUInst = numJobs; - } - - 
public static synchronized void incrementNoOfExecutedGPUInst() { - iNoOfExecutedGPUInst ++; - } - - public static synchronized int getNoOfExecutedGPUInst() { - return iNoOfExecutedGPUInst; - } - - /** - * "Maintains" or adds time to miscellaneous timers per instruction/op, also increments associated count - * @param instructionName name of the instruction/op - * @param miscTimer name of the miscellaneous timer - * @param timeNanos time in nano seconds - * @param incrementCount how much to increment the count of the miscTimer by - */ - public synchronized static void maintainCPMiscTimes( String instructionName, String miscTimer, long timeNanos, long incrementCount) - { - if (!(DISPLAY_STATISTICS || DMLScript.FINEGRAINED_STATISTICS)) - return; - - HashMap<String, Long> miscTimesMap = _cpInstMiscTime.get(instructionName); - if (miscTimesMap == null) { - miscTimesMap = new HashMap<String, Long>(); - _cpInstMiscTime.put(instructionName, miscTimesMap); - } - Long oldVal = miscTimesMap.get(miscTimer); - Long newVal = timeNanos + ((oldVal!=null) ? oldVal : 0); - miscTimesMap.put(miscTimer, newVal); - - HashMap<String, Long> miscCountMap = _cpInstMiscCount.get(instructionName); - if (miscCountMap == null){ - miscCountMap = new HashMap<String, Long>(); - _cpInstMiscCount.put(instructionName, miscCountMap); - } - Long oldCnt = miscCountMap.get(miscTimer); - Long newCnt = incrementCount + ((oldCnt!=null) ? 
oldCnt : 0); - miscCountMap.put(miscTimer, newCnt); - } - - /** - * "Maintains" or adds time to miscellaneous timers per instruction/op, also increments associated count by 1 - * @param instructionName name of the instruction/op - * @param miscTimer name of the miscellaneous timer - * @param timeNanos time in nano seconds - */ - public synchronized static void maintainCPMiscTimes( String instructionName, String miscTimer, long timeNanos){ - maintainCPMiscTimes(instructionName, miscTimer, timeNanos, 1); - } - - /** - * Used to print misc timers (and their counts) for a given instruction/op - * @param instructionName name of the instruction/op - * @return a formatted string of misc timers for a given instruction/op - */ - public static String getStringForCPMiscTimesPerInstruction(String instructionName) { - StringBuffer sb = new StringBuffer(); - HashMap<String, Long> miscTimerMap = _cpInstMiscTime.get(instructionName); - if (miscTimerMap != null) { - List<Map.Entry<String, Long>> sortedList = new ArrayList<Map.Entry<String, Long>>(miscTimerMap.entrySet()); - // Sort the times to display by the most expensive first - Collections.sort(sortedList, new Comparator<Map.Entry<String, Long>>() { - @Override - public int compare(Map.Entry<String, Long> o1, Map.Entry<String, Long> o2) { - return (int) (o1.getValue() - o2.getValue()); - } - }); - Iterator<Map.Entry<String, Long>> miscTimeIter = sortedList.iterator(); - HashMap<String, Long> miscCountMap = _cpInstMiscCount.get(instructionName); - while (miscTimeIter.hasNext()) { - Map.Entry<String, Long> e = miscTimeIter.next(); - String miscTimerName = e.getKey(); - Long miscTimerTime = e.getValue(); - Long miscCount = miscCountMap.get(miscTimerName); - sb.append(miscTimerName + "[" + String.format("%.3f", (double) miscTimerTime / 1000000000.0) + "s," + miscCount + "]"); - if (miscTimeIter.hasNext()) - sb.append(", "); - } - } - return sb.toString(); - } - - /** - * Used to print out cuda timers & counters - * @return a 
formatted string of cuda timers & counters - */ - public static String getStringForCudaTimers() { - StringBuffer sb = new StringBuffer(); - sb.append("CUDA/CuLibraries init time:\t" + String.format("%.3f", cudaInitTime*1e-9) + "/" - + String.format("%.3f", cudaLibrariesInitTime*1e-9) + " sec.\n"); - sb.append("Number of executed GPU inst:\t" + getNoOfExecutedGPUInst() + ".\n"); - sb.append("GPU mem tx time (alloc/dealloc/set0/toDev/fromDev):\t" - + String.format("%.3f", cudaAllocTime.get()*1e-9) + "/" - + String.format("%.3f", cudaDeAllocTime.get()*1e-9) + "/" - + String.format("%.3f", cudaMemSet0Time.get()*1e-9) + "/" - + String.format("%.3f", cudaToDevTime.get()*1e-9) + "/" - + String.format("%.3f", cudaFromDevTime.get()*1e-9) + " sec.\n"); - sb.append("GPU mem tx count (alloc/dealloc/set0/toDev/fromDev/evict):\t" - + cudaAllocCount.get() + "/" - + cudaDeAllocCount.get() + "/" - + cudaMemSet0Count.get() + "/" - + cudaSparseConversionCount.get() + "/" - + cudaToDevCount.get() + "/" - + cudaFromDevCount.get() + "/" - + cudaEvictionCount.get() + ".\n"); - sb.append("GPU conversion time (sparseConv/sp2dense/dense2sp):\t" - + String.format("%.3f", cudaSparseConversionTime.get()*1e-9) + "/" - + String.format("%.3f", cudaSparseToDenseTime.get()*1e-9) + "/" - + String.format("%.3f", cudaDenseToSparseTime.get()*1e-9) + " sec.\n"); - sb.append("GPU conversion count (sparseConv/sp2dense/dense2sp):\t" - + cudaSparseConversionCount.get() + "/" - + cudaSparseToDenseCount.get() + "/" - + cudaDenseToSparseCount.get() + ".\n"); - - return sb.toString(); - } + // Whether or not extra per-instruction statistics will be recorded and shown for the GPU + public static boolean DISPLAY_STATISTICS = false; + + private static int iNoOfExecutedGPUInst = 0; + + public static long cudaInitTime = 0; + public static long cudaLibrariesInitTime = 0; + public static LongAdder cudaSparseToDenseTime = new LongAdder(); // time spent in converting sparse matrix block to dense + public static 
LongAdder cudaDenseToSparseTime = new LongAdder(); // time spent in converting dense matrix block to sparse + public static LongAdder cudaSparseConversionTime = new LongAdder(); // time spent in converting between sparse block types + public static LongAdder cudaSparseToDenseCount = new LongAdder(); + public static LongAdder cudaDenseToSparseCount = new LongAdder(); + public static LongAdder cudaSparseConversionCount = new LongAdder(); + + public static LongAdder cudaAllocTime = new LongAdder(); // time spent in allocating memory on the GPU + public static LongAdder cudaDeAllocTime = new LongAdder(); // time spent in deallocating memory on the GPU + public static LongAdder cudaMemSet0Time = new LongAdder(); // time spent in setting memory to 0 on the GPU (part of reusing and for new allocates) + public static LongAdder cudaToDevTime = new LongAdder(); // time spent in copying data from host (CPU) to device (GPU) memory + public static LongAdder cudaFromDevTime = new LongAdder(); // time spent in copying data from device to host + public static LongAdder cudaAllocCount = new LongAdder(); + public static LongAdder cudaDeAllocCount = new LongAdder(); + public static LongAdder cudaMemSet0Count = new LongAdder(); + public static LongAdder cudaToDevCount = new LongAdder(); + public static LongAdder cudaFromDevCount = new LongAdder(); + public static LongAdder cudaEvictionCount = new LongAdder(); + + // Per instruction miscellaneous timers. 
+ // Used to record events in a CP Heavy Hitter instruction and + // provide a breakdown of how time was spent in that instruction + private static HashMap<String, HashMap<String, Long>> _cpInstMiscTime = new HashMap<String, HashMap<String, Long>> (); + private static HashMap<String, HashMap<String, Long>> _cpInstMiscCount = new HashMap<String, HashMap<String, Long>> (); + + /** + * Resets the miscellaneous timers & counters + */ + public static void resetMiscTimers(){ + _cpInstMiscTime.clear(); + _cpInstMiscCount.clear(); + } + + /** + * Resets all the cuda counters and timers, including the misc timers & counters + */ + public static void reset(){ + cudaInitTime = 0; + cudaLibrariesInitTime = 0; + cudaAllocTime.reset(); + cudaDeAllocTime.reset(); + cudaMemSet0Time.reset(); + cudaMemSet0Count.reset(); + cudaToDevTime.reset(); + cudaFromDevTime.reset(); + cudaAllocCount.reset(); + cudaDeAllocCount.reset(); + cudaToDevCount.reset(); + cudaFromDevCount.reset(); + cudaEvictionCount.reset(); + resetMiscTimers(); + } + + + public static synchronized void setNoOfExecutedGPUInst(int numJobs) { + iNoOfExecutedGPUInst = numJobs; + } + + public static synchronized void incrementNoOfExecutedGPUInst() { + iNoOfExecutedGPUInst ++; + } + + public static synchronized int getNoOfExecutedGPUInst() { + return iNoOfExecutedGPUInst; + } + + /** + * "Maintains" or adds time to miscellaneous timers per instruction/op, also increments associated count + * @param instructionName name of the instruction/op + * @param miscTimer name of the miscellaneous timer + * @param timeNanos time in nano seconds + * @param incrementCount how much to increment the count of the miscTimer by + */ + public synchronized static void maintainCPMiscTimes( String instructionName, String miscTimer, long timeNanos, long incrementCount) + { + if (!(DISPLAY_STATISTICS || DMLScript.FINEGRAINED_STATISTICS)) + return; + + HashMap<String, Long> miscTimesMap = _cpInstMiscTime.get(instructionName); + if (miscTimesMap == 
null) { + miscTimesMap = new HashMap<String, Long>(); + _cpInstMiscTime.put(instructionName, miscTimesMap); + } + Long oldVal = miscTimesMap.get(miscTimer); + Long newVal = timeNanos + ((oldVal!=null) ? oldVal : 0); + miscTimesMap.put(miscTimer, newVal); + + HashMap<String, Long> miscCountMap = _cpInstMiscCount.get(instructionName); + if (miscCountMap == null){ + miscCountMap = new HashMap<String, Long>(); + _cpInstMiscCount.put(instructionName, miscCountMap); + } + Long oldCnt = miscCountMap.get(miscTimer); + Long newCnt = incrementCount + ((oldCnt!=null) ? oldCnt : 0); + miscCountMap.put(miscTimer, newCnt); + } + + /** + * "Maintains" or adds time to miscellaneous timers per instruction/op, also increments associated count by 1 + * @param instructionName name of the instruction/op + * @param miscTimer name of the miscellaneous timer + * @param timeNanos time in nano seconds + */ + public synchronized static void maintainCPMiscTimes( String instructionName, String miscTimer, long timeNanos){ + maintainCPMiscTimes(instructionName, miscTimer, timeNanos, 1); + } + + /** + * Used to print misc timers (and their counts) for a given instruction/op + * @param instructionName name of the instruction/op + * @return a formatted string of misc timers for a given instruction/op + */ + public static String getStringForCPMiscTimesPerInstruction(String instructionName) { + StringBuffer sb = new StringBuffer(); + HashMap<String, Long> miscTimerMap = _cpInstMiscTime.get(instructionName); + if (miscTimerMap != null) { + List<Map.Entry<String, Long>> sortedList = new ArrayList<Map.Entry<String, Long>>(miscTimerMap.entrySet()); + // Sort the times to display by the most expensive first + Collections.sort(sortedList, new Comparator<Map.Entry<String, Long>>() { + @Override + public int compare(Map.Entry<String, Long> o1, Map.Entry<String, Long> o2) { + return (int) (o1.getValue() - o2.getValue()); + } + }); + Iterator<Map.Entry<String, Long>> miscTimeIter = sortedList.iterator(); + 
HashMap<String, Long> miscCountMap = _cpInstMiscCount.get(instructionName); + while (miscTimeIter.hasNext()) { + Map.Entry<String, Long> e = miscTimeIter.next(); + String miscTimerName = e.getKey(); + Long miscTimerTime = e.getValue(); + Long miscCount = miscCountMap.get(miscTimerName); + sb.append(miscTimerName + "[" + String.format("%.3f", (double) miscTimerTime / 1000000000.0) + "s," + miscCount + "]"); + if (miscTimeIter.hasNext()) + sb.append(", "); + } + } + return sb.toString(); + } + + /** + * Used to print out cuda timers & counters + * @return a formatted string of cuda timers & counters + */ + public static String getStringForCudaTimers() { + StringBuffer sb = new StringBuffer(); + sb.append("CUDA/CuLibraries init time:\t" + String.format("%.3f", cudaInitTime*1e-9) + "/" + + String.format("%.3f", cudaLibrariesInitTime*1e-9) + " sec.\n"); + sb.append("Number of executed GPU inst:\t" + getNoOfExecutedGPUInst() + ".\n"); + sb.append("GPU mem tx time (alloc/dealloc/set0/toDev/fromDev):\t" + + String.format("%.3f", cudaAllocTime.longValue()*1e-9) + "/" + + String.format("%.3f", cudaDeAllocTime.longValue()*1e-9) + "/" + + String.format("%.3f", cudaMemSet0Time.longValue()*1e-9) + "/" + + String.format("%.3f", cudaToDevTime.longValue()*1e-9) + "/" + + String.format("%.3f", cudaFromDevTime.longValue()*1e-9) + " sec.\n"); + sb.append("GPU mem tx count (alloc/dealloc/set0/toDev/fromDev/evict):\t" + + cudaAllocCount.longValue() + "/" + + cudaDeAllocCount.longValue() + "/" + + cudaMemSet0Count.longValue() + "/" + + cudaSparseConversionCount.longValue() + "/" + + cudaToDevCount.longValue() + "/" + + cudaFromDevCount.longValue() + "/" + + cudaEvictionCount.longValue() + ".\n"); + sb.append("GPU conversion time (sparseConv/sp2dense/dense2sp):\t" + + String.format("%.3f", cudaSparseConversionTime.longValue()*1e-9) + "/" + + String.format("%.3f", cudaSparseToDenseTime.longValue()*1e-9) + "/" + + String.format("%.3f", cudaDenseToSparseTime.longValue()*1e-9) + " sec.\n"); + 
sb.append("GPU conversion count (sparseConv/sp2dense/dense2sp):\t" + + cudaSparseConversionCount.longValue() + "/" + + cudaSparseToDenseCount.longValue() + "/" + + cudaDenseToSparseCount.longValue() + ".\n"); + + return sb.toString(); + } } http://git-wip-us.apache.org/repos/asf/systemml/blob/628ffad1/src/test/java/org/apache/sysml/test/gpu/RightIndexingTests.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/gpu/RightIndexingTests.java b/src/test/java/org/apache/sysml/test/gpu/RightIndexingTests.java new file mode 100644 index 0000000..e891b7f --- /dev/null +++ b/src/test/java/org/apache/sysml/test/gpu/RightIndexingTests.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.sysml.test.gpu;

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;

import org.apache.sysml.api.mlcontext.Matrix;
import org.apache.sysml.test.utils.TestUtils;
import org.junit.Test;

/**
 * This test uses the script: O = X[rl:ru,cl:cu]
 *
 * Every combination of lower/upper row and column bounds (lower &lt;= upper)
 * taken from the index arrays is run on CPU and on GPU for each sparsity,
 * and the two outputs are compared.
 */
public class RightIndexingTests extends GPUTests {

	private final static String TEST_NAME = "RightIndexingTests";
	// candidate row bounds; the largest value is also used as the number of rows of X
	private final int[] indexes1 = new int[] { 1, 5, 10, 100 };
	// candidate column bounds; the largest value is also used as the number of columns of X
	private final int[] indexes2 = new int[] { 1, 5, 10, 100 };
	private final double[] sparsities = new double[] { 0.0, 0.03, 0.3, 0.9 };
	private final int seed = 42;

	@Override
	public void setUp() {
		TestUtils.clearAssertionInformation();
		addTestConfiguration(TEST_DIR, TEST_NAME);
		getAndLoadTestConfiguration(TEST_NAME);
	}

	@Test
	public void runRightIndexingTest() {
		int dim1 = Arrays.stream(indexes1).max().getAsInt();
		int dim2 = Arrays.stream(indexes2).max().getAsInt();
		for (int i1 = 0; i1 < indexes1.length; i1++) {
			int rl = indexes1[i1];
			// upper bound starts at the lower bound so that rl <= ru always holds
			for (int i2 = i1; i2 < indexes1.length; i2++) {
				int ru = indexes1[i2];
				for (int j1 = 0; j1 < indexes2.length; j1++) {
					int cl = indexes2[j1];
					for (int j2 = j1; j2 < indexes2.length; j2++) {
						int cu = indexes2[j2];
						String scriptStr = "O = X[" + rl + ":" + ru + "," + cl + ":" + cu + "];";
						for (double sparsity : sparsities) {
							// only X is referenced by the script; the second, unused input
							// matrix that used to be generated here has been dropped
							Matrix X = generateInputMatrix(spark, dim1, dim2, sparsity, seed);
							HashMap<String, Object> inputs = new HashMap<>();
							inputs.put("X", X);
							System.out.println("Executing the script: " + scriptStr);
							List<Object> cpuOut = runOnCPU(spark, scriptStr, inputs, Arrays.asList("O"));
							List<Object> gpuOut = runOnGPU(spark, scriptStr, inputs, Arrays.asList("O"));
							assertEqualObjects(cpuOut.get(0), gpuOut.get(0));
						}
					}
				}
			}
		}
	}
}
