http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestTool.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestTool.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestTool.java deleted file mode 100644 index c130031..0000000 --- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestTool.java +++ /dev/null @@ -1,1411 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.sysml.runtime.controlprogram.parfor.opt; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.FileReader; -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Date; -import java.util.HashMap; -import java.util.LinkedList; -import java.util.Map.Entry; -import java.util.Random; -import java.util.StringTokenizer; - -import javax.xml.stream.XMLInputFactory; -import javax.xml.stream.XMLOutputFactory; -import javax.xml.stream.XMLStreamConstants; -import javax.xml.stream.XMLStreamException; -import javax.xml.stream.XMLStreamReader; -import javax.xml.stream.XMLStreamWriter; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.sysml.api.DMLException; -import org.apache.sysml.api.DMLScript; -import org.apache.sysml.conf.ConfigurationManager; -import org.apache.sysml.lops.Lop; -import org.apache.sysml.lops.MMTSJ.MMTSJType; -import org.apache.sysml.parser.DMLProgram; -import org.apache.sysml.parser.DataIdentifier; -import org.apache.sysml.parser.Expression.DataType; -import org.apache.sysml.parser.Expression.ValueType; -import org.apache.sysml.parser.ExternalFunctionStatement; -import org.apache.sysml.parser.ParseException; -import org.apache.sysml.runtime.DMLRuntimeException; -import org.apache.sysml.runtime.controlprogram.ExternalFunctionProgramBlockCP; -import org.apache.sysml.runtime.controlprogram.LocalVariableMap; -import org.apache.sysml.runtime.controlprogram.Program; -import org.apache.sysml.runtime.controlprogram.ProgramBlock; -import org.apache.sysml.runtime.controlprogram.caching.CacheException; -import org.apache.sysml.runtime.controlprogram.caching.LazyWriteBuffer; -import org.apache.sysml.runtime.controlprogram.caching.MatrixObject; -import org.apache.sysml.runtime.controlprogram.context.ExecutionContext; -import 
org.apache.sysml.runtime.controlprogram.context.ExecutionContextFactory; -import org.apache.sysml.runtime.controlprogram.parfor.opt.CostEstimator.DataFormat; -import org.apache.sysml.runtime.controlprogram.parfor.opt.CostEstimator.TestMeasure; -import org.apache.sysml.runtime.controlprogram.parfor.stat.Timing; -import org.apache.sysml.runtime.controlprogram.parfor.util.IDHandler; -import org.apache.sysml.runtime.controlprogram.parfor.util.IDSequence; -import org.apache.sysml.runtime.instructions.CPInstructionParser; -import org.apache.sysml.runtime.instructions.Instruction; -import org.apache.sysml.runtime.instructions.MRJobInstruction; -import org.apache.sysml.runtime.instructions.cp.Data; -import org.apache.sysml.runtime.instructions.cp.DataGenCPInstruction; -import org.apache.sysml.runtime.instructions.cp.FunctionCallCPInstruction; -import org.apache.sysml.runtime.io.IOUtilFunctions; -import org.apache.sysml.runtime.matrix.MatrixCharacteristics; -import org.apache.sysml.runtime.matrix.MatrixFormatMetaData; -import org.apache.sysml.runtime.matrix.data.InputInfo; -import org.apache.sysml.runtime.matrix.data.MatrixBlock; -import org.apache.sysml.runtime.matrix.data.OutputInfo; -import org.apache.sysml.runtime.util.MapReduceTool; - -import au.com.bytecode.opencsv.CSVReader; -import au.com.bytecode.opencsv.CSVWriter; - -/** - * DML Instructions Performance Test Tool: - * - * Creates an offline performance profile (required once per installation) of DML instructions. - * The profile is a combination of all individual statistical models trained per combination of - * instruction and test configuration. In order to train those models, we execute and measure - * real executions of DML instructions on random input data. Finally, during runtime, the profile - * is used by the costs estimator in order to create statistic estimates for cost-based optimization. 
- * - * - */ -@Deprecated -public class PerfTestTool -{ - - //public parameters (used for estimation) - public static final long MIN_DATASIZE = 1000; - public static final long MAX_DATASIZE = 1000000; - public static final long DEFAULT_DATASIZE = 500000;//(MAX_DATASIZE-MIN_DATASIZE)/2; - public static final long DATASIZE_MR_SCALE = 20; - public static final double MIN_SPARSITY = 0.1; - public static final double MAX_SPARSITY = 1.0; - public static final double DEFAULT_SPARSITY = 0.5;//(MAX_SPARSITY-MIN_SPARSITY)/2; - - //internal parameters - private static final boolean READ_STATS_ON_STARTUP = false; - private static final int TEST_REPETITIONS = 10; - private static final int NUM_SAMPLES_PER_TEST = 11; - private static final int MODEL_MAX_ORDER = 2; - private static final boolean MODEL_INTERCEPT = true; - - private static final String PERF_TOOL_DIR = "./conf/PerfTestTool/"; -// private static final String PERF_RESULTS_FNAME = PERF_TOOL_DIR + "%id%.dat"; - private static final String PERF_PROFILE_FNAME = PERF_TOOL_DIR + "performance_profile.xml"; - private static final String DML_SCRIPT_FNAME = "./src/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml"; - private static final String DML_TMP_FNAME = PERF_TOOL_DIR + "temp.dml"; - - //XML profile tags and attributes - private static final String XML_PROFILE = "profile"; - private static final String XML_DATE = "date"; - private static final String XML_INSTRUCTION = "instruction"; - private static final String XML_ID = "id"; - private static final String XML_NAME = "name"; - private static final String XML_COSTFUNCTION = "cost_function"; - private static final String XML_MEASURE = "measure"; - private static final String XML_VARIABLE = "lvariable"; - private static final String XML_INTERNAL_VARIABLES = "pvariables"; - private static final String XML_DATAFORMAT = "dataformat"; - private static final String XML_ELEMENT_DELIMITER = "\u002c"; //","; - - //ID sequences for instructions and test 
definitions - private static IDSequence _seqInst = null; - private static IDSequence _seqTestDef = null; - - //registered instructions and test definitions - private static HashMap<Integer, PerfTestDef> _regTestDef = null; - private static HashMap<Integer, Instruction> _regInst = null; - private static HashMap<Integer, String> _regInst_IDNames = null; - private static HashMap<String, Integer> _regInst_NamesID = null; - private static HashMap<Integer, Integer[]> _regInst_IDTestDef = null; - private static HashMap<Integer, Boolean> _regInst_IDVectors = null; - private static HashMap<Integer, IOSchema> _regInst_IDIOSchema = null; - - protected static final Log LOG = LogFactory.getLog(PerfTestTool.class.getName()); - - - private static Integer[] _defaultConf = null; -// private static Integer[] _MRConf = null; - - //raw measurement data (instID, physical defID, results) - private static HashMap<Integer,HashMap<Integer,LinkedList<Double>>> _results = null; - - //profile data - private static boolean _flagReadData = false; - private static HashMap<Integer,HashMap<Integer,CostFunction>> _profile = null; - - public enum TestVariable //logical test variable - { - DATA_SIZE, - SPARSITY, - PARALLELISM, - - //some mr specific conf properites - SORT_IO_MEM - } - - public enum InternalTestVariable //physical test variable - { - DATA_SIZE, - DIM1_SIZE, - DIM2_SIZE, - DIM3_SIZE, - SPARSITY, - SORT_IO_MEM - } - - public enum IOSchema - { - NONE_NONE, - NONE_UNARY, - UNARY_UNARY, - BINARY_NONE, - BINARY_UNARY - } - - public enum TestConstants //logical test constants - { - DFS_READ_THROUGHPUT, - DFS_WRITE_THROUGHPUT, - LFS_READ_THROUGHPUT, - LFS_WRITE_THROUGHPUT - } - - static - { - //init repository - _seqInst = new IDSequence(); - _seqTestDef = new IDSequence(); - _regTestDef = new HashMap<Integer, PerfTestDef>(); - _regInst = new HashMap<Integer, Instruction>(); - _regInst_IDNames = new HashMap<Integer, String>(); - _regInst_NamesID = new HashMap<String, Integer>(); - 
_regInst_IDTestDef = new HashMap<Integer, Integer[]>(); - _regInst_IDVectors = new HashMap<Integer, Boolean>(); - _regInst_IDIOSchema = new HashMap<Integer, IOSchema>(); - _results = new HashMap<Integer, HashMap<Integer,LinkedList<Double>>>(); - _profile = new HashMap<Integer, HashMap<Integer,CostFunction>>(); - _flagReadData = false; - - //load existing profile if required - try - { - if( READ_STATS_ON_STARTUP ) - readProfile( PERF_PROFILE_FNAME ); - } - catch(Exception ex) - { - throw new RuntimeException(ex); - } - } - - public static void lazyInit() - throws DMLRuntimeException - { - //read profile for first access - if( !_flagReadData ) - { - try - { - //register all testdefs and instructions - registerTestConfigurations(); - registerInstructions(); - - //read profile - readProfile( PERF_PROFILE_FNAME ); - } - catch(Exception ex) - { - throw new DMLRuntimeException(ex); - } - } - - if( _profile == null ) - throw new DMLRuntimeException("Performance test results have not been loaded completely."); - } - - public static boolean isRegisteredInstruction(String opStr) - throws DMLRuntimeException - { - //init if required - lazyInit(); - - //determine if inst registered - return _regInst_NamesID.containsKey(opStr); - } - - public static CostFunction getCostFunction( String instName, TestMeasure measure, TestVariable variable, DataFormat dataformat ) - throws DMLRuntimeException - { - //init if required - lazyInit(); - - CostFunction tmp = null; - int instID = getInstructionID( instName ); - if( instID != -1 ) //existing profile - { - int tdefID = getMappedTestDefID(instID, measure, variable, dataformat); - tmp = _profile.get(instID).get(tdefID); - } - return tmp; - } - - @SuppressWarnings("all") - public static boolean runTest() - { - boolean ret = false; - - try - { - Timing time = new Timing(); - time.start(); - - //init caching - LazyWriteBuffer.init(); - - //register all testdefs and instructions - registerTestConfigurations(); - registerInstructions(); - - 
//execute tests for all confs and all instructions - executeTest(); - - //compute regression models - int rows = NUM_SAMPLES_PER_TEST; - int cols = MODEL_MAX_ORDER + (MODEL_INTERCEPT ? 1 : 0); - HashMap<Integer,Long> tmp = writeResults( PERF_TOOL_DIR ); - computeRegressionModels( DML_SCRIPT_FNAME, DML_TMP_FNAME, PERF_TOOL_DIR, tmp.size(), rows, cols); - readRegressionModels( PERF_TOOL_DIR, tmp); - - //execConstantRuntimeTest(); - //execConstantMemoryTest(); - - //write final profile to XML file - writeProfile(PERF_TOOL_DIR, PERF_PROFILE_FNAME); - System.out.format("SystemML PERFORMANCE TEST TOOL: finished profiling (in %.2f min), profile written to "+PERF_PROFILE_FNAME+"%n", time.stop()/60000); - - ret = true; - } - catch(Exception ex) - { - LOG.error("Failed to run performance test.", ex); - } - - return ret; - } - - private static void registerTestConfigurations() - { - //reset ID Sequence for consistent IDs - _seqTestDef.reset(); - - //register default testdefs //TODO - TestMeasure[] M = new TestMeasure[]{ TestMeasure.EXEC_TIME/*, TestMeasure.MEMORY_USAGE*/ }; - DataFormat[] D = new DataFormat[]{DataFormat.DENSE/*,DataFormat.SPARSE*/}; - Integer[] defaultConf = new Integer[M.length*D.length*2]; - int i=0; - for( TestMeasure m : M ) //for all measures - for( DataFormat d : D ) //for all data formats - { - defaultConf[i++] = registerTestDef( new PerfTestDef(m, TestVariable.DATA_SIZE, d, InternalTestVariable.DATA_SIZE, - MIN_DATASIZE, MAX_DATASIZE, NUM_SAMPLES_PER_TEST ) ); - defaultConf[i++] = registerTestDef( new PerfTestDef(m, TestVariable.SPARSITY, d, InternalTestVariable.SPARSITY, - MIN_SPARSITY, MAX_SPARSITY, NUM_SAMPLES_PER_TEST ) ); - } - - - //register advanced (multi-dim) test defs - //FIXME enable - /*for( TestMeasure m : M ) //for all measures - for( DataFormat d : D ) //for all data formats - { - registerTestDef( new PerfTestDef( m, TestVariable.DATA_SIZE, d, - new 
InternalTestVariable[]{InternalTestVariable.DIM1_SIZE,InternalTestVariable.DIM2_SIZE,InternalTestVariable.DIM3_SIZE}, - MIN_DIMSIZE, MAX_DIMSIZE, NUM_SAMPLES_PER_TEST ) ); - }?* - - - //register MR specific instructions FIXME: just for test - /*Integer[] mrConf = new Integer[D.length]; - i = 0; - for( DataFormat d : D ) - { - mrConf[i++] = registerTestDef( new PerfTestDef(TestMeasure.EXEC_TIME, TestVariable.SORT_IO_MEM, d, - InternalTestVariable.SORT_IO_MEM, - MIN_SORT_IO_MEM, MAX_SORT_IO_MEM, NUM_SAMPLES_PER_TEST ) ); - }*/ - - //set default testdefs - _defaultConf = defaultConf; - //_MRConf = mrConf; - } - - private static void registerInstructions() - throws DMLRuntimeException - { - //reset ID sequences for consistent IDs - _seqInst.reset(); - - /////// - // CP instructions - - //matrix multiply mmtsj - registerInstruction( "CP"+Lop.OPERAND_DELIMITOR+"tsmm", CPInstructionParser.parseSingleInstruction("CP"+Lop.OPERAND_DELIMITOR+"tsmm"+Lop.OPERAND_DELIMITOR+"A"+Lop.DATATYPE_PREFIX+"MATRIX"+Lop.VALUETYPE_PREFIX+"DOUBLE"+Lop.OPERAND_DELIMITOR+"C"+Lop.DATATYPE_PREFIX+"MATRIX"+Lop.VALUETYPE_PREFIX+"DOUBLE"+Lop.OPERAND_DELIMITOR+MMTSJType.LEFT), - getDefaultTestDefs(), false, IOSchema.UNARY_UNARY ); - - /* - //matrix multiply - registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"ba+*", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"ba+*"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"B"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"C"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"), - getDefaultTestDefs(), false, IOSchema.BINARY_UNARY ); - ////registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"ba+*", 
CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"ba+*"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"B"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"C"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"), - //// changeToMuliDimTestDefs(TestVariable.DATA_SIZE, getDefaultTestDefs()) ); - //rand - registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"Rand", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"Rand"+Lops.OPERAND_DELIMITOR+"rows=1"+Lops.OPERAND_DELIMITOR+"cols=1"+Lops.OPERAND_DELIMITOR+"rowsInBlock=1000"+Lops.OPERAND_DELIMITOR+"colsInBlock=1000"+Lops.OPERAND_DELIMITOR+"min=1.0"+Lops.OPERAND_DELIMITOR+"max=100.0"+Lops.OPERAND_DELIMITOR+"sparsity=1.0"+Lops.OPERAND_DELIMITOR+"seed=7"+Lops.OPERAND_DELIMITOR+"pdf=uniform"+Lops.OPERAND_DELIMITOR+"dir=."+Lops.OPERAND_DELIMITOR+"C"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"), - getDefaultTestDefs(), false, IOSchema.NONE_UNARY ); - //matrix transpose - registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"r'", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"r'"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"C"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"), - getDefaultTestDefs(), false, IOSchema.UNARY_UNARY ); - //sum - registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"uak+", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"uak+"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"B"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"), //needs B instead of C - getDefaultTestDefs(), false, IOSchema.UNARY_UNARY ); - //external function - registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"extfunct", 
CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"extfunct"+Lops.OPERAND_DELIMITOR+DMLProgram.DEFAULT_NAMESPACE+""+Lops.OPERAND_DELIMITOR+"execPerfTestExtFunct"+Lops.OPERAND_DELIMITOR+"1"+Lops.OPERAND_DELIMITOR+"1"+Lops.OPERAND_DELIMITOR+"A"+Lops.OPERAND_DELIMITOR+"C"), - getDefaultTestDefs(), false, IOSchema.UNARY_UNARY ); - //central moment - registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"cm", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"cm"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"2"+Lops.DATATYPE_PREFIX+"SCALAR"+Lops.VALUETYPE_PREFIX+"INT"+Lops.OPERAND_DELIMITOR+"c"+Lops.DATATYPE_PREFIX+"SCALAR"+Lops.VALUETYPE_PREFIX+"DOUBLE"), - getDefaultTestDefs(), true, IOSchema.UNARY_NONE ); - //co-variance - registerInstruction( "CP"+Lops.OPERAND_DELIMITOR+"cov", CPInstructionParser.parseSingleInstruction("CP"+Lops.OPERAND_DELIMITOR+"cov"+Lops.OPERAND_DELIMITOR+"A"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"B"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+"c"+Lops.DATATYPE_PREFIX+"SCALAR"+Lops.VALUETYPE_PREFIX+"DOUBLE"), - getDefaultTestDefs(), true, IOSchema.BINARY_NONE ); - */ - - /* - /////// - // MR instructions - registerInstruction( "jobtypeMMRJ", createMRJobInstruction(JobType.MMRJ, - MRInstructionParser.parseSingleInstruction("MR"+Lops.OPERAND_DELIMITOR+ - "rmm"+Lops.OPERAND_DELIMITOR+ - "0"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+ - "1"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE"+Lops.OPERAND_DELIMITOR+ - "2"+Lops.DATATYPE_PREFIX+"MATRIX"+Lops.VALUETYPE_PREFIX+"DOUBLE ")), - _MRConf, false, IOSchema.BINARY_UNARY ); - - */ - /*ADD ADDITIONAL INSTRUCTIONS HERE*/ - - - - //extend list to all (expensive) instructions; maybe also: createvar, assignvar, cpvar, rm, mv, setfilename, rmfilevar - - } - -/* - 
private static Instruction createMRJobInstruction(JobType type, MRInstruction inst) - { - MRJobInstruction mrinst = new MRJobInstruction(type); - - if( type == JobType.MMRJ ) - { - ArrayList<String> inLab = new ArrayList<String>(); - ArrayList<String> outLab = new ArrayList<String>(); - inLab.add("A"); - inLab.add("B"); - outLab.add("C"); - - mrinst.setMMRJInstructions(new String[]{"A","B"}, - "", - inst.toString(), - "", - "", - new String[]{"C"}, - new byte[]{2}, - 10, 1 ); - - } - - - return mrinst; - } -*/ - - private static int registerTestDef( PerfTestDef def ) - { - int ID = (int)_seqTestDef.getNextID(); - - _regTestDef.put( ID, def ); - - return ID; - } - - private static void registerInstruction( String iname, Instruction inst, Integer[] testDefIDs, boolean vectors, IOSchema schema ) - { - int ID = (int)_seqInst.getNextID(); - registerInstruction(ID, iname, inst, testDefIDs, vectors, schema); - } - - private static void registerInstruction( int ID, String iname, Instruction inst, Integer[] testDefIDs, boolean vector, IOSchema schema ) - { - _regInst.put( ID, inst ); - _regInst_IDNames.put( ID, iname ); - _regInst_NamesID.put( iname, ID ); - _regInst_IDTestDef.put( ID, testDefIDs ); - _regInst_IDVectors.put( ID, vector ); - _regInst_IDIOSchema.put( ID, schema ); - } - - private static int getMappedTestDefID( int instID, TestMeasure measure, TestVariable variable, DataFormat dataformat ) - { - int ret = -1; - - for( Integer defID : _regInst_IDTestDef.get(instID) ) - { - PerfTestDef def = _regTestDef.get(defID); - if( def.getMeasure()==measure - && def.getVariable()==variable - && def.getDataformat()==dataformat ) - { - ret = defID; - break; - } - } - - return ret; - } - - @SuppressWarnings("unused") - private static int getTestDefID( TestMeasure measure, TestVariable lvariable, DataFormat dataformat, InternalTestVariable pvariable ) - { - return getTestDefID(measure, lvariable, dataformat, new InternalTestVariable[]{pvariable}); - } - - private static int 
getTestDefID( TestMeasure measure, TestVariable lvariable, DataFormat dataformat, InternalTestVariable[] pvariables ) - { - int ret = -1; - - for( Entry<Integer,PerfTestDef> e : _regTestDef.entrySet() ) - { - PerfTestDef def = e.getValue(); - TestMeasure tmp1 = def.getMeasure(); - TestVariable tmp2 = def.getVariable(); - DataFormat tmp3 = def.getDataformat(); - InternalTestVariable[] tmp4 = def.getInternalVariables(); - - if( tmp1==measure && tmp2==lvariable && tmp3==dataformat ) - { - boolean flag = true; - for( int i=0; i<tmp4.length; i++ ) - flag &= ( tmp4[i] == pvariables[i] ); - - if( flag ) - { - ret = e.getKey(); - break; - } - } - } - - return ret; - } - - private static int getInstructionID( String instName ) - { - Integer ret = _regInst_NamesID.get( instName ); - return ( ret!=null )? ret : -1; - } - - @SuppressWarnings("unused") - private static Integer[] getAllTestDefs() - { - return _regTestDef.keySet().toArray(new Integer[0]); - } - - private static Integer[] getDefaultTestDefs() - { - return _defaultConf; - } - - @SuppressWarnings("unused") - private static Integer[] changeToMuliDimTestDefs( TestVariable v, Integer[] IDs ) - { - Integer[] tmp = new Integer[IDs.length]; - - for( int i=0; i<tmp.length; i++ ) - { - PerfTestDef def = _regTestDef.get(IDs[i]); - if( def.getVariable() == v ) //filter logical variables - { - //find multidim version - InternalTestVariable[] in = null; - switch( v ) - { - case DATA_SIZE: - in = new InternalTestVariable[]{InternalTestVariable.DIM1_SIZE,InternalTestVariable.DIM2_SIZE,InternalTestVariable.DIM3_SIZE}; - break; - default: - //do nothing - } - - int newid = getTestDefID(def.getMeasure(), def.getVariable(), def.getDataformat(), in ); - - //exchange testdef ID - tmp[i] = newid; - } - else - { - tmp[i] = IDs[i]; - } - } - - return tmp; - } - - private static void executeTest( ) - throws DMLRuntimeException, IOException - { - System.out.println("SystemML PERFORMANCE TEST TOOL:"); - - //foreach registered instruction - 
for( Entry<Integer,Instruction> inst : _regInst.entrySet() ) - { - int instID = inst.getKey(); - System.out.println( "Running INSTRUCTION "+_regInst_IDNames.get(instID) ); - - Integer[] testDefIDs = _regInst_IDTestDef.get(instID); - boolean vectors = _regInst_IDVectors.get(instID); - IOSchema schema = _regInst_IDIOSchema.get(instID); - - //create tmp program block and set instruction - Program prog = new Program(); - ProgramBlock pb = new ProgramBlock( prog ); - ArrayList<Instruction> ainst = new ArrayList<Instruction>(); - ainst.add( inst.getValue() ); - pb.setInstructions(ainst); - - ExecutionContext ec = ExecutionContextFactory.createContext(); - - //foreach registered test configuration - for( Integer defID : testDefIDs ) - { - PerfTestDef def = _regTestDef.get(defID); - TestMeasure m = def.getMeasure(); - TestVariable lv = def.getVariable(); - DataFormat df = def.getDataformat(); - InternalTestVariable[] pv = def.getInternalVariables(); - double min = def.getMin(); - double max = def.getMax(); - double samples = def.getNumSamples(); - - System.out.println( "Running TESTDEF(measure="+m+", variable="+String.valueOf(lv)+" "+pv.length+", format="+String.valueOf(df)+")" ); - - //vary input variable - LinkedList<Double> dmeasure = new LinkedList<Double>(); - LinkedList<Double> dvariable = generateSequence(min, max, samples); - int plen = pv.length; - - if( plen == 1 ) //1D function - { - for( Double var : dvariable ) - { - dmeasure.add(executeTestCase1D(m, pv[0], df, var, pb, vectors, schema, ec)); - } - } - else //multi-dim function - { - //init index stack - int[] index = new int[plen]; - for( int i=0; i<plen; i++ ) - index[i] = 0; - - //execute test - int dlen = dvariable.size(); - double[] buff = new double[plen]; - while( index[0]<dlen ) - { - //set buffer values - for( int i=0; i<plen; i++ ) - buff[i] = dvariable.get(index[i]); - - //core execution - dmeasure.add(executeTestCaseMD(m, pv, df, buff, pb, schema, ec)); //not applicable for vector flag - - 
//increment indexes - for( int i=plen-1; i>=0; i-- ) - { - if(i==plen-1) - index[i]++; - else if( index[i+1] >= dlen ) - { - index[i]++; - index[i+1]=0; - } - } - } - } - - - //append values to results - if( !_results.containsKey(instID) ) - _results.put(instID, new HashMap<Integer, LinkedList<Double>>()); - _results.get(instID).put(defID, dmeasure); - - } - } - } - - private static double executeTestCase1D( TestMeasure m, InternalTestVariable v, DataFormat df, double varValue, ProgramBlock pb, boolean vectors, IOSchema schema, ExecutionContext ec ) - throws DMLRuntimeException, IOException - { - double datasize = -1; - double dim1 = -1, dim2 = -1; - double sparsity = -1; - //double sortio = -1; - - System.out.println( "VAR VALUE "+varValue ); - - //set test variables - switch ( v ) - { - case DATA_SIZE: - datasize = varValue; - sparsity = DEFAULT_SPARSITY; - break; - case SPARSITY: - datasize = DEFAULT_DATASIZE; - sparsity = varValue; - break; - case SORT_IO_MEM: //FIXME - datasize = DEFAULT_DATASIZE * DATASIZE_MR_SCALE; - sparsity = DEFAULT_SPARSITY; - //sortio = varValue; - break; - default: - //do nothing - } - - //set specific dimensions - if( vectors ) - { - dim1 = datasize; - dim2 = 1; - } - else - { - dim1 = Math.sqrt( datasize ); - dim2 = dim1; - } - - //instruction-specific configurations - Instruction inst = pb.getInstruction(0); //always exactly one instruction - if( inst instanceof DataGenCPInstruction ) - { - DataGenCPInstruction rand = (DataGenCPInstruction) inst; - rand.setRows((long)dim1); - rand.setCols((long)dim2); - rand.setSparsity(sparsity); - } - else if ( inst instanceof FunctionCallCPInstruction ) //ExternalFunctionInvocationInstruction - { - Program prog = pb.getProgram(); - ArrayList<DataIdentifier> in = new ArrayList<DataIdentifier>(); - DataIdentifier dat1 = new DataIdentifier("A"); - dat1.setDataType(DataType.MATRIX); - dat1.setValueType(ValueType.DOUBLE); - in.add(dat1); - ArrayList<DataIdentifier> out = new 
ArrayList<DataIdentifier>(); - DataIdentifier dat2 = new DataIdentifier("C"); - dat2.setDataType(DataType.MATRIX); - dat2.setValueType(ValueType.DOUBLE); - out.add(dat2); - HashMap<String, String> params = new HashMap<String, String>(); - params.put(ExternalFunctionStatement.CLASS_NAME, PerfTestExtFunctCP.class.getName()); - ExternalFunctionProgramBlockCP fpb = new ExternalFunctionProgramBlockCP(prog, in, out, params, PERF_TOOL_DIR); - prog.addFunctionProgramBlock(DMLProgram.DEFAULT_NAMESPACE, "execPerfTestExtFunct", fpb); - } - else if ( inst instanceof MRJobInstruction ) - { - //FIXME hardcoded for test - //MMRJMR.SORT_IO_MEM = sortio; - } - - //generate input and output matrices - LocalVariableMap vars = ec.getVariables(); - vars.removeAll(); - double mem1 = PerfTestMemoryObserver.getUsedMemory(); - if( schema!=IOSchema.NONE_NONE && schema!=IOSchema.NONE_UNARY ) - vars.put("A", generateInputDataset(PERF_TOOL_DIR+"/A", dim1, dim2, sparsity, df)); - if( schema==IOSchema.BINARY_NONE || schema==IOSchema.BINARY_UNARY || schema==IOSchema.UNARY_UNARY ) - vars.put("B", generateInputDataset(PERF_TOOL_DIR+"/B", dim1, dim2, sparsity, df)); - if( schema==IOSchema.NONE_UNARY || schema==IOSchema.UNARY_UNARY || schema==IOSchema.BINARY_UNARY) - vars.put("C", generateEmptyResult(PERF_TOOL_DIR+"/C", dim1, dim2, df)); - double mem2 = PerfTestMemoryObserver.getUsedMemory(); - - //foreach repetition - double value = 0; - for( int i=0; i<TEST_REPETITIONS; i++ ) - { - System.out.println("run "+i); - value += executeGenericProgramBlock( m, pb, ec ); - } - value/=TEST_REPETITIONS; - - //result correction and print result - switch( m ) - { - case EXEC_TIME: System.out.println("--- RESULT: "+value+" ms"); break; - case MEMORY_USAGE: - //System.out.println("--- RESULT: "+value+" byte"); - if( (mem2-mem1) > 0 ) - value = value + mem2-mem1; //correction: input sizes added - System.out.println("--- RESULT: "+value+" byte"); break; - default: System.out.println("--- RESULT: "+value); break; - 
} - - return value; - } - - private static double executeTestCaseMD( TestMeasure m, InternalTestVariable[] v, DataFormat df, double[] varValue, ProgramBlock pb, IOSchema schema, ExecutionContext ec ) - throws DMLRuntimeException, IOException - { - //double datasize = DEFAULT_DATASIZE; - double sparsity = DEFAULT_SPARSITY; - double dim1 = -1; - double dim2 = -1; - double dim3 = -1; - - - for( int i=0; i<v.length; i++ ) - { - System.out.println( "VAR VALUE "+varValue[i] ); - - switch( v[i] ) - { - case DIM1_SIZE: dim1=varValue[i]; break; - case DIM2_SIZE: dim2=varValue[i]; break; - case DIM3_SIZE: dim3=varValue[i]; break; - default: //do nothing - } - } - - //generate input and output matrices - LocalVariableMap vars = ec.getVariables(); - vars.removeAll(); - double mem1 = PerfTestMemoryObserver.getUsedMemory(); - if( schema!=IOSchema.NONE_NONE && schema!=IOSchema.NONE_UNARY ) - vars.put("A", generateInputDataset(PERF_TOOL_DIR+"/A", dim1, dim2, sparsity, df)); - if( schema==IOSchema.BINARY_NONE || schema==IOSchema.BINARY_UNARY || schema==IOSchema.UNARY_UNARY ) - vars.put("B", generateInputDataset(PERF_TOOL_DIR+"/B", dim2, dim3, sparsity, df)); - if( schema==IOSchema.NONE_UNARY || schema==IOSchema.UNARY_UNARY || schema==IOSchema.BINARY_UNARY) - vars.put("C", generateEmptyResult(PERF_TOOL_DIR+"/C", dim1, dim3, df)); - double mem2 = PerfTestMemoryObserver.getUsedMemory(); - - //foreach repetition - double value = 0; - for( int i=0; i<TEST_REPETITIONS; i++ ) - { - System.out.println("run "+i); - value += executeGenericProgramBlock( m, pb, ec ); - } - value/=TEST_REPETITIONS; - - //result correction and print result - switch( m ) - { - case EXEC_TIME: System.out.println("--- RESULT: "+value+" ms"); break; - case MEMORY_USAGE: - //System.out.println("--- RESULT: "+value+" byte"); - if( (mem2-mem1) > 0 ) - value = value + mem2-mem1; //correction: input sizes added - System.out.println("--- RESULT: "+value+" byte"); break; - default: System.out.println("--- RESULT: "+value); 
break; - } - - return value; - } - - public static double executeGenericProgramBlock( TestMeasure measure, ProgramBlock pb, ExecutionContext ec ) - throws DMLRuntimeException - { - double value = 0; - try - { - switch( measure ) - { - case EXEC_TIME: - Timing time = new Timing(); - time.start(); - pb.execute( ec ); - value = time.stop(); - break; - case MEMORY_USAGE: - PerfTestMemoryObserver mo = new PerfTestMemoryObserver(); - mo.measureStartMem(); - Thread t = new Thread(mo); - t.start(); - pb.execute( ec ); - mo.setStopped(); - value = mo.getMaxMemConsumption(); - t.join(); - break; - } - } - catch(Exception ex) - { - throw new DMLRuntimeException(ex); - } - - //clear matrixes from cache - for( String str : ec.getVariables().keySet() ) - { - Data dat = ec.getVariable(str); - if( dat instanceof MatrixObject ) - ((MatrixObject)dat).clearData(); - } - - return value; - } - - public static LinkedList<Double> generateSequence( double min, double max, double num ) - { - LinkedList<Double> data = new LinkedList<Double>(); - double increment = (max-min)/(num-1); - - for( int i=0; i<num; i++ ) - data.add( Double.valueOf(min+i*increment) ); - - return data; - } - - public static MatrixObject generateInputDataset(String fname, double dim1, double dim2, double sparsity, DataFormat df) - throws IOException, CacheException - { - int d1 = (int) dim1; - int d2 = (int) dim2; - - System.out.println(d1+" "+d2); - - //create random test data - double[][] d = generateTestMatrix(d1, d2, 1, 100, sparsity, 7); - - //create matrix block - MatrixBlock mb = null; - switch( df ) - { - case DENSE: - mb = new MatrixBlock(d1,d2,false); - break; - case SPARSE: - mb = new MatrixBlock(d1,d2,true, (int)(sparsity*dim1*dim2)); - break; - } - - //insert data - for(int i=0; i < d1; i++) - for(int j=0; j < d2; j++) - if( d[i][j]!=0 ) - mb.setValue(i, j, d[i][j]); - - MapReduceTool.deleteFileIfExistOnHDFS(fname); - - MatrixCharacteristics mc = new MatrixCharacteristics(d1, d2, 
ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize()); - MatrixFormatMetaData md = new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo); - MatrixObject mo = new MatrixObject(ValueType.DOUBLE,fname,md); - mo.acquireModify(mb); - mo.release(); - mo.exportData(); //write to HDFS - - return mo; - } - - public static MatrixObject generateEmptyResult(String fname, double dim1, double dim2, DataFormat df ) - throws IOException, CacheException - { - int d1 = (int)dim1; - int d2 = (int)dim2; - - /* - MatrixBlock mb = null; - switch( df ) - { - case DENSE: - mb = new MatrixBlock(dim,dim,false); - break; - case SPARSE: - mb = new MatrixBlock(dim,dim,true); - break; - }*/ - - MatrixCharacteristics mc = new MatrixCharacteristics(d1, d2, ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize()); - MatrixFormatMetaData md = new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo); - MatrixObject mo = new MatrixObject(ValueType.DOUBLE,fname,md); - - return mo; - } - - - /** - * NOTE: This is a copy of TestUtils.generateTestMatrix, it was replicated in order to prevent - * dependency of SystemML.jar to our test package. 
- * - * @param rows number of rows - * @param cols number of columns - * @param min minimum value - * @param max maximum value - * @param sparsity sparsity as a percentage - * @param seed random seed value (-1 if use System time) - * @return matrix as 2D double array - */ - public static double[][] generateTestMatrix(int rows, int cols, double min, double max, double sparsity, long seed) { - double[][] matrix = new double[rows][cols]; - Random random; - if (seed == -1) - random = new Random(System.nanoTime()); - else - random = new Random(seed); - - for (int i = 0; i < rows; i++) { - for (int j = 0; j < cols; j++) { - if (random.nextDouble() > sparsity) - continue; - matrix[i][j] = (random.nextDouble() * (max - min) + min); - } - } - - return matrix; - } - - @SuppressWarnings("all") - private static HashMap<Integer,Long> writeResults( String dirname ) - throws IOException, DMLRuntimeException - { - HashMap<Integer,Long> map = new HashMap<Integer, Long>(); - int count = 1; - int offset = (MODEL_INTERCEPT ? 1 : 0); - int cols = MODEL_MAX_ORDER + offset; - - for( Entry<Integer,HashMap<Integer,LinkedList<Double>>> inst : _results.entrySet() ) - { - int instID = inst.getKey(); - HashMap<Integer,LinkedList<Double>> instCF = inst.getValue(); - - for( Entry<Integer,LinkedList<Double>> cfun : instCF.entrySet() ) - { - int tDefID = cfun.getKey(); - long ID = IDHandler.concatIntIDsToLong(instID, tDefID); - LinkedList<Double> dmeasure = cfun.getValue(); - - PerfTestDef def = _regTestDef.get(tDefID); - LinkedList<Double> dvariable = generateSequence(def.getMin(), def.getMax(), NUM_SAMPLES_PER_TEST); - int dlen = dvariable.size(); - int plen = def.getInternalVariables().length; - - //write variable data set - CSVWriter writer1 = new CSVWriter( new FileWriter( dirname+count+"_in1.csv" ),',', CSVWriter.NO_QUOTE_CHARACTER); - if( plen == 1 ) //one dimensional function - { - //write 1, x, x^2, x^3, ... 
- String[] sbuff = new String[cols]; - for( Double val : dvariable ) - { - for( int j=0; j<cols; j++ ) - sbuff[j] = String.valueOf( Math.pow(val, j+1-offset) ); - writer1.writeNext(sbuff); - } - } - else // multi-dimensional function - { - //write 1, x,y,z,x^2,y^2,z^2, xy, xz, yz, xyz - - String[] sbuff = new String[(int)Math.pow(2,plen)-1+plen+offset-1]; - //String[] sbuff = new String[plen+offset]; - if(offset==1) - sbuff[0]="1"; - - //init index stack - int[] index = new int[plen]; - for( int i=0; i<plen; i++ ) - index[i] = 0; - - //execute test - double[] buff = new double[plen]; - while( index[0]<dlen ) - { - //set buffer values - for( int i=0; i<plen; i++ ) - buff[i] = dvariable.get(index[i]); - - //core writing - for( int i=1; i<=plen; i++ ) - { - if( i==1 ) - { - for( int j=0; j<plen; j++ ) - sbuff[offset+j] = String.valueOf( buff[j] ); - for( int j=0; j<plen; j++ ) - sbuff[offset+plen+j] = String.valueOf( Math.pow(buff[j],2) ); - } - else if( i==2 ) - { - int ix=0; - for( int j=0; j<plen-1; j++ ) - for( int k=j+1; k<plen; k++, ix++ ) - sbuff[offset+2*plen+ix] = String.valueOf( buff[j]*buff[k] ); - } - else if( i==plen ) - { - //double tmp=1; - //for( int j=0; j<plen; j++ ) - // tmp *= buff[j]; - //sbuff[offset+2*plen+plen*(plen-1)/2] = String.valueOf(tmp); - } - else - throw new DMLRuntimeException("More than 3 dims currently not supported."); - - } - - //for( int i=0; i<plen; i++ ) - // sbuff[offset+i] = String.valueOf( buff[i] ); - - writer1.writeNext(sbuff); - - //increment indexes - for( int i=plen-1; i>=0; i-- ) - { - if(i==plen-1) - index[i]++; - else if( index[i+1] >= dlen ) - { - index[i]++; - index[i+1]=0; - } - } - } - } - writer1.close(); - - - //write measure data set - CSVWriter writer2 = new CSVWriter( new FileWriter( dirname+count+"_in2.csv" ),',', CSVWriter.NO_QUOTE_CHARACTER); - String[] buff2 = new String[1]; - for( Double val : dmeasure ) - { - buff2[0] = String.valueOf( val ); - writer2.writeNext(buff2); - } - writer2.close(); - - 
map.put(count, ID); - count++; - } - } - - return map; - } - - private static void computeRegressionModels( String dmlname, String dmltmpname, String dir, int models, int rows, int cols ) - throws IOException, ParseException, DMLException - { - //clean scratch space - //AutomatedTestBase.cleanupScratchSpace(); - - //read DML template - StringBuilder buffer = new StringBuilder(); - BufferedReader br = new BufferedReader( new FileReader(new File( dmlname )) ); - - try - { - String line = null; - while( (line=br.readLine()) != null ) - { - buffer.append(line); - buffer.append("\n"); - } - } - finally - { - if( br != null ) - br.close(); - } - - //replace parameters - String template = buffer.toString(); - template = template.replaceAll("%numModels%", String.valueOf(models)); - template = template.replaceAll("%numRows%", String.valueOf(rows)); - template = template.replaceAll("%numCols%", String.valueOf(cols)); - template = template.replaceAll("%indir%", String.valueOf(dir)); - - // write temp DML file - File fout = new File(dmltmpname); - FileOutputStream fos = new FileOutputStream(fout); - try { - fos.write(template.getBytes()); - } - finally - { - if( fos != null ) - fos.close(); - } - - // execute DML script - DMLScript.main(new String[] { "-f", dmltmpname }); - } - - private static void readRegressionModels( String dname, HashMap<Integer,Long> IDMapping ) - throws IOException - { - for( Entry<Integer,Long> e : IDMapping.entrySet() ) - { - int count = e.getKey(); - long ID = e.getValue(); - int instID = IDHandler.extractIntIDFromLong(ID, 1); - int tDefID = IDHandler.extractIntIDFromLong(ID, 2); - - //read file and parse - LinkedList<Double> params = new LinkedList<Double>(); - CSVReader reader1 = new CSVReader( new FileReader(dname+count+"_out.csv"), ',' ); - String[] nextline = null; - while( (nextline = reader1.readNext()) != null ) - { - params.add(Double.parseDouble(nextline[0])); - } - reader1.close(); - - double[] dparams = new double[params.size()]; - int 
i=0; - for( Double d : params ) - { - dparams[i] = d; - i++; - } - - //create new cost function - boolean multidim = _regTestDef.get(tDefID).getInternalVariables().length > 1; - CostFunction cf = new CostFunction(dparams, multidim); - - //append to profile - if( !_profile.containsKey(instID) ) - _profile.put(instID, new HashMap<Integer, CostFunction>()); - _profile.get(instID).put(tDefID, cf); - } - } - - private static String serializeTestVariables( InternalTestVariable[] vars ) - { - StringBuilder sb = new StringBuilder(); - for( int i=0; i<vars.length; i++ ) - { - if( i>0 ) - sb.append( XML_ELEMENT_DELIMITER ); - sb.append( String.valueOf(vars[i]) ); - } - return sb.toString(); - } - - private static InternalTestVariable[] parseTestVariables(String vars) - { - StringTokenizer st = new StringTokenizer(vars, XML_ELEMENT_DELIMITER); - InternalTestVariable[] v = new InternalTestVariable[st.countTokens()]; - for( int i=0; i<v.length; i++ ) - v[i] = InternalTestVariable.valueOf(st.nextToken()); - return v; - } - - private static String serializeParams( double[] vals ) - { - StringBuilder sb = new StringBuilder(); - for( int i=0; i<vals.length; i++ ) - { - if( i>0 ) - sb.append( XML_ELEMENT_DELIMITER ); - sb.append( String.valueOf(vals[i]) ); - } - return sb.toString(); - } - - private static double[] parseParams( String valStr ) - { - StringTokenizer st = new StringTokenizer(valStr, XML_ELEMENT_DELIMITER); - double[] params = new double[st.countTokens()]; - for( int i=0; i<params.length; i++ ) - params[i] = Double.parseDouble(st.nextToken()); - return params; - } - - private static void readProfile( String fname ) - throws XMLStreamException, IOException - { - //init profile map - _profile = new HashMap<Integer, HashMap<Integer,CostFunction>>(); - - //read existing profile - FileInputStream fis = new FileInputStream( fname ); - - try - { - //xml parsing - XMLInputFactory xif = XMLInputFactory.newInstance(); - XMLStreamReader xsr = xif.createXMLStreamReader( fis ); - - 
int e = xsr.nextTag(); // profile start - - while( true ) //read all instructions - { - e = xsr.nextTag(); // instruction start - if( e == XMLStreamConstants.END_ELEMENT ) - break; //reached profile end tag - - //parse instruction - int ID = Integer.parseInt( xsr.getAttributeValue(null, XML_ID) ); - //String name = xsr.getAttributeValue(null, XML_NAME).trim().replaceAll(" ", Lops.OPERAND_DELIMITOR); - HashMap<Integer, CostFunction> tmp = new HashMap<Integer, CostFunction>(); - _profile.put( ID, tmp ); - - while( true ) - { - e = xsr.nextTag(); // cost function start - if( e == XMLStreamConstants.END_ELEMENT ) - break; //reached instruction end tag - - //parse cost function - TestMeasure m = TestMeasure.valueOf( xsr.getAttributeValue(null, XML_MEASURE) ); - TestVariable lv = TestVariable.valueOf( xsr.getAttributeValue(null, XML_VARIABLE) ); - InternalTestVariable[] pv = parseTestVariables( xsr.getAttributeValue(null, XML_INTERNAL_VARIABLES) ); - DataFormat df = DataFormat.valueOf( xsr.getAttributeValue(null, XML_DATAFORMAT) ); - int tDefID = getTestDefID(m, lv, df, pv); - - xsr.next(); //read characters - double[] params = parseParams(xsr.getText()); - boolean multidim = _regTestDef.get(tDefID).getInternalVariables().length > 1; - CostFunction cf = new CostFunction( params, multidim ); - tmp.put(tDefID, cf); - - xsr.nextTag(); // cost function end - //System.out.println("added cost function"); - } - } - xsr.close(); - } - finally - { - IOUtilFunctions.closeSilently(fis); - } - - //mark profile as successfully read - _flagReadData = true; - } - - /** - * StAX for efficient streaming XML writing. 
- * - * @param dname directory name - * @param fname file name - * @throws IOException if IOException occurs - * @throws XMLStreamException if XMLStreamException occurs - */ - private static void writeProfile( String dname, String fname ) - throws IOException, XMLStreamException - { - //create initial directory and file - File dir = new File( dname ); - if( !dir.exists() ) - dir.mkdir(); - File f = new File( fname ); - f.createNewFile(); - - FileOutputStream fos = new FileOutputStream( f ); - - try - { - //create document - XMLOutputFactory xof = XMLOutputFactory.newInstance(); - XMLStreamWriter xsw = xof.createXMLStreamWriter( fos ); - //TODO use an alternative way for intentation - //xsw = new IndentingXMLStreamWriter( xsw ); //remove this line if no indenting required - - //write document content - xsw.writeStartDocument(); - xsw.writeStartElement( XML_PROFILE ); - xsw.writeAttribute(XML_DATE, String.valueOf(new Date()) ); - - //foreach instruction (boundle of cost functions) - for( Entry<Integer,HashMap<Integer,CostFunction>> inst : _profile.entrySet() ) - { - int instID = inst.getKey(); - String instName = _regInst_IDNames.get( instID ); - - xsw.writeStartElement( XML_INSTRUCTION ); - xsw.writeAttribute(XML_ID, String.valueOf( instID )); - xsw.writeAttribute(XML_NAME, instName.replaceAll(Lop.OPERAND_DELIMITOR, " ")); - - //foreach testdef cost function - for( Entry<Integer,CostFunction> cfun : inst.getValue().entrySet() ) - { - int tdefID = cfun.getKey(); - PerfTestDef def = _regTestDef.get(tdefID); - CostFunction cf = cfun.getValue(); - - xsw.writeStartElement( XML_COSTFUNCTION ); - xsw.writeAttribute( XML_ID, String.valueOf( tdefID )); - xsw.writeAttribute( XML_MEASURE, def.getMeasure().toString() ); - xsw.writeAttribute( XML_VARIABLE, def.getVariable().toString() ); - xsw.writeAttribute( XML_INTERNAL_VARIABLES, serializeTestVariables(def.getInternalVariables()) ); - xsw.writeAttribute( XML_DATAFORMAT, def.getDataformat().toString() ); - 
xsw.writeCharacters(serializeParams( cf.getParams() )); - xsw.writeEndElement();// XML_COSTFUNCTION - } - - xsw.writeEndElement(); //XML_INSTRUCTION - } - - xsw.writeEndElement();//XML_PROFILE - xsw.writeEndDocument(); - xsw.close(); - } - finally - { - IOUtilFunctions.closeSilently(fos); - } - } - - - - /** - * Main for invoking the actual performance test in order to produce profile.xml - * - * @param args string arguments to main() method - */ - public static void main(String[] args) - { - //execute the local / remote performance test - PerfTestTool.runTest(); - } - - -}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml deleted file mode 100644 index c216d52..0000000 --- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/PerfTestToolRegression.dml +++ /dev/null @@ -1,59 +0,0 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - - -#PerfTestTool: DML template for estimation cost functions. 
-#Deprecated in SystemML 0.13 - -dynRead = externalFunction(Matrix[Double] d, String fname, Integer m, Integer n) -return (Matrix[Double] D) -implemented in (classname="org.apache.sysml.runtime.controlprogram.parfor.test.dml.DynamicReadMatrix2DCP",exectype="mem") - -dynWrite = externalFunction(Matrix[Double] R, String fname) -return (Matrix[Double] D) -implemented in (classname="org.apache.sysml.runtime.controlprogram.parfor.test.dml.DynamicWriteMatrix2DCP",exectype="mem") - -solve = externalFunction(Matrix[Double] A, Matrix[Double] y) -return (Matrix[Double] b) -implemented in (classname="org.apache.sysml.packagesupport.LinearSolverWrapperCP",exectype="mem") - -k = %numModels%; -m = -1; -n = -1; - -dummy = matrix(1,rows=1,cols=1); - -for( i in 1:k, par=8, mode=LOCAL ) -{ - sin1 = "./conf/PerfTestTool/"+i+"_in1.csv"; - sin2 = "./conf/PerfTestTool/"+i+"_in2.csv"; - - D = dynRead( dummy, sin1, m, n ); - y = dynRead( dummy, sin2, m, 1 ); - - A = t(D) %*% D; # X'X - b = t(D) %*% y; # X'y - beta = solve(A,b); - - sout = "./conf/PerfTestTool/"+i+"_out.csv"; - - X=dynWrite( beta, sout ); -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/test/java/org/apache/sysml/test/integration/functions/parfor/ParForRulebasedOptimizerTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/parfor/ParForRulebasedOptimizerTest.java b/src/test/java/org/apache/sysml/test/integration/functions/parfor/ParForRulebasedOptimizerTest.java index beb08bd..343d846 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/parfor/ParForRulebasedOptimizerTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/parfor/ParForRulebasedOptimizerTest.java @@ -30,15 +30,13 @@ import org.apache.sysml.test.utils.TestUtils; public class ParForRulebasedOptimizerTest extends AutomatedTestBase { - - private final static String 
TEST_NAME1 = "parfor_optimizer1"; - private final static String TEST_NAME2 = "parfor_optimizer2"; - private final static String TEST_NAME3 = "parfor_optimizer3"; + private final static String TEST_NAME1 = "parfor_optimizer1"; //+b for dml + private final static String TEST_NAME2 = "parfor_optimizer2"; //+b for dml + private final static String TEST_NAME3 = "parfor_optimizer3"; //+b for dml private final static String TEST_DIR = "functions/parfor/"; private final static String TEST_CLASS_DIR = TEST_DIR + ParForRulebasedOptimizerTest.class.getSimpleName() + "/"; private final static double eps = 1e-10; - - + private final static int rows1 = 1000; //small CP private final static int rows2 = 10000; //large MR @@ -67,82 +65,127 @@ public class ParForRulebasedOptimizerTest extends AutomatedTestBase @Test - public void testParForOptimizerCorrelationSmallSmall() - { - runParForOptimizerTest(1, false, false); + public void testParForRulebasedOptimizerCorrelationSmallSmall() { + runParForOptimizerTest(1, false, false, false); } + @Test + public void testParForRulebasedOptimizerCorrelationSmallLarge() { + runParForOptimizerTest(1, false, true, false); + } @Test - public void testParForOptimizerCorrelationSmallLarge() - { - runParForOptimizerTest(1, false, true); + public void testParForRulebasedOptimizerCorrelationLargeSmall() { + runParForOptimizerTest(1, true, false, false); } + @Test + public void testParForRulebasedOptimizerCorrelationLargeLarge() { + runParForOptimizerTest(1, true, true, false); + } @Test - public void testParForOptimizerCorrelationLargeSmall() - { - runParForOptimizerTest(1, true, false); + public void testParForRulebasedOptimizerBivariateStatsSmallSmall() { + runParForOptimizerTest(2, false, false, false); } @Test - public void testParForOptimizerCorrelationLargeLarge() - { - runParForOptimizerTest(1, true, true); + public void testParForRulebasedOptimizerBivariateStatsSmallLarge() { + runParForOptimizerTest(2, false, true, false); } + @Test + public 
void testParForRulebasedOptimizerBivariateStatsLargeSmall() { + runParForOptimizerTest(2, true, false, false); + } @Test - public void testParForOptimizerBivariateStatsSmallSmall() - { - runParForOptimizerTest(2, false, false); + public void testParForRulebasedOptimizerBivariateStatsLargeLarge() { + runParForOptimizerTest(2, true, true, false); } @Test - public void testParForOptimizerBivariateStatsSmallLarge() - { - runParForOptimizerTest(2, false, true); + public void testParForRulebasedOptimizerFunctionInvocationSmallSmall() { + runParForOptimizerTest(3, false, false, false); } @Test - public void testParForOptimizerBivariateStatsLargeSmall() - { - runParForOptimizerTest(2, true, false); + public void testParForRulebasedOptimizerFunctionInvocationSmallLarge() { + runParForOptimizerTest(3, false, true, false); } @Test - public void testParForOptimizerBivariateStatsLargeLarge() - { - runParForOptimizerTest(2, true, true); + public void testParForRulebasedOptimizerFunctionInvocationLargeSmall() { + runParForOptimizerTest(3, true, false, false); } @Test - public void testParForOptimizerFunctionInvocationSmallSmall() - { - runParForOptimizerTest(3, false, false); + public void testParForRulebasedOptimizerFunctionInvocationLargeLarge() { + runParForOptimizerTest(3, true, true, false); } @Test - public void testParForOptimizerFunctionInvocationSmallLarge() - { - runParForOptimizerTest(3, false, true); + public void testParForHeuristicOptimizerCorrelationSmallSmall() { + runParForOptimizerTest(1, false, false, true); } @Test - public void testParForOptimizerFunctionInvocationLargeSmall() - { - runParForOptimizerTest(3, true, false); + public void testParForHeuristicOptimizerCorrelationSmallLarge() { + runParForOptimizerTest(1, false, true, true); } @Test - public void testParForOptimizerFunctionInvocationLargeLarge() - { - runParForOptimizerTest(3, true, true); + public void testParForHeuristicOptimizerCorrelationLargeSmall() { + runParForOptimizerTest(1, true, false, 
true); + } + + @Test + public void testParForHeuristicOptimizerCorrelationLargeLarge() { + runParForOptimizerTest(1, true, true, true); } + @Test + public void testParForHeuristicOptimizerBivariateStatsSmallSmall() { + runParForOptimizerTest(2, false, false, true); + } - private void runParForOptimizerTest( int scriptNum, boolean largeRows, boolean largeCols ) + @Test + public void testParForHeuristicOptimizerBivariateStatsSmallLarge() { + runParForOptimizerTest(2, false, true, true); + } + + @Test + public void testParForHeuristicOptimizerBivariateStatsLargeSmall() { + runParForOptimizerTest(2, true, false, true); + } + + @Test + public void testParForHeuristicOptimizerBivariateStatsLargeLarge() { + runParForOptimizerTest(2, true, true, true); + } + + @Test + public void testParForHeuristicOptimizerFunctionInvocationSmallSmall() { + runParForOptimizerTest(3, false, false, true); + } + + @Test + public void testParForHeuristicOptimizerFunctionInvocationSmallLarge() { + runParForOptimizerTest(3, false, true, true); + } + + @Test + public void testParForHeuristicOptimizerFunctionInvocationLargeSmall() { + runParForOptimizerTest(3, true, false, true); + } + + @Test + public void testParForHeuristicOptimizerFunctionInvocationLargeLarge() { + runParForOptimizerTest(3, true, true, true); + } + + + private void runParForOptimizerTest( int scriptNum, boolean largeRows, boolean largeCols, boolean timebasedOpt ) { //find right rows and cols configuration int rows=-1, cols=-1; @@ -171,31 +214,34 @@ public class ParForRulebasedOptimizerTest extends AutomatedTestBase switch( scriptNum ) { case 1: - runUnaryTest(scriptNum, rows, cols); + runUnaryTest(scriptNum, timebasedOpt, rows, cols); break; case 2: - runNaryTest(scriptNum, rows, cols); + runNaryTest(scriptNum, timebasedOpt, rows, cols); break; case 3: - runUnaryTest(scriptNum, rows, cols); + runUnaryTest(scriptNum, timebasedOpt, rows, cols); break; } } - private void runUnaryTest(int scriptNum, int rows, int cols ) + private 
void runUnaryTest(int scriptNum, boolean timebasedOpt, int rows, int cols ) { TestConfiguration config = null; String HOME = SCRIPT_DIR + TEST_DIR; if( scriptNum==1 ) { config=getTestConfiguration(TEST_NAME1); - fullDMLScriptName = HOME + TEST_NAME1 + ".dml"; + String testname = TEST_NAME1 + (timebasedOpt ? "b" : ""); + fullDMLScriptName = HOME + testname + ".dml"; } else if( scriptNum==3 ) { config=getTestConfiguration(TEST_NAME3); - fullDMLScriptName = HOME + TEST_NAME3 + ".dml"; + String testname = TEST_NAME3 + (timebasedOpt ? "b" : ""); + fullDMLScriptName = HOME + testname + ".dml"; } + config.addVariable("rows", rows); config.addVariable("cols", cols); loadTestConfiguration(config); @@ -235,7 +281,7 @@ public class ParForRulebasedOptimizerTest extends AutomatedTestBase TestUtils.compareMatrices(dmlfile, rfile, eps, "DML", "R"); } - private void runNaryTest(int scriptNum, int rows, int cols) + private void runNaryTest(int scriptNum, boolean timebasedOpt, int rows, int cols) { TestConfiguration config = getTestConfiguration(TEST_NAME2); config.addVariable("rows", rows); @@ -244,7 +290,8 @@ public class ParForRulebasedOptimizerTest extends AutomatedTestBase /* This is for running the junit test the new way, i.e., construct the arguments directly */ String HOME = SCRIPT_DIR + TEST_DIR; - fullDMLScriptName = HOME + TEST_NAME2 + ".dml"; + String testname = TEST_NAME2 + (timebasedOpt ? 
"b" : ""); + fullDMLScriptName = HOME + testname + ".dml"; programArgs = new String[]{"-args", input("D"), input("S1"), input("S2"), http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/test/scripts/functions/parfor/parfor_optimizer1b.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/parfor/parfor_optimizer1b.dml b/src/test/scripts/functions/parfor/parfor_optimizer1b.dml new file mode 100644 index 0000000..cd0a3f7 --- /dev/null +++ b/src/test/scripts/functions/parfor/parfor_optimizer1b.dml @@ -0,0 +1,53 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#------------------------------------------------------------- + + +V = read($1,rows=$2,cols=$3); +m = $2; +n = $3; +W = m; + +R = matrix(0, rows=n,cols=n); +dummy = matrix(1, rows=1, cols=1); + +parfor( i in 1:(n-1), opt=HEURISTIC ) +{ + X = V[,i]; + m2X = moment(X,2); + sigmaX = sqrt(m2X * (W/(W-1.0)) ); + + parfor( j in (i+1):n ) + { + Y = V[,j]; + + #corr computation + m2Y = moment(Y,2); + sigmaY = sqrt(m2Y * (W/(W-1.0)) ); + covXY = cov(X,Y); + rXY = covXY / (sigmaX*sigmaY); + + #print("R[("+i+","+j+")]="+rXY); + R[i,j] = dummy * rXY; + + } +} + +write(R, $4); \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/test/scripts/functions/parfor/parfor_optimizer2b.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/parfor/parfor_optimizer2b.dml b/src/test/scripts/functions/parfor/parfor_optimizer2b.dml new file mode 100644 index 0000000..6b41058 --- /dev/null +++ b/src/test/scripts/functions/parfor/parfor_optimizer2b.dml @@ -0,0 +1,277 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#------------------------------------------------------------- + + + +/* + * + * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs + * Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n} + * compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n) + * + * Seven inputs: + * $1) D - input data + * $2) S1 - First attribute set {A_11, A_12, ... A_1m} + * $3) S2 - Second attribute set {A_21, A_22, ... A_2n} + * $4) K1 - kind for attributes in S1 + * $5) K2 - kind for attributes in S2 + * kind=1 for scale, kind=2 for nominal, kind=3 for ordinal + * $6) numPairs - total number of pairs (m*n) + * $7) maxC - maximum number of categories in any categorical attribute + * + * One output: + * $6) output directory in which following four statistics files are created + * + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs + * (R, (chi-sq, df, pval, cramersv), spearman, Eta, F) + * + categorical.counts - + * + categorical.means - + * + categorical.variances - + * -> Values in these three matrices are applicable only for scale-categorical attribute pairs. + * k^th column in these matrices denote the attribute pair (A_1i,A_2j) where i*j = k. 
+ */
+
+# Computes bivariate statistics for every attribute pair (S1[i], S2[j]) of the
+# data set D and writes them, one column per pair, to four matrices under the
+# output directory $6.
+#
+# Command-line arguments:
+#   $1      input data set D ($7 rows x $8 columns)
+#   $2, $3  attribute sets S1 and S2 (1 x $9, column indexes into D)
+#   $4, $5  kinds K1 and K2 of the attributes in S1 and S2 (1 x $9)
+#   $6      output directory
+#   $10     number of attribute pairs (|S1|*|S2|)
+#   $11     max number of categories in any categorical attribute
+#
+# Kind codes as used below: 1 = scale, 3 = ordinal; every other value is
+# handled as categorical (nominal).
+
+D = read($1, rows=$7, cols=$8);  # input data set
+S1 = read($2, rows=1, cols=$9);  # attribute set 1
+S2 = read($3, rows=1, cols=$9);  # attribute set 2
+K1 = read($4, rows=1, cols=$9);  # kind for attributes in S1
+K2 = read($5, rows=1, cols=$9);  # kind for attributes in S2
+numPairs = $10;  # number of attribute pairs (|S1|*|S2|)
+maxC = $11;      # max number of categories in any categorical attribute
+
+s1size = ncol(S1);
+s2size = ncol(S2);
+
+#numpairs = s1size * s2size;
+#print(s1size + ", " + s2size + ", " + numpairs);
+
+# Rows of basestats (one column per attribute pair):
+#   1 R, 2 chisq, 3 df, 4 pval, 5 cramersv, 6 spearman (sp), 7 eta, 8 anovaf (f)
+numstats = 8;
+basestats = matrix(0, rows=numstats, cols=numPairs);
+cat_counts = matrix(0, rows=maxC, cols=numPairs);
+cat_means = matrix(0, rows=maxC, cols=numPairs);
+cat_vars = matrix(0, rows=maxC, cols=numPairs);
+
+# 1x1 matrix; dummy*scalar turns a scalar into a 1x1 matrix before it is
+# stored into a cell of basestats (presumably because left indexing expects a
+# matrix on the right-hand side -- confirm).
+dummy = matrix(1, rows=1, cols=1);
+
+
+# Dependency checking is switched off (check=0) on both loops; every
+# iteration writes only column pairID of the result matrices.
+parfor( i in 1:s1size, check=0, opt=HEURISTIC) {
+    a1 = as.scalar(S1[,i]);
+    k1 = as.scalar(K1[1,i]);
+    A1 = D[,a1];
+
+    parfor( j in 1:s2size, check=0) {
+        pairID = (i-1)*s2size+j;
+        a2 = as.scalar(S2[,j]);
+        k2 = as.scalar(K2[1,j]);
+        A2 = D[,a2];
+
+        if (k1 == k2) {
+            if (k1 == 1) {
+                # scale-scale
+                print("[" + i + "," + j + "] scale-scale");
+                r = bivar_ss(A1,A2);
+                basestats[1,pairID] = dummy*r;
+            } else {
+                # nominal-nominal or ordinal-ordinal
+                print("[" + i + "," + j + "] categorical-categorical");
+                [chisq, df, pval, cramersv] = bivar_cc(A1,A2);
+                basestats[2,pairID] = dummy*chisq;
+                basestats[3,pairID] = dummy*df;
+                basestats[4,pairID] = dummy*pval;
+                basestats[5,pairID] = dummy*cramersv;
+
+                if ( k1 == 3 ) {
+                    # ordinal-ordinal
+                    print("[" + i + "," + j + "] ordinal-ordinal");
+                    sp = bivar_oo(A1, A2);
+                    basestats[6,pairID] = dummy*sp;
+                }
+            }
+        }
+        else {
+            if (k1 == 1 | k2 == 1) {
+                # Scale-nominal/ordinal
+                print("[" + i + "," + j + "] scale-categorical");
+
+                # bivar_sc expects the scale attribute first
+                if ( k1 == 1 ) {
+                    [eta,f, counts, means, vars] = bivar_sc(A1,A2);
+                }
+                else {
+                    [eta,f, counts, means, vars] = bivar_sc(A2,A1);
+                }
+                basestats[7,pairID] = dummy*eta;
+                basestats[8,pairID] = dummy*f;
+                # NOTE(review): these assignments fill a whole maxC-row column,
+                # so they appear to assume counts/means/vars have exactly maxC
+                # rows -- confirm for attributes with fewer categories.
+                cat_counts[,pairID] = counts;
+                cat_means[,pairID] = means;
+                cat_vars[,pairID] = vars;
+            }
+            else {
+                # nominal-ordinal or ordinal-nominal
+                print("[" + i + "," + j + "] categorical-categorical");
+                [chisq, df, pval, cramersv] = bivar_cc(A1,A2);
+                basestats[2,pairID] = dummy*chisq;
+                basestats[3,pairID] = dummy*df;
+                basestats[4,pairID] = dummy*pval;
+                basestats[5,pairID] = dummy*cramersv;
+            }
+        }
+    }
+}
+
+write(basestats, $6 + "/bivar.stats");
+write(cat_counts, $6 + "/category.counts");
+write(cat_means, $6 + "/category.means");
+write(cat_vars, $6 + "/category.variances");
+
+
+# -----------------------------------------------------------------------------------------------------------
+
+# Categorical-categorical statistics from the contingency table of A and B.
+# Returns the chi-squared statistic, its degrees of freedom, the upper-tail
+# p-value and Cramer's V.
+bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) {
+
+    # Contingency Table
+    F = table(A,B);
+    R = nrow(F);
+    C = ncol(F);
+
+    # Chi-Squared: compare observed counts F with expected counts E
+    W = sum(F);
+    r = rowSums(F);
+    c = colSums(F);
+    E = (r %*% c)/W;
+    T = (F-E)^2/E;
+    chi_squared = sum(T);
+
+    # compute p-value
+    degFreedom = (R-1)*(C-1);
+    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
+
+    # Cramer's V
+    q = min(R,C);
+    cramers_v = sqrt(chi_squared/(W*(q-1)));
+
+    # Assign return values
+    chisq = chi_squared;
+    df = degFreedom;
+    pval = pValue;
+    cramersv = cramers_v;
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+# Scale-scale statistic: Pearson's R of the two columns X and Y.
+bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) {
+
+    # Unweighted co-variance
+    covXY = cov(X,Y);
+
+    # compute standard deviations for both X and Y by computing 2^nd central moment
+    # (scaled by W/(W-1))
+    W = nrow(X);
+    m2X = moment(X,2);
+    m2Y = moment(Y,2);
+    sigmaX = sqrt(m2X * (W/(W-1.0)) );
+    sigmaY = sqrt(m2Y * (W/(W-1.0)) );
+
+    # Pearson's R
+    R = covXY / (sigmaX*sigmaY);
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+# Scale-categorical statistics.
+# Y points to SCALE variable
+# A points to CATEGORICAL variable
+# Returns Eta, the ANOVA F statistic, and the per-category frequencies, means
+# and variances of Y.
+bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
+
+    # mean and variance in target variable
+    W = nrow(A);
+    my = mean(Y);
+    varY = moment(Y,2) * W/(W-1.0);
+
+    # category-wise (frequencies, means, variances)
+    CFreqs = aggregate(target=Y, groups=A, fn="count");
+    CMeans = aggregate(target=Y, groups=A, fn="mean");
+    CVars = aggregate(target=Y, groups=A, fn="variance");
+
+    # number of categories
+    R = nrow(CFreqs);
+
+    Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
+
+    # F = (between-category term / (R-1)) / (within-category term / (W-R))
+    anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
+    anova_den = sum( (CFreqs-1)*CVars )/(W-R);
+    AnovaF = anova_num/anova_den;
+}
+
+# -----------------------------------------------------------------------------------------------------------
+
+
+# -----------------------------------------------------------------------------------------------------------
+# Function to compute ranks
+# takes a column vector of per-category counts as input, and produces a vector of
+# the same size in which each cell holds the computed score for that category:
+# the total count of all preceding categories plus (own count + 1)/2.
+computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
+    # cumsum(X) - X is the count of all preceding categories; computing it
+    # with one cumulative sum avoids re-summing the prefix for every row.
+    Ranks = cumsum(X) - X + (X + 1)/2;
+}
+
+#-------------------------------------------------------------------------
+
+# Ordinal-ordinal statistic: correlation sp of the category scores of A and B,
+# weighted by the cell counts of their contingency table.
+bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) {
+
+    # compute contingency table
+    F = table(A,B);
+
+    catA = nrow(F);  # number of categories in A
+
+    # compute category-wise counts for both the attributes
+    R = rowSums(F);
+    S = colSums(F);
+
+    # compute scores, both are column vectors
+    [C] = computeRanks(R);
+    meanX = mean(C,R);
+
+    columnS = t(S);
+    [D] = computeRanks(columnS);
+
+    # scores (C,D) are individual values, and counts (R,S) act as weights
+    meanY = mean(D,columnS);
+
+    W = sum(F);  # total weight, or total #cases
+    varX = moment(C,R,2)*(W/(W-1.0));
+    varY = moment(D,columnS,2)*(W/(W-1.0));
+
+    # deviations of the B scores are the same for every row of F
+    devY = t(D[,1])-meanY;
+
+    covXY = 0.0;
+    for(i in 1:catA) {
+        covXY = covXY + sum((F[i,]/(W-1)) * (as.scalar(C[i,1])-meanX) * devY);
+    }
+
+    sp = covXY/(sqrt(varX)*sqrt(varY));
+}
+
+# -----------------------------------------------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/35da413a/src/test/scripts/functions/parfor/parfor_optimizer3b.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/functions/parfor/parfor_optimizer3b.dml b/src/test/scripts/functions/parfor/parfor_optimizer3b.dml
new file mode 100644
index 0000000..6eae759
--- /dev/null
+++ b/src/test/scripts/functions/parfor/parfor_optimizer3b.dml
@@ -0,0 +1,52 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+# For every i in 1..n/2, adds the sum of column i of V to the sum of column
+# n-i+1 and stores the total in R[1,i]; R is then written out.
+#   $1      input matrix V ($2 rows x $3 columns)
+#   $4      number of columns of the 1-row result R
+#   $5      output location of R
+V = read($1, rows=$2, cols=$3);
+nd = $4;
+n = $3;
+
+# 1x1 matrix used to turn the scalar total into a 1x1 matrix for R[1,i]
+dummy = matrix(1, rows=1, cols=1);
+R = matrix(0, rows=1, cols=nd);
+
+parfor( i in 1:(n/2), opt=HEURISTIC ) {
+   colA = V[ ,i];
+   colB = V[ ,n-i+1];
+   sumA = execSum(colA);
+   sumB = execSum(colB);
+   total = sumA + sumB;
+   R[1,i] = dummy*total;
+}
+
+write(R, $5);
+
+
+# Returns the sum of all cells of X.
+# NOTE(review): both branches compute the same value; presumably the if/else
+# is kept on purpose so that the function body contains control flow --
+# confirm before simplifying.
+execSum = function(Matrix[Double] X) return (Double sx) {
+   if( ncol(X) > 0 ) {
+      sx = sum(X);
+   } else {
+      sx = sum(X);
+   }
+}
\ No newline at end of file