SAMOA-67 : Integration of MOA instances in SAMOA
Project: http://git-wip-us.apache.org/repos/asf/incubator-samoa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-samoa/commit/26c21912 Tree: http://git-wip-us.apache.org/repos/asf/incubator-samoa/tree/26c21912 Diff: http://git-wip-us.apache.org/repos/asf/incubator-samoa/diff/26c21912 Branch: refs/heads/master Commit: 26c219124df97f7cd82ca0535ffe083b369f2507 Parents: dbc3aab Author: mehdidb <[email protected]> Authored: Sun Jul 16 16:10:52 2017 +0200 Committer: mehdidb <[email protected]> Committed: Sun Jul 16 16:10:52 2017 +0200 ---------------------------------------------------------------------- .../org/apache/samoa/instances/ArffLoader.java | 456 ++++++++++------- .../org/apache/samoa/instances/Attribute.java | 160 ++++-- .../samoa/instances/AttributesInformation.java | 152 ++++++ .../samoa/instances/AvroBinaryLoader.java | 3 - .../apache/samoa/instances/AvroJsonLoader.java | 3 - .../org/apache/samoa/instances/AvroLoader.java | 23 +- .../apache/samoa/instances/DenseInstance.java | 75 +-- .../samoa/instances/DenseInstanceData.java | 103 +++- .../org/apache/samoa/instances/Instance.java | 316 ++++++++++-- .../apache/samoa/instances/InstanceData.java | 76 ++- .../apache/samoa/instances/InstanceImpl.java | 483 ++++++++++++++++++ .../samoa/instances/InstanceInformation.java | 158 +++++- .../org/apache/samoa/instances/Instances.java | 484 ++++++++++++++++--- .../apache/samoa/instances/InstancesHeader.java | 60 +-- .../java/org/apache/samoa/instances/Loader.java | 6 - .../samoa/instances/MultiLabelInstance.java | 25 + .../samoa/instances/MultiLabelPrediction.java | 139 ++++++ .../samoa/instances/MultiTargetArffLoader.java | 47 ++ .../org/apache/samoa/instances/Prediction.java | 110 +++++ .../java/org/apache/samoa/instances/Range.java | 114 +++++ .../instances/SingleClassInstanceData.java | 86 ---- .../samoa/instances/SingleLabelInstance.java | 260 ---------- .../apache/samoa/instances/SparseInstance.java | 73 +-- .../samoa/instances/SparseInstanceData.java | 165 ++++++- .../java/org/apache/samoa/instances/Utils.java | 2 +- 25 files changed, 2686 insertions(+), 893 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/26c21912/samoa-instances/src/main/java/org/apache/samoa/instances/ArffLoader.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/ArffLoader.java b/samoa-instances/src/main/java/org/apache/samoa/instances/ArffLoader.java index a25dc62..dd82eda 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/ArffLoader.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/ArffLoader.java @@ -29,39 +29,117 @@ import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; -/** - * @author abifet - */ public class ArffLoader implements Loader { + /** + * The instance information. + */ protected InstanceInformation instanceInformation; - transient protected StreamTokenizer streamTokenizer; + protected InstancesHeader streamHeader; - protected Reader reader; + /** + * The stream tokenizer. + */ + protected transient StreamTokenizer streamTokenizer; - protected int size; + /** + * Instantiates a new arff loader. + * + * @param reader the reader + * @param size the size + * @param classAttribute the class attribute + */ + public ArffLoader(Reader reader, int size, int classAttribute) { + // size is not used + this(reader); + if (classAttribute < 0) { + this.instanceInformation.setClassIndex(this.instanceInformation.numAttributes() - 1); + //System.out.print(this.instanceInformation.classIndex()); + } else if (classAttribute > 0) { + this.instanceInformation.setClassIndex(classAttribute - 1); + } + } - protected int classAttribute; + protected Range range; - public ArffLoader() { + /** + * Instantiates a new arff loader. + * + * @param reader the reader + */ + public ArffLoader(Reader reader) { + this(reader, null); } - public ArffLoader(Reader reader, int size, int classAttribute) { - this.reader = reader; - this.size = size; - this.classAttribute = classAttribute; - initStreamTokenizer(reader); + /** + * Instantiates a new arff loader. + * + * @param reader the reader + * @param range + */ + public ArffLoader(Reader reader, Range range) { + this.range = range; + BufferedReader br = new BufferedReader(reader); + + //Init streamTokenizer + streamTokenizer = new StreamTokenizer(br); + streamTokenizer.resetSyntax(); + streamTokenizer.whitespaceChars(0, ' '); + streamTokenizer.wordChars(' ' + 1, '\u00FF'); + streamTokenizer.whitespaceChars(',', ','); + streamTokenizer.commentChar('%'); + streamTokenizer.quoteChar('"'); + streamTokenizer.quoteChar('\''); + streamTokenizer.ordinaryChar('{'); + streamTokenizer.ordinaryChar('}'); + streamTokenizer.eolIsSignificant(true); + + this.instanceInformation = this.getHeader(); + + if (range != null) { //is MultiLabel + this.instanceInformation.setRangeOutputIndices(range); + } + } + /** + * Gets the structure. + * + * @return the structure + */ public InstanceInformation getStructure() { return this.instanceInformation; } - public Instance readInstance(Reader reader) { - if (streamTokenizer == null) { - initStreamTokenizer(reader); + /** + * Reads instance. It detects if it is dense or sparse. + * + * @return the instance + */ + public Instance readInstance() { + while (streamTokenizer.ttype == StreamTokenizer.TT_EOL) { + try { + streamTokenizer.nextToken(); + } catch (IOException ex) { + Logger.getLogger(ArffLoader.class.getName()).log(Level.SEVERE, null, ex); + } } + if (streamTokenizer.ttype == '{') { + return readInstanceSparse(); + // return readDenseInstanceSparse(); + } else { + return readInstanceDense(); + } + + } + + /** + * Reads instance. It detects if it is dense or sparse. + * + * @return the instance + */ + public Instance readInstance(Reader reader) { while (streamTokenizer.ttype == StreamTokenizer.TT_EOL) { try { streamTokenizer.nextToken(); @@ -78,94 +156,99 @@ public class ArffLoader implements Loader { } + /** + * Reads a dense instance from the file. + * + * @return the instance + */ public Instance readInstanceDense() { - Instance instance = new DenseInstance(this.instanceInformation.numAttributes() + 1); - // System.out.println(this.instanceInformation.numAttributes()); + Instance instance = newDenseInstance(this.instanceInformation.numAttributes()); + //System.out.println(this.instanceInformation.numAttributes()); int numAttribute = 0; try { while (numAttribute == 0 && streamTokenizer.ttype != StreamTokenizer.TT_EOF) { - // For each line + //For each line while (streamTokenizer.ttype != StreamTokenizer.TT_EOL && streamTokenizer.ttype != StreamTokenizer.TT_EOF) { - // For each item + //For each item if (streamTokenizer.ttype == StreamTokenizer.TT_NUMBER) { - // System.out.println(streamTokenizer.nval + "Num "); - this.setValue(instance, numAttribute, streamTokenizer.nval, true); - //numAttribute++; - - } else if (streamTokenizer.sval != null && ( - streamTokenizer.ttype == StreamTokenizer.TT_WORD - || streamTokenizer.ttype == 34 || streamTokenizer.ttype == 39)) { - // System.out.println(streamTokenizer.sval + "Str"); - boolean isNumeric = attributes.get(numAttribute).isNumeric(); + //System.out.println(streamTokenizer.nval + "Num "); + instance.setValue(numAttribute, streamTokenizer.nval);//this.setValue(instance, numAttribute, streamTokenizer.nval, true); + ++numAttribute; + + } else if (streamTokenizer.sval != null && (streamTokenizer.ttype == StreamTokenizer.TT_WORD + || streamTokenizer.ttype == 34 || streamTokenizer.ttype == 39)) { + //System.out.println(streamTokenizer.sval + "Str"); + boolean isNumeric = this.auxAttributes.get(numAttribute).isNumeric(); double value; if ("?".equals(streamTokenizer.sval)) { - value = Double.NaN; // Utils.missingValue(); + value = Double.NaN; //Utils.missingValue(); } else if (isNumeric == true) { value = Double.valueOf(streamTokenizer.sval).doubleValue(); } else { - value = this.instanceInformation.attribute(numAttribute).indexOfValue( - streamTokenizer.sval); + value = this.auxAttributes.get(numAttribute).indexOfValue(streamTokenizer.sval); } - this.setValue(instance, numAttribute, value, isNumeric); - //numAttribute++; + instance.setValue(numAttribute,value);//this.setValue(instance, numAttribute, value, isNumeric); + ++numAttribute; } - numAttribute++; streamTokenizer.nextToken(); } streamTokenizer.nextToken(); - // System.out.println("EOL"); + //System.out.println("EOL"); } } catch (IOException ex) { Logger.getLogger(ArffLoader.class.getName()).log(Level.SEVERE, null, ex); } - //System.out.println(instance); return (numAttribute > 0) ? instance : null; } - private void setValue(Instance instance, int numAttribute, double value, boolean isNumber) { + protected void setValue(Instance instance, int numAttribute, double value, boolean isNumber) { double valueAttribute; - if (this.instanceInformation.attribute(numAttribute).isNominal) { - valueAttribute = value; - //this.instanceInformation.attribute(numAttribute).indexOfValue(Double.toString(value)); - // System.out.println(value +"/"+valueAttribute+" "); + + if (isNumber && this.auxAttributes.get(numAttribute).isNominal) { + valueAttribute = value;//this.auxAttributes.get(numAttribute).indexOfValue(Double.toString(value)); + //System.out.println(value +"/"+valueAttribute+" "); } else { valueAttribute = value; - // System.out.println(value +"/"+valueAttribute+" "); + //System.out.println(value +"/"+valueAttribute+" "); } if (this.instanceInformation.classIndex() == numAttribute) { - instance.setClassValue(valueAttribute); - // System.out.println(value - // +"<"+this.instanceInformation.classIndex()+">"); + setClassValue(instance, valueAttribute); + //System.out.println(value +"<"+this.instanceInformation.classIndex()+">"); } else { + //if(numAttribute>this.instanceInformation.classIndex()) + // numAttribute--; instance.setValue(numAttribute, valueAttribute); } } + /** + * Reads a sparse instance. + * + * @return the instance + */ private Instance readInstanceSparse() { - // Return a Sparse Instance - Instance instance = new SparseInstance(1.0, null); // (this.instanceInformation.numAttributes() - // + 1); - // System.out.println(this.instanceInformation.numAttributes()); + //Return a Sparse Instance + Instance instance = newSparseInstance(1.0); //, null); //(this.instanceInformation.numAttributes() + 1); + //System.out.println(this.instanceInformation.numAttributes()); int numAttribute; ArrayList<Double> attributeValues = new ArrayList<Double>(); List<Integer> indexValues = new ArrayList<Integer>(); try { - // while (streamTokenizer.ttype != StreamTokenizer.TT_EOF) { + //while (streamTokenizer.ttype != StreamTokenizer.TT_EOF) { streamTokenizer.nextToken(); // Remove the '{' char - // For each line + //For each line while (streamTokenizer.ttype != StreamTokenizer.TT_EOL && streamTokenizer.ttype != StreamTokenizer.TT_EOF) { while (streamTokenizer.ttype != '}') { - // For each item - // streamTokenizer.nextToken(); - // while (streamTokenizer.ttype != '}'){ - // System.out.println(streamTokenizer.nval +"-"+ - // streamTokenizer.sval); - // numAttribute = (int) streamTokenizer.nval; + //For each item + //streamTokenizer.nextToken(); + //while (streamTokenizer.ttype != '}'){ + //System.out.println(streamTokenizer.nval +"-"+ streamTokenizer.sval); + //numAttribute = (int) streamTokenizer.nval; if (streamTokenizer.ttype == StreamTokenizer.TT_NUMBER) { numAttribute = (int) streamTokenizer.nval; } else { @@ -174,32 +257,26 @@ public class ArffLoader implements Loader { streamTokenizer.nextToken(); if (streamTokenizer.ttype == StreamTokenizer.TT_NUMBER) { - // System.out.print(streamTokenizer.nval + " "); - this.setSparseValue(instance, indexValues, attributeValues, numAttribute, - streamTokenizer.nval, true); - // numAttribute++; + //System.out.print(streamTokenizer.nval + " "); + this.setSparseValue(instance, indexValues, attributeValues, numAttribute, streamTokenizer.nval, true); + //numAttribute++; - } else if (streamTokenizer.sval != null && ( - streamTokenizer.ttype == StreamTokenizer.TT_WORD - || streamTokenizer.ttype == 34)) { - // System.out.print(streamTokenizer.sval + "-"); - if (attributes.get(numAttribute).isNumeric()) { - this.setSparseValue(instance, indexValues, attributeValues, numAttribute, - Double.valueOf(streamTokenizer.sval).doubleValue(), true); + } else if (streamTokenizer.sval != null && (streamTokenizer.ttype == StreamTokenizer.TT_WORD + || streamTokenizer.ttype == 34 || streamTokenizer.ttype == 39)) { + //System.out.print(streamTokenizer.sval + "-"); + if (this.auxAttributes.get(numAttribute).isNumeric()) { + this.setSparseValue(instance, indexValues, attributeValues, numAttribute, Double.valueOf(streamTokenizer.sval).doubleValue(), true); } else { - this.setSparseValue(instance, indexValues, attributeValues, numAttribute, - this.instanceInformation - .attribute(numAttribute).indexOfValue(streamTokenizer.sval), - false); + this.setSparseValue(instance, indexValues, attributeValues, numAttribute, this.auxAttributes.get(numAttribute).indexOfValue(streamTokenizer.sval), false); } } streamTokenizer.nextToken(); } - streamTokenizer.nextToken(); // Remove the '}' char + streamTokenizer.nextToken(); //Remove the '}' char } streamTokenizer.nextToken(); - // System.out.println("EOL"); - // } + //System.out.println("EOL"); + //} } catch (IOException ex) { Logger.getLogger(ArffLoader.class.getName()).log(Level.SEVERE, null, ex); @@ -210,78 +287,74 @@ public class ArffLoader implements Loader { arrayIndexValues[i] = indexValues.get(i).intValue(); arrayAttributeValues[i] = attributeValues.get(i).doubleValue(); } - instance.addSparseValues(arrayIndexValues, arrayAttributeValues, - this.instanceInformation.numAttributes()); + instance.addSparseValues(arrayIndexValues, arrayAttributeValues, this.instanceInformation.numAttributes()); return instance; } - private void setSparseValue(Instance instance, List<Integer> indexValues, - List<Double> attributeValues, - int numAttribute, double value, boolean isNumber) { + private void setSparseValue(Instance instance, List<Integer> indexValues, List<Double> attributeValues, int numAttribute, double value, boolean isNumber) { double valueAttribute; - if (isNumber && this.instanceInformation.attribute(numAttribute).isNominal) { - valueAttribute = - this.instanceInformation.attribute(numAttribute).indexOfValue(Double.toString(value)); + if (isNumber && this.auxAttributes.get(numAttribute).isNominal) { + valueAttribute = this.auxAttributes.get(numAttribute).indexOfValue(Double.toString(value)); } else { valueAttribute = value; } - if (this.instanceInformation.classIndex() == numAttribute) { - instance.setClassValue(valueAttribute); - } else { - // instance.setValue(numAttribute, valueAttribute); - indexValues.add(numAttribute); - attributeValues.add(valueAttribute); - } - // System.out.println(numAttribute+":"+valueAttribute+","+this.instanceInformation.classIndex()+","+value); + //if (this.instanceInformation.classIndex() == numAttribute) { + // setClassValue(instance, valueAttribute); + //} else { + //instance.setValue(numAttribute, valueAttribute); + indexValues.add(numAttribute); + attributeValues.add(valueAttribute); + //} + //System.out.println(numAttribute+":"+valueAttribute+","+this.instanceInformation.classIndex()+","+value); } + /** + * Reads an instance sparse and returns a dense one. + * + * @return the instance + */ private Instance readDenseInstanceSparse() { - // Returns a dense instance - Instance instance = new DenseInstance(this.instanceInformation.numAttributes() + 1); - // System.out.println(this.instanceInformation.numAttributes()); + //Returns a dense instance + Instance instance = newDenseInstance(this.instanceInformation.numAttributes()); + //System.out.println(this.instanceInformation.numAttributes()); int numAttribute; try { - // while (streamTokenizer.ttype != StreamTokenizer.TT_EOF) { + //while (streamTokenizer.ttype != StreamTokenizer.TT_EOF) { streamTokenizer.nextToken(); // Remove the '{' char - // For each line + //For each line while (streamTokenizer.ttype != StreamTokenizer.TT_EOL && streamTokenizer.ttype != StreamTokenizer.TT_EOF) { while (streamTokenizer.ttype != '}') { - // For each item - // streamTokenizer.nextToken(); - // while (streamTokenizer.ttype != '}'){ - // System.out.print(streamTokenizer.nval+":"); + //For each item + //streamTokenizer.nextToken(); + //while (streamTokenizer.ttype != '}'){ + //System.out.print(streamTokenizer.nval+":"); numAttribute = (int) streamTokenizer.nval; streamTokenizer.nextToken(); if (streamTokenizer.ttype == StreamTokenizer.TT_NUMBER) { - // System.out.print(streamTokenizer.nval + " "); - this.setValue(instance, numAttribute, streamTokenizer.nval, true); - // numAttribute++; + //System.out.print(streamTokenizer.nval + " "); + instance.setValue(numAttribute, streamTokenizer.nval);//this.setValue(instance, numAttribute, streamTokenizer.nval, true); + //numAttribute++; - } else if (streamTokenizer.sval != null && ( - streamTokenizer.ttype == StreamTokenizer.TT_WORD + } else if (streamTokenizer.sval != null && (streamTokenizer.ttype == StreamTokenizer.TT_WORD || streamTokenizer.ttype == 34)) { - // System.out.print(streamTokenizer.sval + - // "/"+this.instanceInformation.attribute(numAttribute).indexOfValue(streamTokenizer.sval)+" "); - if (attributes.get(numAttribute).isNumeric()) { - this.setValue(instance, numAttribute, - Double.valueOf(streamTokenizer.sval).doubleValue(), true); + //System.out.print(streamTokenizer.sval + "/"+this.auxAttributes.get(numAttribute).indexOfValue(streamTokenizer.sval)+" "); + if (this.auxAttributes.get(numAttribute).isNumeric()) { + instance.setValue(numAttribute, Double.valueOf(streamTokenizer.sval).doubleValue());//this.setValue(instance, numAttribute, Double.valueOf(streamTokenizer.sval).doubleValue(), true); } else { - this.setValue(instance, numAttribute, - this.instanceInformation.attribute(numAttribute) - .indexOfValue(streamTokenizer.sval), false); - // numAttribute++; + instance.setValue(numAttribute, this.auxAttributes.get(numAttribute).indexOfValue(streamTokenizer.sval));//this.setValue(instance, numAttribute, this.auxAttributes.get(numAttribute).indexOfValue(streamTokenizer.sval), false); + //numAttribute++; } } streamTokenizer.nextToken(); } - streamTokenizer.nextToken(); // Remove the '}' char + streamTokenizer.nextToken(); //Remove the '}' char } streamTokenizer.nextToken(); - // System.out.println("EOL"); - // } + //System.out.println("EOL"); + //} } catch (IOException ex) { Logger.getLogger(ArffLoader.class.getName()).log(Level.SEVERE, null, ex); @@ -289,30 +362,37 @@ public class ArffLoader implements Loader { return instance; } - protected List<Attribute> attributes; + //protected List<Attribute> inputAttributes; + // protected List<Attribute> outputAttributes; - private InstanceInformation getHeader() { + protected List<Attribute> auxAttributes; + private InstanceInformation getHeader() { + //commented JD + //this.range.setUpper(10000); //TO DO: Create a new range object with isInRange that does not need the upper limit String relation = "file stream"; - // System.out.println("RELATION " + relation); - attributes = new ArrayList<Attribute>(); + //System.out.println("RELATION " + relation); + //inputAttributes = new ArrayList<Attribute>(); + //outputAttributes = new ArrayList<Attribute>(); + //ArrayList<Attribute> + auxAttributes = new ArrayList<Attribute>();//JD + int numAttributes = 0; try { streamTokenizer.nextToken(); while (streamTokenizer.ttype != StreamTokenizer.TT_EOF) { - // For each line - // if (streamTokenizer.ttype == '@') { - if (streamTokenizer.ttype == StreamTokenizer.TT_WORD - && streamTokenizer.sval.startsWith("@") == true) { - // streamTokenizer.nextToken(); + //For each line + //if (streamTokenizer.ttype == '@') { + if (streamTokenizer.ttype == StreamTokenizer.TT_WORD && streamTokenizer.sval.startsWith("@") == true) { + //streamTokenizer.nextToken(); String token = streamTokenizer.sval.toUpperCase(); if (token.startsWith("@RELATION")) { streamTokenizer.nextToken(); relation = streamTokenizer.sval; - // System.out.println("RELATION " + relation); + // System.out.println("RELATION " + relation); } else if (token.startsWith("@ATTRIBUTE")) { streamTokenizer.nextToken(); String name = streamTokenizer.sval; - // System.out.println("* " + name); + //System.out.println("* " + name); if (name == null) { name = Double.toString(streamTokenizer.nval); } @@ -320,81 +400,95 @@ public class ArffLoader implements Loader { String type = streamTokenizer.sval; // System.out.println("* " + name + ":" + type + " "); if (streamTokenizer.ttype == '{') { - parseDoubleBrackests(name); - } else if (streamTokenizer.ttype == 10) {//for the buggy non-formal input arff file streamTokenizer.nextToken(); - if (streamTokenizer.ttype == '{') { - parseDoubleBrackests(name); + List<String> attributeLabels = new ArrayList<String>(); + while (streamTokenizer.ttype != '}') { + + if (streamTokenizer.sval != null) { + attributeLabels.add(streamTokenizer.sval); + // System.out.print(streamTokenizer.sval + ","); + } else { + attributeLabels.add(Double.toString(streamTokenizer.nval)); + //System.out.print(streamTokenizer.nval + ","); + } + + streamTokenizer.nextToken(); } + // System.out.println(); + //attributes.add(new Attribute(name, attributeLabels)); + //commented JD + /* if (this.range.isInRange(numAttribute)) { + outputAttributes.add(new Attribute(name, attributeLabels)); + } else { + inputAttributes.add(new Attribute(name, attributeLabels)); + }*/ + auxAttributes.add(new Attribute(name, attributeLabels)); + ++numAttributes; } else { // Add attribute - attributes.add(new Attribute(name)); + //commented JD + /*if (this.range.isInRange(numAttribute)) { + outputAttributes.add(new Attribute(name)); + } else { + inputAttributes.add(new Attribute(name)); + }*/ + auxAttributes.add(new Attribute(name)); + ++numAttributes; } } else if (token.startsWith("@DATA")) { - // System.out.print("END"); + //System.out.print("END"); streamTokenizer.nextToken(); break; } } streamTokenizer.nextToken(); } + if (range != null) { + this.range.setUpper(numAttributes); + } + /*if (range==null) //is single-target. All instances should go to inputAtrributes (see setClassIndex(int) from InstanceInformation ) + inputAttributes=auxAttributes; + else//is multi-target + { + this.range.setUpper(numAttribute); + for (int i=0; i<auxAttributes.size();i++) + { + //if (this.range.isInRange(i)) + // outputAttributes.add(auxAttributes.get(i)); + //else + inputAttributes.add(auxAttributes.get(i)); + + } + }*/ } catch (IOException ex) { Logger.getLogger(ArffLoader.class.getName()).log(Level.SEVERE, null, ex); } - return new InstanceInformation(relation, attributes); + // this.range.setUpper(inputAttributes.size()+outputAttributes.size()); + return new InstanceInformation(relation, auxAttributes); } - private void parseDoubleBrackests(String name) throws IOException { - - streamTokenizer.nextToken(); - List<String> attributeLabels = new ArrayList<String>(); - while (streamTokenizer.ttype != '}') { - - if (streamTokenizer.sval != null) { - attributeLabels.add(streamTokenizer.sval); - // System.out.print(streamTokenizer.sval + ","); - } else { - attributeLabels.add(Double.toString(streamTokenizer.nval)); - // System.out.print(streamTokenizer.nval + ","); - } - - streamTokenizer.nextToken(); - } - // System.out.println(); - attributes.add(new Attribute(name, attributeLabels)); - + protected Instance newSparseInstance(double d, double[] res) { + Instance inst = new SparseInstance(d, res); //is it dense? + //inst.setInstanceInformation(this.instanceInformation); + return inst; } - private void initStreamTokenizer(Reader reader) { - BufferedReader br = new BufferedReader(reader); - - // Init streamTokenizer - streamTokenizer = new StreamTokenizer(br); - - streamTokenizer.resetSyntax(); - streamTokenizer.whitespaceChars(0, ' '); - streamTokenizer.wordChars(' ' + 1, '\u00FF'); - streamTokenizer.whitespaceChars(',', ','); - streamTokenizer.commentChar('%'); - streamTokenizer.quoteChar('"'); - streamTokenizer.quoteChar('\''); - streamTokenizer.ordinaryChar('{'); - streamTokenizer.ordinaryChar('}'); - streamTokenizer.eolIsSignificant(true); + protected Instance newSparseInstance(double d) { + Instance inst = new SparseInstance(d); + //inst.setInstanceInformation(this.instanceInformation); + return inst; + } - this.instanceInformation = this.getHeader(); - if (classAttribute < 0) { - this.instanceInformation.setClassIndex(this.instanceInformation.numAttributes() - 1); - // System.out.print(this.instanceInformation.classIndex()); - } else if (classAttribute > 0) { - this.instanceInformation.setClassIndex(classAttribute - 1); - } + protected Instance newDenseInstance(int numberAttributes) { + Instance inst = new DenseInstance(numberAttributes); + //inst.setInstanceInformation(this.instanceInformation); + return inst; } - @Override - public Instance readInstance() { - return readInstance(this.reader); + private void setClassValue(Instance instance, double valueAttribute) { + instance.setValue(this.instanceInformation.classIndex(), valueAttribute); } + } http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/26c21912/samoa-instances/src/main/java/org/apache/samoa/instances/Attribute.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/Attribute.java b/samoa-instances/src/main/java/org/apache/samoa/instances/Attribute.java index f14e6c3..128ace7 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/Attribute.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/Attribute.java @@ -1,7 +1,3 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ package org.apache.samoa.instances; /* @@ -28,53 +24,79 @@ import java.io.Serializable; import java.text.SimpleDateFormat; import java.util.*; -/** - * @author abifet - */ public class Attribute implements Serializable { - public static final String ARFF_ATTRIBUTE = "@attribute"; - public static final String ARFF_ATTRIBUTE_NUMERIC = "numeric"; - public static final String ARFF_ATTRIBUTE_NOMINAL = "nominal"; - public static final String ARFF_ATTRIBUTE_DATE = "date"; + /** The keyword used to denote the start of an arff attribute declaration */ + public final static String ARFF_ATTRIBUTE = "@attribute"; + + /** A keyword used to denote a numeric attribute */ + public final static String ARFF_ATTRIBUTE_INTEGER = "integer"; + + /** A keyword used to denote a numeric attribute */ + public final static String ARFF_ATTRIBUTE_REAL = "real"; + + /** A keyword used to denote a numeric attribute */ + public final static String ARFF_ATTRIBUTE_NUMERIC = "numeric"; + + /** The keyword used to denote a string attribute */ + public final static String ARFF_ATTRIBUTE_STRING = "string"; + + /** The keyword used to denote a date attribute */ + public final static String ARFF_ATTRIBUTE_DATE = "date"; + + /** The keyword used to denote a relation-valued attribute */ + public final static String ARFF_ATTRIBUTE_RELATIONAL = "relational"; + + /** The keyword used to denote the end of the declaration of a subrelation */ + public final static String ARFF_END_SUBRELATION = "@end"; + + /** Strings longer than this will be stored compressed. */ + private static final int STRING_COMPRESS_THRESHOLD = 200; /** - * + * The is nominal. */ protected boolean isNominal; + /** - * + * The is numeric. */ protected boolean isNumeric; + /** - * + * The is date. */ protected boolean isDate; + + /** + * Date format specification for date attributes + */ + protected SimpleDateFormat m_DateFormat; + /** - * + * The name. */ protected String name; + /** - * + * The attribute values. */ protected List<String> attributeValues; /** + * Gets the attribute values. * - * @return + * @return the attribute values */ public List<String> getAttributeValues() { return attributeValues; } - /** - * - */ - protected int index; /** + * Instantiates a new attribute. * - * @param string + * @param string the string */ public Attribute(String string) { this.name = string; @@ -82,9 +104,10 @@ public class Attribute implements Serializable { } /** + * Instantiates a new attribute. * - * @param attributeName - * @param attributeValues + * @param attributeName the attribute name + * @param attributeValues the attribute values */ public Attribute(String attributeName, List<String> attributeValues) { this.name = attributeName; @@ -93,48 +116,71 @@ public class Attribute implements Serializable { } /** + * Instantiates a new attribute. * + * @param attributeName the attribute name + * @param dateFormat the format of the date used + */ + public Attribute(String attributeName, String dateFormat) { + this.name = attributeName; + this.valuesStringAttribute = null; + this.isDate = true; + + if (dateFormat != null) { + m_DateFormat = new SimpleDateFormat(dateFormat); + } else { + m_DateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"); + } + } + + /** + * Instantiates a new attribute. */ public Attribute() { this(""); } /** + * Checks if is nominal. * - * @return + * @return true, if is nominal */ public boolean isNominal() { return this.isNominal; } /** + * Name. * - * @return + * @return the string */ public String name() { return this.name; } /** + * Value. * - * @param value - * @return + * @param value the value + * @return the string */ public String value(int value) { return attributeValues.get(value); } /** + * Checks if is numeric. * - * @return + * @return true, if is numeric */ public boolean isNumeric() { return isNumeric; } /** + * Num values. * - * @return + * @return the int */ public int numValues() { if (isNumeric()) { @@ -145,28 +191,43 @@ public class Attribute implements Serializable { } /** + * Index. * - * @return + * @return the int */ - public int index() { // RuleClassifier - return this.index; - } + // public int index() { //RuleClassifier + // return this.index; + // } + /** + * Format date. + * + * @param value the value + * @return the string + */ String formatDate(double value) { - SimpleDateFormat sdf = new SimpleDateFormat(); - return sdf.format(new Date((long) value)); + return this.m_DateFormat.format(new Date((long) value)); } + /** + * Checks if is date. + * + * @return true, if is date + */ boolean isDate() { return isDate; } + /** + * The values string attribute. + */ private Map<String, Integer> valuesStringAttribute; /** + * Index of value. * - * @param value - * @return + * @param value the value + * @return the int */ public final int indexOfValue(String value) { @@ -189,13 +250,20 @@ public class Attribute implements Serializable { } } - @Override - public String toString() { + /** + * Returns a description of this attribute in ARFF format. Quotes + * strings if they contain whitespace characters, or if they + * are a question mark. + * + * @return a description of this attribute as a string + */ + public final String toString() { + StringBuffer text = new StringBuffer(); - text.append(ARFF_ATTRIBUTE).append(" ").append(Utils.quote(this.name)).append(" "); + text.append(ARFF_ATTRIBUTE).append(" ").append(Utils.quote(this.name())).append(" "); - if (isNominal) { + if (this.isNominal){ text.append('{'); Enumeration enu = enumerateValues(); while (enu.hasMoreElements()) { @@ -204,10 +272,12 @@ public class Attribute implements Serializable { text.append(','); } text.append('}'); - } else if (isNumeric) { + } else if (this.isNumeric){ text.append(ARFF_ATTRIBUTE_NUMERIC); - } else if (isDate) { - text.append(ARFF_ATTRIBUTE_DATE); + } else if (this.isDate){ + text.append(ARFF_ATTRIBUTE_DATE).append(" ").append(Utils.quote(m_DateFormat.toPattern())); + } else{ + text.append("UNKNOW"); } return text.toString(); http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/26c21912/samoa-instances/src/main/java/org/apache/samoa/instances/AttributesInformation.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/AttributesInformation.java b/samoa-instances/src/main/java/org/apache/samoa/instances/AttributesInformation.java new file mode 100644 index 0000000..58ece8e --- /dev/null +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/AttributesInformation.java @@ -0,0 +1,152 @@ +package org.apache.samoa.instances; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; + +public class AttributesInformation implements Serializable { + + /** + * The attribute information. + */ + protected List<Attribute> attributes; + protected List<Integer> indexValues; + /** + * The number of attributes. + */ + protected int numberAttributes; + + /** + * The attribute used for default for numerical values + */ + protected Attribute defaultNumericAttribute; + + public AttributesInformation(AttributesInformation chunk) { + this.attributes = chunk.attributes; + this.indexValues = chunk.indexValues; + this.numberAttributes = chunk.numberAttributes; + } + + public AttributesInformation(List<Attribute> v, List<Integer> i, int numberAttributes) { + this.attributes = v; + this.indexValues = i; + this.numberAttributes = numberAttributes; + } + + public AttributesInformation(List<Attribute> v, int numberAttributes) { + this.attributes = v; + this.indexValues = new ArrayList<Integer>(numberAttributes); + for (int i = 0; i < numberAttributes; i++) { + this.indexValues.add(i); + } + this.numberAttributes = numberAttributes; + } + + public AttributesInformation() { + this.attributes = null; + this.indexValues = null; + this.numberAttributes = 0; + this.defaultNumericAttribute = null; + } + + /** + * Attribute. + * + * @param indexAttribute the index Attribute + * @return the attribute + */ + public Attribute attribute(int indexAttribute) { + if (this.attributes == null) { + //All attributes are numeric + return defaultNumericAttribute(); + } + int location = locateIndex(indexAttribute); + if (location == -1) { + //if there is not attribute information, it is numeric + return defaultNumericAttribute(); + } + return attributes.get(location); + } + + public void add(Attribute attribute, int value) { + this.attributes.add(attribute); + this.indexValues.add(value); + } + + /** + * Sets the attribute information. + * + * @param v the new attribute information + */ + public void setAttributes(List<Attribute> v) { + this.attributes = v; + this.numberAttributes=v.size(); + } + + /** + * Locates the greatest index that is not greater than the given index. + * + * @return the internal index of the attribute index. Returns -1 if no index + * with this property could be found + */ + public int locateIndex(int index) { + + int min = 0; + int max = this.indexValues.size() - 1; + + if (max == -1) { + return -1; + } + + // Binary search + while ((this.indexValues.get(min) <= index) && (this.indexValues.get(max) >= index)) { + int current = (max + min) / 2; + if (this.indexValues.get(current) > index) { + max = current - 1; + } else if (this.indexValues.get(current) < index) { + min = current + 1; + } else { + return current; + } + } + if (this.indexValues.get(max) < index) { + return max; + } else { + return min - 1; + } + } + + private Attribute defaultNumericAttribute() { + if (this.defaultNumericAttribute == null) { + this.defaultNumericAttribute = new Attribute("default"); + } + return this.defaultNumericAttribute; + } + + public void setAttributes(List<Attribute> v, List<Integer> indexValues) { + this.attributes = v; + this.numberAttributes=v.size(); + this.indexValues=indexValues; + } + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/26c21912/samoa-instances/src/main/java/org/apache/samoa/instances/AvroBinaryLoader.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/AvroBinaryLoader.java b/samoa-instances/src/main/java/org/apache/samoa/instances/AvroBinaryLoader.java index 5c57aa1..84b7eec 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/AvroBinaryLoader.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/AvroBinaryLoader.java @@ -29,9 +29,6 @@ import org.apache.avro.generic.GenericRecord; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -/** - * Load Data from Binary Avro Stream and parse to corresponding Dense & Parse Instances - */ public class AvroBinaryLoader extends AvroLoader { private static final long serialVersionUID = 1L; http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/26c21912/samoa-instances/src/main/java/org/apache/samoa/instances/AvroJsonLoader.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/AvroJsonLoader.java b/samoa-instances/src/main/java/org/apache/samoa/instances/AvroJsonLoader.java index b765405..622347a 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/AvroJsonLoader.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/AvroJsonLoader.java @@ -34,9 +34,6 @@ import org.apache.avro.io.DecoderFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -/** - * Load Data from JSON Avro Stream and parse to corresponding Dense & Parse Instances - */ public class AvroJsonLoader extends AvroLoader { private static final long serialVersionUID = 1L; http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/26c21912/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java b/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java index 2b36744..d3e7f27 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java @@ -31,11 +31,6 @@ import org.apache.avro.generic.GenericData.EnumSymbol; import org.apache.avro.generic.GenericRecord; import org.apache.avro.io.DatumReader; -/** - * Load Data from Avro Stream and parse to corresponding Dense & Parse Instances Abstract Class: Subclass this class for - * different types of Avro Encodings - * - */ public abstract class AvroLoader implements Loader { private static final long serialVersionUID = 1L; @@ -133,11 +128,7 @@ public abstract class AvroLoader implements Loader { */ private void setDenseValue(Instance instance, int numAttribute, double valueAttribute) { - - if (this.instanceInformation.classIndex() == numAttribute) - instance.setClassValue(valueAttribute); - else - instance.setValue(numAttribute, valueAttribute); + instance.setValue(numAttribute, valueAttribute); } /** @@ -267,16 +258,16 @@ public abstract class AvroLoader implements Loader { private boolean isNumeric(Field field) { if (field.schema().getType() == Schema.Type.DOUBLE - || field.schema().getType() == Schema.Type.FLOAT - || field.schema().getType() == Schema.Type.LONG - || field.schema().getType() == Schema.Type.INT) + || field.schema().getType() == Schema.Type.FLOAT + || field.schema().getType() == Schema.Type.LONG + || field.schema().getType() == Schema.Type.INT) return true; if (field.schema().getType() == Schema.Type.UNION) { for (Schema schema: field.schema().getTypes()) { if (schema.getType() == Schema.Type.DOUBLE - || schema.getType() == Schema.Type.FLOAT - || schema.getType() == Schema.Type.LONG - || schema.getType() == Schema.Type.INT) + || schema.getType() == Schema.Type.FLOAT + || schema.getType() == Schema.Type.LONG + || schema.getType() == Schema.Type.INT) return true; } } http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/26c21912/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstance.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstance.java b/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstance.java index 80feb11..d8a789b 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstance.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstance.java @@ -1,7 +1,3 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ package org.apache.samoa.instances; /* @@ -24,65 +20,42 @@ package org.apache.samoa.instances; * #L% */ -import java.text.SimpleDateFormat; - -/** - * @author abifet - */ -public class DenseInstance extends SingleLabelInstance { - - private static final long serialVersionUID = 280360594027716737L; - - public DenseInstance() { - // necessary for kryo serializer - } +public class DenseInstance extends InstanceImpl { + /** + * Instantiates a new dense instance. + * + * @param weight the weight + * @param res the res + */ public DenseInstance(double weight, double[] res) { super(weight, res); } - public DenseInstance(SingleLabelInstance inst) { + /** + * Instantiates a new dense instance. + * + * @param inst the inst + */ + public DenseInstance(InstanceImpl inst) { super(inst); } + /** + * Instantiates a new dense instance. + * + * @param inst the inst + */ public DenseInstance(Instance inst) { - super((SingleLabelInstance) inst); + super((InstanceImpl) inst); } + /** + * Instantiates a new dense instance. + * + * @param numberAttributes the number attributes + */ public DenseInstance(double numberAttributes) { super((int) numberAttributes); - // super(1, new double[(int) numberAttributes-1]); - // Add missing values - // for (int i = 0; i < numberAttributes-1; i++) { - // //this.setValue(i, Double.NaN); - // } - - } - - @Override - public String toString() { - StringBuffer text = new StringBuffer(); - - //append all attributes except the class attribute. - for (int attIndex = 0; attIndex < this.numAttributes()-1; attIndex++) { - if (!this.isMissing(attIndex)) { - if (this.attribute(attIndex).isNominal()) { - int valueIndex = (int) this.value(attIndex); - String stringValue = this.attribute(attIndex).value(valueIndex); - text.append(stringValue).append(","); - } else if (this.attribute(attIndex).isNumeric()) { - text.append(this.value(attIndex)).append(","); - } else if (this.attribute(attIndex).isDate()) { - SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"); - text.append(dateFormatter.format(this.value(attIndex))).append(","); - } - } else { - text.append("?,"); - } - } - //append the class value at the end of the instance. - text.append(this.classAttribute().value((int)classValue())); - - return text.toString(); } } http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/26c21912/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstanceData.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstanceData.java b/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstanceData.java index ecb2f88..6781e91 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstanceData.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstanceData.java @@ -1,7 +1,3 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ package org.apache.samoa.instances; /* @@ -24,74 +20,151 @@ package org.apache.samoa.instances; * #L% */ -/** - * - * @author abifet - */ -public class DenseInstanceData implements InstanceData { +public class DenseInstanceData implements InstanceData{ + /** + * Instantiates a new dense instance data. + * + * @param array the array + */ public DenseInstanceData(double[] array) { this.attributeValues = array; } + /** + * Instantiates a new dense instance data. + * + * @param length the length + */ public DenseInstanceData(int length) { this.attributeValues = new double[length]; } + /** + * Instantiates a new dense instance data. + */ public DenseInstanceData() { this(0); } + /** The attribute values. */ protected double[] attributeValues; + /** + * Num attributes. + * + * @return the int + */ @Override public int numAttributes() { return this.attributeValues.length; } + /** + * Value. + * + * @param indexAttribute the index attribute + * @return the double + */ @Override public double value(int indexAttribute) { - return this.attributeValues[indexAttribute]; + if (this.attributeValues.length <= indexAttribute) + return this.attributeValues[this.attributeValues.length-1]; + + return this.attributeValues[indexAttribute]; } + /** + * Checks if is missing. + * + * @param indexAttribute the index attribute + * @return true, if is missing + */ @Override public boolean isMissing(int indexAttribute) { return Double.isNaN(this.value(indexAttribute)); } + /** + * Num values. + * + * @return the int + */ @Override public int numValues() { return numAttributes(); } + /** + * Index. + * + * @param indexAttribute the index attribute + * @return the int + */ @Override public int index(int indexAttribute) { return indexAttribute; } + /** + * Value sparse. + * + * @param indexAttribute the index attribute + * @return the double + */ @Override public double valueSparse(int indexAttribute) { return value(indexAttribute); } + /** + * Checks if is missing sparse. + * + * @param indexAttribute the index attribute + * @return true, if is missing sparse + */ @Override public boolean isMissingSparse(int indexAttribute) { return isMissing(indexAttribute); } - /* - * @Override public double value(Attribute attribute) { return - * value(attribute.index()); } + /** + * To double array. + * + * @return the double[] */ - @Override public double[] toDoubleArray() { - return attributeValues.clone(); + return attributeValues; } + /** + * Sets the value. + * + * @param attributeIndex the attribute index + * @param d the d + */ @Override public void setValue(int attributeIndex, double d) { this.attributeValues[attributeIndex] = d; } + @Override + public void deleteAttributeAt(int index) { + + double[] newValues = new double[attributeValues.length - 1]; + + System.arraycopy(attributeValues, 0, newValues, 0, index); + if (index < attributeValues.length - 1) { + System.arraycopy(attributeValues, index + 1, newValues, index, + attributeValues.length - (index + 1)); + } + attributeValues = newValues; + } + + @Override + public InstanceData copy() { + return new DenseInstanceData(this.attributeValues); + } + } http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/26c21912/samoa-instances/src/main/java/org/apache/samoa/instances/Instance.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/Instance.java b/samoa-instances/src/main/java/org/apache/samoa/instances/Instance.java index ee99914..da4dcdd 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/Instance.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/Instance.java @@ -1,7 +1,3 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ package org.apache.samoa.instances; /* @@ -26,68 +22,310 @@ package org.apache.samoa.instances; import java.io.Serializable; -/** - * - * @author abifet - */ - public interface Instance extends Serializable { - double weight(); + /** + * Gets the weight of the instance. + * + * @return the weight + */ + public double weight(); - void setWeight(double weight); + /** + * Sets the weight. + * + * @param weight the new weight + */ + public void setWeight(double weight); - // Attributes - Attribute attribute(int instAttIndex); + /** + * Attribute. + * + * @param instAttIndex the inst att index + * @return the attribute + */ + public Attribute attribute(int instAttIndex); - void deleteAttributeAt(int i); + /** + * Delete attribute at. + * + * @param i the index + */ + public void deleteAttributeAt(int i); - void insertAttributeAt(int i); + /** + * Insert attribute at. + * + * @param i the index + */ + public void insertAttributeAt(int i); - int numAttributes(); + /** + * Gets the number of attributes. + * + * @return the number of attributes + */ + public int numAttributes(); + /** + * Adds the sparse values. + * + * @param indexValues the index values + * @param attributeValues the attribute values + * @param numberAttributes the number attributes + */ public void addSparseValues(int[] indexValues, double[] attributeValues, int numberAttributes); - // Values - int numValues(); + /** + * Gets the number of values, mainly for sparse instances. + * + * @return the number of values + */ + public int numValues(); + + /** + * Gets the value of a discrete attribute as a string. + * + * @param i the i + * @return the string + */ + public String stringValue(int i); + + /** + * Gets the value of an attribute. + * + * @param instAttIndex the inst att index + * @return the double + */ + public double value(int instAttIndex); + + /** + * Sets an attribute as missing + * + * @param instAttIndex, the attribute's index + */ + public void setMissing(int instAttIndex); + + /** + * Sets the value of an attribute. + * + * @param instAttIndex the index + * @param value the value + */ + public void setValue(int instAttIndex, double value); + + /** + * Checks if an attribute is missing. + * + * @param instAttIndex the inst att index + * @return true, if is missing + */ + public boolean isMissing(int instAttIndex); + + /** + * Gets the index of the attribute given the index of the array in a sparse + * representation. + * + * @param arrayIndex the index of the array + * @return the index + */ + public int index(int arrayIndex); + + /** + * Gets the value of an attribute in a sparse representation of the + * instance. + * + * @param i the i + * @return the value + */ + public double valueSparse(int i); + + /** + * Checks if the attribute is missing sparse. + * + * @param p1 the p1 + * @return true, if is missing sparse + */ + public boolean isMissingSparse(int p1); + + /** + * To double array. + * + * @return the double[] + */ + public double[] toDoubleArray(); + + /** + * Class attribute. + * + * @return the attribute + */ + public Attribute classAttribute(); + + /** + * Class index. + * + * @return the int + */ + public int classIndex(); + + /** + * Class is missing. + * + * @return true, if successful + */ + public boolean classIsMissing(); + + /** + * Class value. + * + * @return the double + */ + public double classValue(); + + /** + * Num classes. + * + * @return the int + */ + public int numClasses(); + + /** + * Sets the class value. + * + * @param d the new class value + */ + public void setClassValue(double d); + + /** + * Copy. + * + * @return the instance + */ + public Instance copy(); - String stringValue(int i); + /** + * Sets the dataset. + * + * @param dataset the new dataset + */ + public void setDataset(Instances dataset); - double value(int instAttIndex); + /** + * Dataset. + * + * @return the instances + */ + public Instances dataset(); - double value(Attribute attribute); + /** + * Gets the number of input attributes. + * + * @return the number of input attributes + */ + public int numInputAttributes(); - void setValue(int m_numAttributes, double d); + /** + * Gets the number of output attributes. + * + * @return the number of output attributes + */ + public int numOutputAttributes(); - boolean isMissing(int instAttIndex); + /** + * Gets the number of output attributes. + * + * @return the number of output attributes + */ + public int numberOutputTargets(); - int index(int i); + /** + * Gets the value of an output attribute. + * + * @param attributeIndex the index + * @return the value + */ + public double classValue(int attributeIndex); - double valueSparse(int i); + /** + * Sets the value of an output attribute. + * + * @param indexClass the output attribute index + * @param valueAttribute the value of the attribute + */ + public void setClassValue(int indexClass, double valueAttribute); - boolean isMissingSparse(int p1); + /** + * Gets an output attribute given its index. + * + * @param attributeIndex the index + * @return the attribute + */ + public Attribute outputAttribute(int attributeIndex); - double[] toDoubleArray(); + /** + * Gets an input attribute given its index. + * + * @param attributeIndex the index + * @return the attribute + */ + public Attribute inputAttribute(int attributeIndex); - // Class - Attribute classAttribute(); + /** + * Gets the value of an input attribute. + * + * @param attributeIndex the index + * @return the value + */ + public double valueInputAttribute(int attributeIndex); - int classIndex(); + /** + * Gets the value of an output attribute. + * + * @param attributeIndex the index + * @return the value + */ + public double valueOutputAttribute(int attributeIndex); - boolean classIsMissing(); + /** + * Index of an Attribute. + * + * @param attribute, the attribute to be found. + * @return the index of an attribute + */ + public int indexOfAttribute(Attribute attribute); - double classValue(); + /** + * Gets the value of an attribute, given the attribute. + * + * @param attribute the attribute + * @return the double + */ + public double value(Attribute attribute); - int numClasses(); + /** + * Sets an attribute as missing + * + * @param attribute, the Attribute + */ + public void setMissing(Attribute attribute); - void setClassValue(double d); + /** + * Sets the value of an attribute. + * + * @param attribute, the Attribute + * @param value the value + */ + public void setValue(Attribute attribute, double value); - Instance copy(); + /** + * Checks if an attribute is missing. + * + * @param attribute, the Attribute + * @return true, if is missing + */ + public boolean isMissing(Attribute attribute); - // Dataset - void setDataset(Instances dataset); - Instances dataset(); - String toString(); } http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/26c21912/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceData.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceData.java b/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceData.java index eca4145..b735ea5 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceData.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceData.java @@ -1,7 +1,3 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ package org.apache.samoa.instances; /* @@ -26,30 +22,90 @@ package org.apache.samoa.instances; import java.io.Serializable; -/** - * - * @author abifet - */ public interface InstanceData extends Serializable { + /** + * Num attributes. + * + * @return the int + */ public int numAttributes(); + /** + * Value. + * + * @param instAttIndex the inst att index + * @return the double + */ public double value(int instAttIndex); + /** + * Checks if is missing. + * + * @param instAttIndex the inst att index + * @return true, if is missing + */ public boolean isMissing(int instAttIndex); + /** + * Num values. + * + * @return the int + */ public int numValues(); + /** + * Index. + * + * @param i the i + * @return the int + */ public int index(int i); + /** + * Value sparse. + * + * @param i the i + * @return the double + */ public double valueSparse(int i); + /** + * Checks if is missing sparse. + * + * @param p1 the p1 + * @return true, if is missing sparse + */ public boolean isMissingSparse(int p1); - // public double value(Attribute attribute); - + /** + * To double array. + * + * @return the double[] + */ public double[] toDoubleArray(); + /** + * Sets the value. + * + * @param m_numAttributes the m_num attributes + * @param d the d + */ public void setValue(int m_numAttributes, double d); + + /** + * Deletes an attribute. + * + * @param index the indes + */ + public void deleteAttributeAt(int index); + + /** + * Produces a shallow copy of this instance data. + * + * @return the shallow copy + */ + public InstanceData copy(); + } http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/26c21912/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceImpl.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceImpl.java b/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceImpl.java new file mode 100644 index 0000000..ff77dc2 --- /dev/null +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceImpl.java @@ -0,0 +1,483 @@ +package org.apache.samoa.instances; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.text.SimpleDateFormat; + +public class InstanceImpl implements MultiLabelInstance { + + /** + * The weight. + */ + protected double weight; + + /** + * The instance data. + */ + protected InstanceData instanceData; + + /** + * The instance information. + */ + protected InstancesHeader instanceHeader; + + /** + * Instantiates a new instance. + * + * @param inst the inst + */ + public InstanceImpl(InstanceImpl inst) { + this.weight = inst.weight; + this.instanceData = inst.instanceData; + this.instanceHeader = inst.instanceHeader; + } + + //Dense + /** + * Instantiates a new instance. + * + * @param weight the weight + * @param res the res + */ + public InstanceImpl(double weight, double[] res) { + this.weight = weight; + this.instanceData = new DenseInstanceData(res); + } + + //Sparse + /** + * Instantiates a new instance. + * + * @param weight the weight + * @param attributeValues the attribute values + * @param indexValues the index values + * @param numberAttributes the number attributes + */ + public InstanceImpl(double weight, double[] attributeValues, int[] indexValues, int numberAttributes) { + this.weight = weight; + this.instanceData = new SparseInstanceData(attributeValues, indexValues, numberAttributes); + } + + /** + * Instantiates a new instance. + * + * @param weight the weight + * @param instanceData the instance data + */ + public InstanceImpl(double weight, InstanceData instanceData) { + this.weight = weight; + this.instanceData = instanceData; + } + + /** + * Instantiates a new instance. + * + * @param numAttributes the num attributes + */ + public InstanceImpl(int numAttributes) { + this.instanceData = new DenseInstanceData(new double[numAttributes]); //JD + this.weight = 1; + } + + /** + * Weight. + * + * @return the double + */ + @Override + public double weight() { + return weight; + } + + /** + * Sets the weight. + * + * @param weight the new weight + */ + @Override + public void setWeight(double weight) { + this.weight = weight; + } + + /** + * Attribute. + * + * @param instAttIndex the inst att index + * @return the attribute + */ + @Override + public Attribute attribute(int instAttIndex) { + return this.instanceHeader.attribute(instAttIndex); + } + + public int indexOfAttribute(Attribute attribute){ + return this.instanceHeader.indexOf(attribute); + } + + /** + * Delete attribute at. + * + * @param i the i + */ + @Override + public void deleteAttributeAt(int i) { + this.instanceData.deleteAttributeAt(i); + } + + /** + * Insert attribute at. + * + * @param i the i + */ + @Override + public void insertAttributeAt(int i) { + throw new UnsupportedOperationException("Not yet implemented"); + } + + /** + * Num attributes. + * + * @return the int + */ + @Override + public int numAttributes() { + return this.instanceData.numAttributes(); + } + + /** + * Value. + * + * @param instAttIndex the inst att index + * @return the double + */ + @Override + public double value(int instAttIndex) { + return this.instanceData.value(instAttIndex); + } + + /** + * Checks if is missing. + * + * @param instAttIndex the inst att index + * @return true, if is missing + */ + @Override + public boolean isMissing(int instAttIndex) { + return this.instanceData.isMissing(instAttIndex); + } + + /** + * Num values. + * + * @return the int + */ + @Override + public int numValues() { + return this.instanceData.numValues(); + } + + /** + * Index. + * + * @param i the i + * @return the int + */ + @Override + public int index(int i) { + return this.instanceData.index(i); + } + + /** + * Value sparse. + * + * @param i the i + * @return the double + */ + @Override + public double valueSparse(int i) { + return this.instanceData.valueSparse(i); + } + + /** + * Checks if is missing sparse. + * + * @param p the p + * @return true, if is missing sparse + */ + @Override + public boolean isMissingSparse(int p) { + return this.instanceData.isMissingSparse(p); + } + + /** + * String value. + * + * @param i the i + * @return the string + */ + @Override + public String stringValue(int i) { + throw new UnsupportedOperationException("Not yet implemented"); + } + + /** + * To double array. + * + * @return the double[] + */ + @Override + public double[] toDoubleArray() { + return this.instanceData.toDoubleArray(); + } + + /** + * Sets the value. + * + * @param numAttribute the num attribute + * @param d the d + */ + @Override + public void setValue(int numAttribute, double d) { + this.instanceData.setValue(numAttribute, d); + } + + /** + * Class value. + * + * @return the double + */ + @Override + public double classValue() { + return this.instanceData.value(classIndex()); + } + + /** + * Class index. + * + * @return the int + */ + @Override + public int classIndex() { + int classIndex = instanceHeader.classIndex(); + //return classIndex != Integer.MAX_VALUE ? classIndex : 0; + // return ? classIndex : 0; + if(classIndex == Integer.MAX_VALUE) + if(this.instanceHeader.instanceInformation.range!=null) + classIndex=instanceHeader.instanceInformation.range.getStart(); + else + classIndex=0; + return classIndex; + } + + /** + * Num classes. + * + * @return the int + */ + @Override + public int numClasses() { + return this.instanceHeader.numClasses(); + } + + /** + * Class is missing. + * + * @return true, if successful + */ + @Override + public boolean classIsMissing() { + return this.instanceData.isMissing(classIndex()); + } + + /** + * Class attribute. + * + * @return the attribute + */ + @Override + public Attribute classAttribute() { + return this.instanceHeader.attribute(classIndex()); + } + + /** + * Sets the class value. + * + * @param d the new class value + */ + @Override + public void setClassValue(double d) { + this.setValue(classIndex(), d); + } + + /** + * Copy. + * + * @return the instance + */ + @Override + public Instance copy() { + InstanceImpl inst = new InstanceImpl(this); + return inst; + } + + /** + * Dataset. + * + * @return the instances + */ + @Override + public Instances dataset() { + return this.instanceHeader; + } + + /** + * Sets the dataset. + * + * @param dataset the new dataset + */ + @Override + public void setDataset(Instances dataset) { + this.instanceHeader = new InstancesHeader(dataset); + } + + /** + * Adds the sparse values. + * + * @param indexValues the index values + * @param attributeValues the attribute values + * @param numberAttributes the number attributes + */ + @Override + public void addSparseValues(int[] indexValues, double[] attributeValues, int numberAttributes) { + this.instanceData = new SparseInstanceData(attributeValues, indexValues, numberAttributes); //??? + } + + /** + * Text representation of a InstanceImpl. + */ + @Override + public String toString() { + StringBuilder str = new StringBuilder(); + for (int attIndex = 0; attIndex < this.numAttributes(); attIndex++) { + if (!this.isMissing(attIndex)) { + if (this.attribute(attIndex).isNominal()) { + int valueIndex = (int) this.value(attIndex); + String stringValue = this.attribute(attIndex).value(valueIndex); + str.append(stringValue).append(","); + } else if (this.attribute(attIndex).isNumeric()) { + str.append(this.value(attIndex)).append(","); + } else if (this.attribute(attIndex).isDate()) { + SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"); + str.append(dateFormatter.format(this.value(attIndex))).append(","); + } + } else { + str.append("?,"); + } + } + return str.toString(); + } + + @Override + public int numInputAttributes() { + return this.instanceHeader.numInputAttributes(); + } + + @Override + public int numOutputAttributes() { + return numberOutputTargets(); + } + + @Override + public int numberOutputTargets() { + return this.instanceHeader.numOutputAttributes(); + } + + @Override + public double classValue(int instAttIndex) { + return valueOutputAttribute(instAttIndex); + } + + @Override + public void setClassValue(int indexClass, double valueAttribute) { + InstanceInformation instanceInformation = this.instanceHeader.getInstanceInformation(); + this.instanceData.setValue(instanceInformation.outputAttributeIndex(indexClass), valueAttribute); + + } + + @Override + public Attribute outputAttribute(int outputIndex) { + InstanceInformation instanceInformation = this.instanceHeader.getInstanceInformation(); + return instanceInformation.outputAttribute(outputIndex); + } + + @Override + public Attribute inputAttribute(int attributeIndex) { + InstanceInformation instanceInformation = this.instanceHeader.getInstanceInformation(); + return instanceInformation.inputAttribute(attributeIndex); + } + + @Override + public double valueInputAttribute(int attributeIndex) { + InstanceInformation instanceInformation = this.instanceHeader.getInstanceInformation(); + return this.instanceData.value(instanceInformation.inputAttributeIndex(attributeIndex)); + } + + @Override + public double valueOutputAttribute(int attributeIndex) { + InstanceInformation instanceInformation = this.instanceHeader.getInstanceInformation(); + return this.instanceData.value(instanceInformation.outputAttributeIndex(attributeIndex)); + } + + @Override + public void setMissing(int instAttIndex) { + this.setValue(instAttIndex, Double.NaN); + } + + /** + * Value. + * + * @param attribute the attribute + * @return the double + */ + @Override + public double value(Attribute attribute) { + int index = this.instanceHeader.indexOf(attribute); + return value(index); + } + + @Override + public void setMissing(Attribute attribute) { + int index = this.instanceHeader.indexOf(attribute); + this.setMissing(index); + } + + @Override + public boolean isMissing(Attribute attribute) { + int index = this.instanceHeader.indexOf(attribute); + return this.isMissing(index); + } + + @Override + public void setValue(Attribute attribute, double value) { + int index = this.instanceHeader.indexOf(attribute); + this.setValue(index, value); + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/26c21912/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceInformation.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceInformation.java b/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceInformation.java index 639f065..cfd7f51 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceInformation.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceInformation.java @@ -1,7 +1,3 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ package org.apache.samoa.instances; /* @@ -27,82 +23,194 @@ package org.apache.samoa.instances; import java.io.Serializable; import java.util.List; -/** - * - * @author abifet - */ public class InstanceInformation implements Serializable { - // Should we split Instances as a List of Instances, and InformationInstances - - /** The dataset's name. */ + /** + * The dataset's name. + */ protected String relationName; - /** The attribute information. */ - protected List<Attribute> attributes; + protected AttributesInformation attributesInformation; + + /** + * The class index. + */ + protected int classIndex = Integer.MAX_VALUE; //By default is multilabel - protected int classIndex; + /** + * Range for multi-label instances. + */ + protected Range range; + public Attribute inputAttribute(int w) { + return this.attributesInformation.attribute(inputAttributeIndex(w)); + } + + public Attribute outputAttribute(int w) { + return this.attributesInformation.attribute(outputAttributeIndex(w)); + } + + /** + * Instantiates a new instance information. + * + * @param chunk the chunk + */ public InstanceInformation(InstanceInformation chunk) { this.relationName = chunk.relationName; - this.attributes = chunk.attributes; + this.attributesInformation = chunk.attributesInformation; this.classIndex = chunk.classIndex; } - public InstanceInformation(String st, List<Attribute> v) { + /** + * Instantiates a new instance information. + * + * @param st the st + * @param v the v + */ + public InstanceInformation(String st, List<Attribute> input) { this.relationName = st; - this.attributes = v; + this.attributesInformation = new AttributesInformation(input, input.size()); } + /** + * Instantiates a new instance information. + */ public InstanceInformation() { this.relationName = null; - this.attributes = null; + this.attributesInformation = null; } - // Information Instances - + //Information Instances + /* (non-Javadoc) + * @see com.yahoo.labs.samoa.instances.InstanceInformationInterface#setRelationName(java.lang.String) + */ public void setRelationName(String string) { this.relationName = string; } + /* (non-Javadoc) + * @see com.yahoo.labs.samoa.instances.InstanceInformationInterface#getRelationName() + */ public String getRelationName() { return this.relationName; } + /* (non-Javadoc) + * @see com.yahoo.labs.samoa.instances.InstanceInformationInterface#classIndex() + */ public int classIndex() { - return classIndex; + return this.classIndex; } + /* (non-Javadoc) + * @see com.yahoo.labs.samoa.instances.InstanceInformationInterface#setClassIndex(int) + */ public void setClassIndex(int classIndex) { this.classIndex = classIndex; } + /* (non-Javadoc) + * @see com.yahoo.labs.samoa.instances.InstanceInformationInterface#classAttribute() + */ public Attribute classAttribute() { return this.attribute(this.classIndex()); } + /* (non-Javadoc) + * @see com.yahoo.labs.samoa.instances.InstanceInformationInterface#numAttributes() + */ public int numAttributes() { - return this.attributes.size(); + return this.attributesInformation.numberAttributes; } + /* (non-Javadoc) + * @see com.yahoo.labs.samoa.instances.InstanceInformationInterface#attribute(int) + */ public Attribute attribute(int w) { - return this.attributes.get(w); + return this.attributesInformation.attribute(w); } + /* (non-Javadoc) + * @see com.yahoo.labs.samoa.instances.InstanceInformationInterface#numClasses() + */ public int numClasses() { - return this.attributes.get(this.classIndex()).numValues(); + return this.attributesInformation.attribute(classIndex()).numValues(); } + /* (non-Javadoc) + * @see com.yahoo.labs.samoa.instances.InstanceInformationInterface#deleteAttributeAt(java.lang.Integer) + */ public void deleteAttributeAt(Integer integer) { throw new UnsupportedOperationException("Not yet implemented"); } + /* (non-Javadoc) + * @see com.yahoo.labs.samoa.instances.InstanceInformationInterface#insertAttributeAt(com.yahoo.labs.samoa.instances.Attribute, int) + */ public void insertAttributeAt(Attribute attribute, int i) { throw new UnsupportedOperationException("Not yet implemented"); } public void setAttributes(List<Attribute> v) { - this.attributes = v; + if(this.attributesInformation==null) + this.attributesInformation= new AttributesInformation(); + this.attributesInformation.setAttributes(v); + } + + public int inputAttributeIndex(int index) { + int ret = 0; + if (classIndex == Integer.MAX_VALUE) {//Multi Label + if(index<range.getStart())//JD + ret= index; + else + ret= index+range.getSelectionLength(); + + } else { //Single Label + ret = classIndex() > index ? index : index + 1; + } + return ret; + } + + public int outputAttributeIndex(int attributeIndex) { + int ret = 0; + if (classIndex == Integer.MAX_VALUE) {//Multi Label + ret=attributeIndex+range.getStart(); //JD - Range should be a "block" + } else { //Single Label + ret = classIndex; + } + return ret; + } + + public int numInputAttributes() { + int ret = 0; + if (classIndex == Integer.MAX_VALUE) {//Multi Label + ret=this.numAttributes()-range.getSelectionLength(); //JD + } else { //Single Label + ret = this.numAttributes() - 1; + } + return ret; + } + + public int numOutputAttributes() { + int ret = 0; + if (classIndex == Integer.MAX_VALUE) {//Multi Label + ret=range.getSelectionLength(); //JD + } else { //Single Label + ret = 1; + } + return ret; + } + + public void setRangeOutputIndices(Range range) { + this.setClassIndex(Integer.MAX_VALUE); + this.range = range; + } + + public void setAttributes(List<Attribute> v, List<Integer> indexValues) { + if(this.attributesInformation==null) + this.attributesInformation= new AttributesInformation(); + this.attributesInformation.setAttributes(v,indexValues); + } }
