Repository: incubator-samoa Updated Branches: refs/heads/master a1cc23747 -> a92b303de
SAMOA-70: Fix bug in DenseInstance toString method which resulted in always writing an arff file with class equal to zero. fix #60 Project: http://git-wip-us.apache.org/repos/asf/incubator-samoa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-samoa/commit/a92b303d Tree: http://git-wip-us.apache.org/repos/asf/incubator-samoa/tree/a92b303d Diff: http://git-wip-us.apache.org/repos/asf/incubator-samoa/diff/a92b303d Branch: refs/heads/master Commit: a92b303de33d4e317bca75e8ec80f0869df1df47 Parents: a1cc237 Author: fobeligi <[email protected]> Authored: Sat May 20 17:24:21 2017 +0300 Committer: Gianmarco De Francisci Morales <[email protected]> Committed: Tue Jul 4 15:25:03 2017 +0300 ---------------------------------------------------------------------- .../org/apache/samoa/streams/TextGenerator.java | 2 +- .../org/apache/samoa/instances/Attribute.java | 34 +++++++++++++----- .../apache/samoa/instances/DenseInstance.java | 24 ++++++++++--- .../samoa/instances/SingleLabelInstance.java | 4 +-- .../apache/samoa/instances/SparseInstance.java | 36 ++++++++++++++++++++ 5 files changed, 84 insertions(+), 16 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/a92b303d/samoa-api/src/main/java/org/apache/samoa/streams/TextGenerator.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/TextGenerator.java b/samoa-api/src/main/java/org/apache/samoa/streams/TextGenerator.java index 6af0df2..bdb3e73 100644 --- a/samoa-api/src/main/java/org/apache/samoa/streams/TextGenerator.java +++ b/samoa-api/src/main/java/org/apache/samoa/streams/TextGenerator.java @@ -109,7 +109,7 @@ public class TextGenerator extends AbstractOptionHandler implements InstanceStre } } while (votes[1] == votes[2]); - Instance inst = new DenseInstance(1.0, attVals); + Instance inst = new SparseInstance(1.0, attVals); inst.setDataset(getHeader()); inst.setClassValue((votes[1] > votes[2]) ? 0 : 1); this.countTweets++; http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/a92b303d/samoa-instances/src/main/java/org/apache/samoa/instances/Attribute.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/Attribute.java b/samoa-instances/src/main/java/org/apache/samoa/instances/Attribute.java index b1c9d8b..f14e6c3 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/Attribute.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/Attribute.java @@ -26,10 +26,7 @@ package org.apache.samoa.instances; import java.io.Serializable; import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; /** * @author abifet @@ -37,9 +34,9 @@ import java.util.Map; public class Attribute implements Serializable { public static final String ARFF_ATTRIBUTE = "@attribute"; - public static final String ARFF_ATTRIBUTE_NUMERIC = "NUMERIC"; - public static final String ARFF_ATTRIBUTE_NOMINAL = "NOMINAL"; - public static final String ARFF_ATTRIBUTE_DATE = "DATE"; + public static final String ARFF_ATTRIBUTE_NUMERIC = "numeric"; + public static final String ARFF_ATTRIBUTE_NOMINAL = "nominal"; + public static final String ARFF_ATTRIBUTE_DATE = "date"; /** * @@ -199,7 +196,14 @@ public class Attribute implements Serializable { text.append(ARFF_ATTRIBUTE).append(" ").append(Utils.quote(this.name)).append(" "); if (isNominal) { - text.append(ARFF_ATTRIBUTE_NOMINAL); + text.append('{'); + Enumeration enu = enumerateValues(); + while (enu.hasMoreElements()) { + text.append(Utils.quote((String) enu.nextElement())); + if (enu.hasMoreElements()) + text.append(','); + } + text.append('}'); } else if (isNumeric) { text.append(ARFF_ATTRIBUTE_NUMERIC); } else if (isDate) { @@ -208,4 +212,18 @@ public class Attribute implements Serializable { return text.toString(); } + + /** + * Returns an enumeration of all the attribute's values if the + * attribute is nominal, null otherwise. + * + * @return enumeration of all the attribute's values + */ + public final /*@ pure @*/ Enumeration enumerateValues() { + + if (this.isNominal()) { + return Collections.enumeration(this.attributeValues); + } + return null; + } } http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/a92b303d/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstance.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstance.java b/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstance.java index 1a1f5ce..80feb11 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstance.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstance.java @@ -24,6 +24,8 @@ package org.apache.samoa.instances; * #L% */ +import java.text.SimpleDateFormat; + /** * @author abifet */ @@ -61,13 +63,25 @@ public class DenseInstance extends SingleLabelInstance { public String toString() { StringBuffer text = new StringBuffer(); - for (int i = 0; i < this.instanceData.numAttributes(); i++) { - if (i > 0) { - text.append(","); + //append all attributes except the class attribute. + for (int attIndex = 0; attIndex < this.numAttributes()-1; attIndex++) { + if (!this.isMissing(attIndex)) { + if (this.attribute(attIndex).isNominal()) { + int valueIndex = (int) this.value(attIndex); + String stringValue = this.attribute(attIndex).value(valueIndex); + text.append(stringValue).append(","); + } else if (this.attribute(attIndex).isNumeric()) { + text.append(this.value(attIndex)).append(","); + } else if (this.attribute(attIndex).isDate()) { + SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"); + text.append(dateFormatter.format(this.value(attIndex))).append(","); + } + } else { + text.append("?,"); } - text.append(this.value(i)); } - text.append(",").append(this.weight()); + //append the class value at the end of the instance. + text.append(this.classAttribute().value((int)classValue())); return text.toString(); } http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/a92b303d/samoa-instances/src/main/java/org/apache/samoa/instances/SingleLabelInstance.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/SingleLabelInstance.java b/samoa-instances/src/main/java/org/apache/samoa/instances/SingleLabelInstance.java index 4018d97..d69a0f5 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/SingleLabelInstance.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/SingleLabelInstance.java @@ -212,13 +212,13 @@ public class SingleLabelInstance implements Instance { @Override public Attribute classAttribute() { - return this.instanceInformation.attribute(0); + //return the class attribute + return this.instanceInformation.attribute(classIndex()); } @Override public void setClassValue(double d) { this.classData.setValue(0, d); - // this.classValue = d; } @Override http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/a92b303d/samoa-instances/src/main/java/org/apache/samoa/instances/SparseInstance.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/SparseInstance.java b/samoa-instances/src/main/java/org/apache/samoa/instances/SparseInstance.java index 54dd417..56dbc7f 100644 --- a/samoa-instances/src/main/java/org/apache/samoa/instances/SparseInstance.java +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/SparseInstance.java @@ -24,6 +24,8 @@ package org.apache.samoa.instances; * #L% */ +import java.text.SimpleDateFormat; + /** * * @author abifet @@ -47,4 +49,38 @@ public class SparseInstance extends SingleLabelInstance { super(weight, attributeValues, indexValues, numberAttributes); } + @Override + public String toString() { + StringBuffer str = new StringBuffer(); + + str.append("{"); + + for (int i=0; i<this.numAttributes()-1;i++){ + if (!this.isMissing(i)) { + + //if the attribute is Nominal we print the string value of the attribute. + if (this.attribute(i).isNominal()) { + int valueIndex = (int) this.value(i); + String stringValue = this.attribute(i).value(valueIndex); + str.append(i).append(" ").append(stringValue).append(","); + } else if (this.attribute(i).isNumeric()) { + //if the attribute is numeric we print the value of the attribute only if it is not equal 0 + if (this.value(i) != 0) { + str.append(i).append(" ").append(this.value(i)).append(","); + } + } else if (this.attribute(i).isDate()) { + SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss"); + str.append(i).append(" ").append(dateFormatter.format(this.value(i))).append(","); + } + } else { //represent missing values + str.append(i).append(" ").append("?,"); + } + } + //append the class value at the end of the instance. + str.append(classIndex()).append(" ").append(this.classAttribute().value((int)classValue())); + + str.append("}"); + + return str.toString(); + } }
