Author: adeneche
Date: Mon Oct 24 19:39:40 2011
New Revision: 1188332
URL: http://svn.apache.org/viewvc?rev=1188332&view=rev
Log:
MAHOUT-840 Instance.id removed
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java
mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataConverter.java
mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java
mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Instance.java
mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java
mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step1Mapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/UDistrib.java
mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataConverterTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataLoaderTest.java
mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java?rev=1188332&r1=1188331&r2=1188332&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Data.java Mon Oct
24 19:39:40 2011
@@ -305,7 +305,7 @@ public class Data implements Cloneable {
try {
int index = 0;
while (iterator.hasNext()) {
- labels[index++] = (int) converter.convert(0,
iterator.next()).get(labelId);
+ labels[index++] = (int)
converter.convert(iterator.next()).get(labelId);
}
} finally {
Closeables.closeQuietly(iterator);
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataConverter.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataConverter.java?rev=1188332&r1=1188331&r2=1188332&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataConverter.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataConverter.java
Mon Oct 24 19:39:40 2011
@@ -42,7 +42,7 @@ public class DataConverter {
this.dataset = dataset;
}
- public Instance convert(int id, CharSequence string) {
+ public Instance convert(CharSequence string) {
// all attributes (categorical, numerical, label), ignored
int nball = dataset.nbAttributes() + dataset.getIgnored().length;
@@ -83,10 +83,10 @@ public class DataConverter {
}
if (label == -1) {
- log.error("Label not found, instance id : {}, string : {}", id, string);
+ log.error("Label not found, instance string : {}", string);
throw new IllegalStateException("Label not found!");
}
- return new Instance(id, vector);
+ return new Instance(vector);
}
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java?rev=1188332&r1=1188331&r2=1188332&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/data/DataLoader.java
Mon Oct 24 19:39:40 2011
@@ -54,15 +54,13 @@ public final class DataLoader {
/**
* Converts a comma-separated String to a Vector.
*
- * @param id
- * unique id for the current instance
* @param attrs
* attributes description
* @param values
* used to convert CATEGORICAL attribute values to Integer
* @return null if there are missing values '?'
*/
- private static Instance parseString(int id, Attribute[] attrs,
List<String>[] values, CharSequence string) {
+ private static Instance parseString(Attribute[] attrs, List<String>[]
values, CharSequence string) {
String[] tokens = COMMA_SPACE.split(string);
Preconditions.checkArgument(tokens.length == attrs.length, "Wrong number
of attributes in the string");
@@ -112,7 +110,7 @@ public final class DataLoader {
throw new IllegalStateException("Label not found!");
}
- return new Instance(id, vector);
+ return new Instance(vector);
}
/**
@@ -141,7 +139,7 @@ public final class DataLoader {
continue;
}
- Instance instance = converter.convert(instances.size(), line);
+ Instance instance = converter.convert(line);
if (instance == null) {
// missing values found
log.warn("{}: missing values", instances.size());
@@ -170,7 +168,7 @@ public final class DataLoader {
continue;
}
- Instance instance = converter.convert(instances.size(), line);
+ Instance instance = converter.convert(line);
if (instance == null) {
// missing values found
log.warn("{}: missing values", instances.size());
@@ -205,21 +203,21 @@ public final class DataLoader {
// used to convert CATEGORICAL attribute to Integer
List<String>[] values = new List[attrs.length];
- int id = 0;
+ int size = 0;
while (scanner.hasNextLine()) {
String line = scanner.nextLine();
if (line.isEmpty()) {
continue;
}
- if (parseString(id, attrs, values, line) != null) {
- id++;
+ if (parseString(attrs, values, line) != null) {
+ size++;
}
}
scanner.close();
- return new Dataset(attrs, values, id, regression);
+ return new Dataset(attrs, values, size, regression);
}
/**
@@ -234,18 +232,18 @@ public final class DataLoader {
// used to convert CATEGORICAL and LABEL attributes to Integer
List<String>[] values = new List[attrs.length];
- int id = 0;
+ int size = 0;
for (String aData : data) {
if (aData.isEmpty()) {
continue;
}
- if (parseString(id, attrs, values, aData) != null) {
- id++;
+ if (parseString(attrs, values, aData) != null) {
+ size++;
}
}
- return new Dataset(attrs, values, id, regression);
+ return new Dataset(attrs, values, size, regression);
}
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Instance.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Instance.java?rev=1188332&r1=1188331&r2=1188332&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Instance.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/data/Instance.java Mon
Oct 24 19:39:40 2011
@@ -24,13 +24,10 @@ import org.apache.mahout.math.Vector;
*/
public class Instance {
- private final int id;
-
/** attributes, except LABEL and IGNORED */
private final Vector attrs;
- public Instance(int id, Vector attrs) {
- this.id = id;
+ public Instance(Vector attrs) {
this.attrs = attrs;
}
@@ -67,17 +64,12 @@ public class Instance {
Instance instance = (Instance) obj;
- return id == instance.id && attrs.equals(instance.attrs);
+ return /*id == instance.id &&*/ attrs.equals(instance.attrs);
}
@Override
public int hashCode() {
- return id + attrs.hashCode();
- }
-
- /** instance unique id */
- public int getId() {
- return id;
+ return /*id +*/ attrs.hashCode();
}
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java?rev=1188332&r1=1188331&r2=1188332&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/Classifier.java
Mon Oct 24 19:39:40 2011
@@ -241,7 +241,7 @@ public class Classifier {
String line = value.toString();
if (!line.isEmpty()) {
- Instance instance = converter.convert(0, line);
+ Instance instance = converter.convert(line);
int prediction = forest.classify(rng, instance);
key.set(dataset.getLabel(instance));
lvalue.set(Integer.toString(prediction));
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step1Mapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step1Mapper.java?rev=1188332&r1=1188331&r2=1188332&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step1Mapper.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step1Mapper.java
Mon Oct 24 19:39:40 2011
@@ -64,9 +64,6 @@ public class Step1Mapper extends MapredM
/** will contain all instances of this mapper's split */
private final List<Instance> instances = Lists.newArrayList();
- /** current instance's id */
- private int id;
-
public int getFirstTreeId() {
return firstTreeId;
}
@@ -142,7 +139,7 @@ public class Step1Mapper extends MapredM
@Override
protected void map(LongWritable key, Text value, Context context) throws
IOException, InterruptedException {
- instances.add(converter.convert(id++, value.toString()));
+ instances.add(converter.convert(value.toString()));
}
@Override
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java?rev=1188332&r1=1188331&r2=1188332&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/FrequenciesJob.java
Mon Oct 24 19:39:40 2011
@@ -187,7 +187,7 @@ public class FrequenciesJob {
firstId = new LongWritable(key.get());
}
- Instance instance = converter.convert((int) key.get(), value.toString());
+ Instance instance = converter.convert(value.toString());
context.write(firstId, new IntWritable(dataset.getLabel(instance)));
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/UDistrib.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/UDistrib.java?rev=1188332&r1=1188331&r2=1188332&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/UDistrib.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/tools/UDistrib.java
Mon Oct 24 19:39:40 2011
@@ -174,7 +174,7 @@ public final class UDistrib {
}
// write the tuple in files[tuple.label]
- Instance instance = converter.convert(id++, line);
+ Instance instance = converter.convert(line);
int label = dataset.getLabel(instance);
files[currents[label]].writeBytes(line);
files[currents[label]].writeChar('\n');
Modified:
mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataConverterTest.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataConverterTest.java?rev=1188332&r1=1188331&r2=1188332&view=diff
==============================================================================
---
mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataConverterTest.java
(original)
+++
mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataConverterTest.java
Mon Oct 24 19:39:40 2011
@@ -42,7 +42,7 @@ public final class DataConverterTest ext
DataConverter converter = new DataConverter(dataset);
for (int index = 0; index < data.size(); index++) {
- assertEquals(data.get(index), converter.convert(index, sData[index]));
+ assertEquals(data.get(index), converter.convert(sData[index]));
}
}
}
Modified:
mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataLoaderTest.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataLoaderTest.java?rev=1188332&r1=1188331&r2=1188332&view=diff
==============================================================================
---
mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataLoaderTest.java
(original)
+++
mahout/trunk/core/src/test/java/org/apache/mahout/df/data/DataLoaderTest.java
Mon Oct 24 19:39:40 2011
@@ -146,9 +146,6 @@ public final class DataLoaderTest extend
double[] vector = data[index];
Instance instance = loaded.get(lind);
- // make sure the id is correct
- assertEquals(lind, instance.getId());
-
int aId = 0;
for (int attr = 0; attr < nbAttributes; attr++) {
if (attrs[attr].isIgnored()) {
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java?rev=1188332&r1=1188331&r2=1188332&view=diff
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java
(original)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/df/mapreduce/TestForest.java
Mon Oct 24 19:39:40 2011
@@ -252,7 +252,7 @@ public class TestForest extends Configur
continue; // skip empty lines
}
- Instance instance = converter.convert(0, line);
+ Instance instance = converter.convert(line);
int prediction = forest.classify(rng, instance);
if (outputPath != null) {