// Source: http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/org/apache/samoa/instances/ArffLoader.java
package org.apache.samoa.instances;

/*
 * #%L
 * SAMOA
 * %%
 * Copyright (C) 2014 - 2015 Apache Software Foundation
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.Serializable;
import java.io.StreamTokenizer;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Reads an ARFF header and then, one call at a time, the data rows that follow it.
 *
 * <p>The header is parsed as soon as the tokenizer is created (see
 * {@link #initStreamTokenizer(Reader)}); afterwards each call to {@link #readInstance(Reader)}
 * consumes one data row, in either dense ({@code v1,v2,...}) or sparse
 * ({@code {index value, ...}}) notation.
 *
 * <p>I/O failures are logged at {@code SEVERE} level and never propagated to the caller.
 *
 * @author abifet
 */
public class ArffLoader implements Serializable {

  private static final Logger LOGGER = Logger.getLogger(ArffLoader.class.getName());

  /** Text that marks a missing value in a data row. */
  private static final String MISSING_VALUE = "?";

  /** Header information built by {@link #getHeader()}. */
  protected InstanceInformation instanceInformation;

  /** Tokenizer over the input; transient, so it is {@code null} after deserialization. */
  transient protected StreamTokenizer streamTokenizer;

  protected Reader reader;

  /** Stored from the constructor argument; not read anywhere in this class. */
  protected int size;

  /**
   * Class attribute selector given by the caller: negative selects the last attribute, a positive
   * value {@code k} selects attribute {@code k - 1}, and zero leaves the class index untouched.
   */
  protected int classAttribute;

  /** Attributes declared in the header, in declaration order. */
  protected List<Attribute> attributes;

  /** Creates a loader without a tokenizer; one is created on the first {@link #readInstance}. */
  public ArffLoader() {
  }

  /**
   * Creates a loader and immediately parses the ARFF header from {@code reader}.
   *
   * @param reader source of the ARFF text
   * @param size stored in {@link #size}; otherwise unused by this class
   * @param classAttribute class attribute selector, see {@link #classAttribute}
   */
  public ArffLoader(Reader reader, int size, int classAttribute) {
    this.reader = reader;
    this.size = size;
    this.classAttribute = classAttribute;
    initStreamTokenizer(reader);
  }

  /**
   * @return the header information parsed so far, or {@code null} if no header has been read
   */
  public InstanceInformation getStructure() {
    return this.instanceInformation;
  }

  /**
   * Reads the next data row.
   *
   * @param reader used only when no tokenizer exists yet, in which case the header is parsed
   *          from it first
   * @return the parsed instance, or {@code null} when a dense read finds no more values or when
   *         skipping blank lines fails with an I/O error
   */
  public Instance readInstance(Reader reader) {
    if (streamTokenizer == null) {
      initStreamTokenizer(reader);
    }
    try {
      // Skip the line breaks left over from the previous row (or from the @data line).
      while (streamTokenizer.ttype == StreamTokenizer.TT_EOL) {
        streamTokenizer.nextToken();
      }
    } catch (IOException ex) {
      // The token type cannot change once reading fails, so retrying would never terminate.
      LOGGER.log(Level.SEVERE, null, ex);
      return null;
    }
    if (streamTokenizer.ttype == '{') {
      return readInstanceSparse();
    }
    return readInstanceDense();
  }

  /**
   * Reads one row in dense notation, starting at the tokenizer's current token. Empty lines are
   * skipped until a row with at least one token is found.
   *
   * @return the instance, or {@code null} if end of input is reached without reading any value
   */
  public Instance readInstanceDense() {
    Instance instance = new DenseInstance(this.instanceInformation.numAttributes() + 1);
    int numAttribute = 0;
    try {
      while (numAttribute == 0 && streamTokenizer.ttype != StreamTokenizer.TT_EOF) {
        // One pass of the inner loop consumes one line.
        while (!atEndOfLine()) {
          if (streamTokenizer.ttype == StreamTokenizer.TT_NUMBER) {
            this.setValue(instance, numAttribute, streamTokenizer.nval, true);
          } else if (isTextToken()) {
            boolean isNumeric = attributes.get(numAttribute).isNumeric();
            double value = toAttributeValue(numAttribute, streamTokenizer.sval);
            this.setValue(instance, numAttribute, value, isNumeric);
          }
          // Every token, recognised or not, occupies one attribute position.
          numAttribute++;
          streamTokenizer.nextToken();
        }
        streamTokenizer.nextToken(); // step past the line break
      }
    } catch (IOException ex) {
      LOGGER.log(Level.SEVERE, null, ex);
    }
    return (numAttribute > 0) ? instance : null;
  }

  /** @return {@code true} when the current token is a line break or the end of input */
  private boolean atEndOfLine() {
    return streamTokenizer.ttype == StreamTokenizer.TT_EOL
        || streamTokenizer.ttype == StreamTokenizer.TT_EOF;
  }

  /** @return {@code true} when the current token is a word or a quoted string carrying text */
  private boolean isTextToken() {
    return streamTokenizer.sval != null
        && (streamTokenizer.ttype == StreamTokenizer.TT_WORD
            || streamTokenizer.ttype == '"'
            || streamTokenizer.ttype == '\'');
  }

  /**
   * Converts the text of a data cell to the double stored in an instance: {@code NaN} for the
   * missing-value marker, the parsed number for numeric attributes, and otherwise the label's
   * position as reported by {@link Attribute#indexOfValue(String)}.
   *
   * @throws NumberFormatException if a numeric attribute's text is not a valid double
   */
  private double toAttributeValue(int numAttribute, String text) {
    if (MISSING_VALUE.equals(text)) {
      return Double.NaN;
    }
    if (attributes.get(numAttribute).isNumeric()) {
      return Double.parseDouble(text);
    }
    return this.instanceInformation.attribute(numAttribute).indexOfValue(text);
  }

  /**
   * Stores {@code value} either as the class value (when {@code numAttribute} is the class index)
   * or as a regular attribute value. {@code isNumber} is accepted for symmetry with
   * {@link #setSparseValue} and is not consulted.
   */
  private void setValue(Instance instance, int numAttribute, double value, boolean isNumber) {
    if (this.instanceInformation.classIndex() == numAttribute) {
      instance.setClassValue(value);
    } else {
      instance.setValue(numAttribute, value);
    }
  }

  /**
   * Reads one row in sparse notation; the current token must be the opening brace.
   *
   * <p>Parsing stops at the closing brace, at the end of the line, or at the end of input,
   * whichever comes first, so a row with a missing {@code '}'} cannot run into the next row.
   *
   * @return a sparse instance holding the index/value pairs that were read
   */
  private Instance readInstanceSparse() {
    Instance instance = new SparseInstance(1.0, null);
    List<Double> attributeValues = new ArrayList<>();
    List<Integer> indexValues = new ArrayList<>();
    try {
      streamTokenizer.nextToken(); // step past '{'
      while (!atEndOfLine()) {
        while (streamTokenizer.ttype != '}' && !atEndOfLine()) {
          int numAttribute = (streamTokenizer.ttype == StreamTokenizer.TT_NUMBER)
              ? (int) streamTokenizer.nval
              : Integer.parseInt(streamTokenizer.sval);
          streamTokenizer.nextToken();
          if (streamTokenizer.ttype == '}' || atEndOfLine()) {
            // An index with no value: leave the delimiter for the loop conditions to see.
            continue;
          }
          if (streamTokenizer.ttype == StreamTokenizer.TT_NUMBER) {
            this.setSparseValue(instance, indexValues, attributeValues, numAttribute,
                streamTokenizer.nval, true);
          } else if (isTextToken()) {
            this.setSparseValue(instance, indexValues, attributeValues, numAttribute,
                toAttributeValue(numAttribute, streamTokenizer.sval),
                attributes.get(numAttribute).isNumeric());
          }
          streamTokenizer.nextToken();
        }
        if (streamTokenizer.ttype == '}') {
          streamTokenizer.nextToken(); // step past '}'
        }
      }
      streamTokenizer.nextToken(); // step past the line break
    } catch (IOException ex) {
      LOGGER.log(Level.SEVERE, null, ex);
    }

    int[] arrayIndexValues = new int[attributeValues.size()];
    double[] arrayAttributeValues = new double[attributeValues.size()];
    for (int i = 0; i < arrayIndexValues.length; i++) {
      arrayIndexValues[i] = indexValues.get(i);
      arrayAttributeValues[i] = attributeValues.get(i);
    }
    instance.addSparseValues(arrayIndexValues, arrayAttributeValues,
        this.instanceInformation.numAttributes());
    return instance;
  }

  /**
   * Records one sparse cell. The class attribute is written straight to the instance; every other
   * attribute is appended to the two parallel lists.
   *
   * @param isNumber {@code true} when {@code value} came from a number; for a nominal attribute
   *          it is then translated to the index of the label equal to its decimal text
   */
  private void setSparseValue(Instance instance, List<Integer> indexValues,
      List<Double> attributeValues, int numAttribute, double value, boolean isNumber) {
    double valueAttribute = value;
    if (isNumber && this.instanceInformation.attribute(numAttribute).isNominal) {
      valueAttribute =
          this.instanceInformation.attribute(numAttribute).indexOfValue(Double.toString(value));
    }
    if (this.instanceInformation.classIndex() == numAttribute) {
      instance.setClassValue(valueAttribute);
    } else {
      indexValues.add(numAttribute);
      attributeValues.add(valueAttribute);
    }
  }

  /**
   * Parses the ARFF header up to and including the {@code @data} directive and fills
   * {@link #attributes}.
   *
   * <p>Directive names are compared case-insensitively using {@link Locale#ROOT}, so recognition
   * does not depend on the JVM's default locale.
   *
   * @return the relation name (default {@code "file stream"}) together with the attribute list
   */
  private InstanceInformation getHeader() {
    String relation = "file stream";
    attributes = new ArrayList<>();
    try {
      streamTokenizer.nextToken();
      while (streamTokenizer.ttype != StreamTokenizer.TT_EOF) {
        if (streamTokenizer.ttype == StreamTokenizer.TT_WORD
            && streamTokenizer.sval.startsWith("@")) {
          String token = streamTokenizer.sval.toUpperCase(Locale.ROOT);
          if (token.startsWith("@RELATION")) {
            streamTokenizer.nextToken();
            relation = streamTokenizer.sval;
          } else if (token.startsWith("@ATTRIBUTE")) {
            readAttributeDeclaration();
          } else if (token.startsWith("@DATA")) {
            streamTokenizer.nextToken();
            break;
          }
        }
        streamTokenizer.nextToken();
      }
    } catch (IOException ex) {
      LOGGER.log(Level.SEVERE, null, ex);
    }
    return new InstanceInformation(relation, attributes);
  }

  /**
   * Parses what follows an {@code @attribute} directive: a name and then either a brace-enclosed
   * label list (nominal) or any other token (treated as numeric).
   */
  private void readAttributeDeclaration() throws IOException {
    streamTokenizer.nextToken();
    String name = streamTokenizer.sval;
    if (name == null) {
      name = Double.toString(streamTokenizer.nval);
    }
    streamTokenizer.nextToken();
    if (streamTokenizer.ttype == '{') {
      parseDoubleBrackests(name);
    } else if (streamTokenizer.ttype == StreamTokenizer.TT_EOL) {
      // Tolerate files that break the line between the name and the label list.
      streamTokenizer.nextToken();
      if (streamTokenizer.ttype == '{') {
        parseDoubleBrackests(name);
      }
    } else {
      attributes.add(new Attribute(name));
    }
  }

  /**
   * Reads the labels of a nominal attribute up to the closing brace and registers the attribute.
   * The current token must be the opening brace.
   *
   * <p>Reading also stops at the end of input, so an unterminated list cannot loop forever, and
   * tokens that carry neither text nor a number (such as line breaks inside the braces) are not
   * turned into labels.
   */
  private void parseDoubleBrackests(String name) throws IOException {
    streamTokenizer.nextToken();
    List<String> attributeLabels = new ArrayList<>();
    while (streamTokenizer.ttype != '}' && streamTokenizer.ttype != StreamTokenizer.TT_EOF) {
      if (streamTokenizer.sval != null) {
        attributeLabels.add(streamTokenizer.sval);
      } else if (streamTokenizer.ttype == StreamTokenizer.TT_NUMBER) {
        attributeLabels.add(Double.toString(streamTokenizer.nval));
      }
      streamTokenizer.nextToken();
    }
    attributes.add(new Attribute(name, attributeLabels));
  }

  /**
   * Creates the tokenizer over {@code reader}, parses the header and applies the class attribute
   * selection described at {@link #classAttribute}.
   *
   * <p>Tokenizer set-up: everything up to and including the space character and the comma are
   * separators, {@code %} starts a comment, both quote characters delimit strings, braces are
   * single-character tokens and line ends are reported. Number parsing is not enabled, so
   * numeric cells arrive as words.
   */
  private void initStreamTokenizer(Reader reader) {
    BufferedReader br = new BufferedReader(reader);

    streamTokenizer = new StreamTokenizer(br);
    streamTokenizer.resetSyntax();
    streamTokenizer.whitespaceChars(0, ' ');
    streamTokenizer.wordChars(' ' + 1, '\u00FF');
    streamTokenizer.whitespaceChars(',', ',');
    streamTokenizer.commentChar('%');
    streamTokenizer.quoteChar('"');
    streamTokenizer.quoteChar('\'');
    streamTokenizer.ordinaryChar('{');
    streamTokenizer.ordinaryChar('}');
    streamTokenizer.eolIsSignificant(true);

    this.instanceInformation = this.getHeader();
    if (classAttribute < 0) {
      this.instanceInformation.setClassIndex(this.instanceInformation.numAttributes() - 1);
    } else if (classAttribute > 0) {
      this.instanceInformation.setClassIndex(classAttribute - 1);
    }
  }
}
// Source: http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/org/apache/samoa/instances/Attribute.java
package org.apache.samoa.instances;

/*
 * #%L
 * SAMOA
 * %%
 * Copyright (C) 2014 - 2015 Apache Software Foundation
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import java.io.Serializable;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Describes a single attribute (column): its name, its kind (numeric, nominal or date) and, for
 * nominal attributes, the list of labels.
 *
 * @author abifet
 */
public class Attribute implements Serializable {

  public static final String ARFF_ATTRIBUTE = "@attribute";
  public static final String ARFF_ATTRIBUTE_NUMERIC = "NUMERIC";
  public static final String ARFF_ATTRIBUTE_NOMINAL = "NOMINAL";
  public static final String ARFF_ATTRIBUTE_DATE = "DATE";

  /** Set by the two-argument constructor. */
  protected boolean isNominal;

  /** Set by the one-argument constructor. */
  protected boolean isNumeric;

  /** Never set by this class; available to subclasses and same-package code. */
  protected boolean isDate;

  /** Attribute name. */
  protected String name;

  /** Labels of a nominal attribute; {@code null} when built with the one-argument constructor. */
  protected List<String> attributeValues;

  /** Not assigned anywhere in this class. */
  protected int index;

  /** Lazily built label-to-position lookup used by {@link #indexOfValue(String)}. */
  private Map<String, Integer> valuesStringAttribute;

  /**
   * Creates a numeric attribute.
   *
   * @param string the attribute name
   */
  public Attribute(String string) {
    this.name = string;
    this.isNumeric = true;
  }

  /**
   * Creates a nominal attribute. The list is stored as given, without copying.
   *
   * @param attributeName the attribute name
   * @param attributeValues the labels, in index order
   */
  public Attribute(String attributeName, List<String> attributeValues) {
    this.name = attributeName;
    this.attributeValues = attributeValues;
    this.isNominal = true;
  }

  /** Creates a numeric attribute with an empty name. */
  public Attribute() {
    this("");
  }

  /**
   * @return the label list exactly as stored (not a copy); may be {@code null}
   */
  public List<String> getAttributeValues() {
    return attributeValues;
  }

  /**
   * @return {@code true} if this attribute is nominal
   */
  public boolean isNominal() {
    return this.isNominal;
  }

  /**
   * @return the attribute name
   */
  public String name() {
    return this.name;
  }

  /**
   * @param value position of the label
   * @return the label stored at that position
   */
  public String value(int value) {
    return attributeValues.get(value);
  }

  /**
   * @return {@code true} if this attribute is numeric
   */
  public boolean isNumeric() {
    return isNumeric;
  }

  /**
   * @return the number of labels; 0 for numeric attributes and for attributes without a label
   *         list
   */
  public int numValues() {
    if (isNumeric() || attributeValues == null) {
      // A non-numeric attribute without labels (e.g. a date) has nothing to count.
      return 0;
    }
    return attributeValues.size();
  }

  /**
   * @return the {@link #index} field
   */
  public int index() { // RuleClassifier
    return this.index;
  }

  /**
   * Formats {@code value}, truncated to a {@code long} and passed to {@link Date#Date(long)},
   * with a default-configured {@link SimpleDateFormat}.
   */
  String formatDate(double value) {
    SimpleDateFormat sdf = new SimpleDateFormat();
    return sdf.format(new Date((long) value));
  }

  boolean isDate() {
    return isDate;
  }

  /**
   * Looks up the position of a label. The lookup table is built from the label list on first use.
   *
   * @param value the label to look up
   * @return the label's position, or -1 if the attribute is not nominal or the label is unknown
   */
  public final int indexOfValue(String value) {
    if (!isNominal()) {
      return -1;
    }
    if (this.valuesStringAttribute == null) {
      this.valuesStringAttribute = new HashMap<String, Integer>();
      int count = 0;
      for (String stringValue : attributeValues) {
        this.valuesStringAttribute.put(stringValue, count);
        count++;
      }
    }
    Integer val = this.valuesStringAttribute.get(value);
    return (val == null) ? -1 : val.intValue();
  }

  /**
   * @return {@code @attribute}, the quoted name and the kind keyword (omitted when no kind flag
   *         is set)
   */
  @Override
  public String toString() {
    StringBuilder text = new StringBuilder();

    text.append(ARFF_ATTRIBUTE).append(" ").append(Utils.quote(this.name)).append(" ");

    if (isNominal) {
      text.append(ARFF_ATTRIBUTE_NOMINAL);
    } else if (isNumeric) {
      text.append(ARFF_ATTRIBUTE_NUMERIC);
    } else if (isDate) {
      text.append(ARFF_ATTRIBUTE_DATE);
    }

    return text.toString();
  }
}

// Source: http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstance.java
package org.apache.samoa.instances;

/*
 * #%L
 * SAMOA
 * %%
 * Copyright (C) 2014 - 2015 Apache Software Foundation
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

/**
 * A {@link SingleLabelInstance} whose constructors all delegate to the superclass and whose
 * {@link #toString()} lists every attribute value followed by the weight.
 *
 * @author abifet
 */
public class DenseInstance extends SingleLabelInstance {

  private static final long serialVersionUID = 280360594027716737L;

  public DenseInstance() {
    // necessary for kryo serializer
  }

  public DenseInstance(double weight, double[] res) {
    super(weight, res);
  }

  public DenseInstance(SingleLabelInstance inst) {
    super(inst);
  }

  /**
   * @param inst the instance to copy; must be a {@link SingleLabelInstance}
   * @throws ClassCastException if {@code inst} is any other implementation
   */
  public DenseInstance(Instance inst) {
    super((SingleLabelInstance) inst);
  }

  /**
   * @param numberAttributes number of attributes, truncated to an {@code int} before being passed
   *          to the superclass
   */
  public DenseInstance(double numberAttributes) {
    super((int) numberAttributes);
  }

  /**
   * @return the attribute values separated by commas, then a comma and the weight
   */
  @Override
  public String toString() {
    StringBuilder text = new StringBuilder();

    for (int i = 0; i < this.instanceData.numAttributes(); i++) {
      if (i > 0) {
        text.append(",");
      }
      text.append(this.value(i));
    }
    text.append(",").append(this.weight());

    return text.toString();
  }
}

// Source: http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstanceData.java
package org.apache.samoa.instances;

/*
 * #%L
 * SAMOA
 * %%
 * Copyright (C) 2014 - 2015 Apache Software Foundation
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

/**
 * {@link InstanceData} backed by a plain {@code double[]} with one slot per attribute. A slot
 * holding {@code NaN} is reported as missing.
 *
 * @author abifet
 */
public class DenseInstanceData implements InstanceData {

  /** One value per attribute, addressed by attribute index. */
  protected double[] attributeValues;

  /**
   * @param array the backing array; stored as given, without copying
   */
  public DenseInstanceData(double[] array) {
    this.attributeValues = array;
  }

  /**
   * @param length number of attribute slots to allocate
   */
  public DenseInstanceData(int length) {
    this.attributeValues = new double[length];
  }

  /** Creates data with zero attributes. */
  public DenseInstanceData() {
    this(0);
  }

  @Override
  public int numAttributes() {
    return this.attributeValues.length;
  }

  @Override
  public double value(int indexAttribute) {
    return this.attributeValues[indexAttribute];
  }

  @Override
  public boolean isMissing(int indexAttribute) {
    return Double.isNaN(this.value(indexAttribute));
  }

  /** Every slot is stored, so this equals {@link #numAttributes()}. */
  @Override
  public int numValues() {
    return numAttributes();
  }

  /** Storage position and attribute index coincide, so the argument is returned unchanged. */
  @Override
  public int index(int indexAttribute) {
    return indexAttribute;
  }

  @Override
  public double valueSparse(int indexAttribute) {
    return value(indexAttribute);
  }

  @Override
  public boolean isMissingSparse(int indexAttribute) {
    return isMissing(indexAttribute);
  }

  /**
   * @return a copy of the backing array
   */
  @Override
  public double[] toDoubleArray() {
    return attributeValues.clone();
  }

  @Override
  public void setValue(int attributeIndex, double d) {
    this.attributeValues[attributeIndex] = d;
  }

}

// Source: http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/org/apache/samoa/instances/Instance.java
package org.apache.samoa.instances;

/*
 * #%L
 * SAMOA
 * %%
 * Copyright (C) 2014 - 2015 Apache Software Foundation
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import java.io.Serializable;

/**
 * A single serializable data row: a weight, attribute values, a class value and a link to the
 * {@link Instances} dataset it belongs to.
 *
 * <p>NOTE(review): the exact semantics of the individual accessors are defined by the
 * implementations, which are not all visible here; confirm against them before relying on
 * details such as index bases.
 *
 * @author abifet
 */
public interface Instance extends Serializable {

  double weight();

  void setWeight(double weight);

  // Attributes
  Attribute attribute(int instAttIndex);

  void deleteAttributeAt(int i);

  void insertAttributeAt(int i);

  int numAttributes();

  /**
   * Supplies sparse content as two parallel arrays.
   *
   * @param indexValues attribute indices
   * @param attributeValues the value for each entry of {@code indexValues}
   * @param numberAttributes total number of attributes of the row
   */
  void addSparseValues(int[] indexValues, double[] attributeValues, int numberAttributes);

  // Values
  int numValues();

  String stringValue(int i);

  double value(int instAttIndex);

  double value(Attribute attribute);

  void setValue(int m_numAttributes, double d);

  boolean isMissing(int instAttIndex);

  int index(int i);

  double valueSparse(int i);

  boolean isMissingSparse(int p1);

  double[] toDoubleArray();

  // Class
  Attribute classAttribute();

  int classIndex();

  boolean classIsMissing();

  double classValue();

  int numClasses();

  void setClassValue(double d);

  Instance copy();

  // Dataset
  void setDataset(Instances dataset);

  Instances dataset();

  String toString();
}

// Next file in this diff: http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceData.java
// Source: samoa-instances/src/main/java/org/apache/samoa/instances/InstanceData.java (new file, index eca4145)
package org.apache.samoa.instances;

/*
 * #%L
 * SAMOA
 * %%
 * Copyright (C) 2014 - 2015 Apache Software Foundation
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import java.io.Serializable;

/**
 * Serializable storage for the attribute values of one instance.
 *
 * <p>The interface offers two families of accessors: {@code value}/{@code isMissing} and
 * {@code valueSparse}/{@code isMissingSparse}, with {@link #index(int)} between them.
 * NOTE(review): presumably the first family is addressed by attribute index and the second by
 * storage position; confirm against the implementations.
 *
 * @author abifet
 */
public interface InstanceData extends Serializable {

  int numAttributes();

  double value(int instAttIndex);

  boolean isMissing(int instAttIndex);

  int numValues();

  int index(int i);

  double valueSparse(int i);

  boolean isMissingSparse(int p1);

  double[] toDoubleArray();

  void setValue(int m_numAttributes, double d);

}

// Source: http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceInformation.java
package org.apache.samoa.instances;

/*
 * #%L
 * SAMOA
 * %%
 * Copyright (C) 2014 - 2015 Apache Software Foundation
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import java.io.Serializable;
import java.util.List;

/**
 * Header information of a dataset: relation name, attribute list and class index.
 *
 * @author abifet
 */
public class InstanceInformation implements Serializable {

  // Should we split Instances as a List of Instances, and InformationInstances

  /** The dataset's name. */
  protected String relationName;

  /** The attribute information. */
  protected List<Attribute> attributes;

  /** Position of the class attribute within {@link #attributes}; 0 until explicitly set. */
  protected int classIndex;

  /**
   * Copies name, attribute list and class index from {@code chunk}. The attribute list itself is
   * shared with {@code chunk}, not duplicated.
   */
  public InstanceInformation(InstanceInformation chunk) {
    this.relationName = chunk.relationName;
    this.attributes = chunk.attributes;
    this.classIndex = chunk.classIndex;
  }

  /**
   * @param st the relation name
   * @param v the attribute list; stored as given, without copying
   */
  public InstanceInformation(String st, List<Attribute> v) {
    this.relationName = st;
    this.attributes = v;
  }

  /** Creates an empty header with no relation name and no attribute list. */
  public InstanceInformation() {
    this.relationName = null;
    this.attributes = null;
  }

  // Information Instances

  public void setRelationName(String string) {
    this.relationName = string;
  }

  public String getRelationName() {
    return this.relationName;
  }

  public int classIndex() {
    return classIndex;
  }

  public void setClassIndex(int classIndex) {
    this.classIndex = classIndex;
  }

  /**
   * @return the attribute found at {@link #classIndex()}
   */
  public Attribute classAttribute() {
    return this.attribute(this.classIndex());
  }

  /**
   * @return the number of attributes; 0 while no attribute list has been set
   */
  public int numAttributes() {
    // The no-arg constructor leaves the list null; report that as "no attributes".
    return (this.attributes == null) ? 0 : this.attributes.size();
  }

  public Attribute attribute(int w) {
    return this.attributes.get(w);
  }

  /**
   * @return the number of labels of the class attribute, as reported by
   *         {@link Attribute#numValues()}
   */
  public int numClasses() {
    return this.attributes.get(this.classIndex()).numValues();
  }

  /**
   * @throws UnsupportedOperationException always
   */
  public void deleteAttributeAt(Integer integer) {
    throw new UnsupportedOperationException("Not yet implemented");
  }

  /**
   * @throws UnsupportedOperationException always
   */
  public void insertAttributeAt(Attribute attribute, int i) {
    throw new UnsupportedOperationException("Not yet implemented");
  }

  public void setAttributes(List<Attribute> v) {
    this.attributes = v;
  }

}

// Next file in this diff: http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/org/apache/samoa/instances/Instances.java
---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/Instances.java b/samoa-instances/src/main/java/org/apache/samoa/instances/Instances.java new file mode 100644 index 0000000..556caaa --- /dev/null +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/Instances.java @@ -0,0 +1,244 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. + */ +package org.apache.samoa.instances; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.io.Reader; +import java.io.Serializable; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +/** + * + * @author abifet + */ +public class Instances implements Serializable { + + public static final String ARFF_RELATION = "@relation"; + public static final String ARFF_DATA = "@data"; + + protected InstanceInformation instanceInformation; + /** + * The instances. 
+ */ + protected List<Instance> instances; + + transient protected ArffLoader arff; + + protected int classAttribute; + + public Instances(InstancesHeader modelContext) { + throw new UnsupportedOperationException("Not yet implemented"); + } + + public Instances(Instances chunk) { + this.instanceInformation = chunk.instanceInformation(); + // this.relationName = chunk.relationName; + // this.attributes = chunk.attributes; + this.instances = chunk.instances; + } + + public Instances() { + // this.instanceInformation = chunk.instanceInformation(); + // this.relationName = chunk.relationName; + // this.attributes = chunk.attributes; + // this.instances = chunk.instances; + } + + public Instances(Reader reader, int size, int classAttribute) { + this.classAttribute = classAttribute; + arff = new ArffLoader(reader, 0, classAttribute); + this.instanceInformation = arff.getStructure(); + this.instances = new ArrayList<>(); + } + + public Instances(Instances chunk, int capacity) { + this(chunk); + } + + public Instances(String st, List<Attribute> v, int capacity) { + + this.instanceInformation = new InstanceInformation(st, v); + this.instances = new ArrayList<>(); + } + + public Instances(Instances chunk, int i, int j) { + throw new UnsupportedOperationException("Not yet implemented"); + } + + public Instances(StringReader st, int v) { + throw new UnsupportedOperationException("Not yet implemented"); + } + + // Information Instances + public void setRelationName(String string) { + this.instanceInformation.setRelationName(string); + } + + public String getRelationName() { + return this.instanceInformation.getRelationName(); + } + + public int classIndex() { + return this.instanceInformation.classIndex(); + } + + public void setClassIndex(int classIndex) { + this.instanceInformation.setClassIndex(classIndex); + } + + public Attribute classAttribute() { + return this.instanceInformation.classAttribute(); + } + + public int numAttributes() { + return 
this.instanceInformation.numAttributes(); + } + + public Attribute attribute(int w) { + return this.instanceInformation.attribute(w); + } + + public int numClasses() { + return this.instanceInformation.numClasses(); + } + + public void deleteAttributeAt(Integer integer) { + this.instanceInformation.deleteAttributeAt(integer); + } + + public void insertAttributeAt(Attribute attribute, int i) { + this.instanceInformation.insertAttributeAt(attribute, i); + } + + // List of Instances + public Instance instance(int num) { + return this.instances.get(num); + } + + public int numInstances() { + return this.instances.size(); + } + + public void add(Instance inst) { + this.instances.add(inst.copy()); + } + + public void randomize(Random random) { + for (int j = numInstances() - 1; j > 0; j--) { + swap(j, random.nextInt(j + 1)); + } + } + + public void stratify(int numFolds) { + throw new UnsupportedOperationException("Not yet implemented"); + } + + public Instances trainCV(int numFolds, int n, Random random) { + throw new UnsupportedOperationException("Not yet implemented"); + } + + public Instances testCV(int numFolds, int n) { + throw new UnsupportedOperationException("Not yet implemented"); + } + + /* + * public Instances dataset() { throw new + * UnsupportedOperationException("Not yet implemented"); } + */ + public double meanOrMode(int j) { + throw new UnsupportedOperationException("Not yet implemented"); // CobWeb + } + + public boolean readInstance(Reader fileReader) { + + // ArffReader arff = new ArffReader(reader, this, m_Lines, 1); + if (arff == null) { + arff = new ArffLoader(fileReader, 0, this.classAttribute); + } + Instance inst = arff.readInstance(fileReader); + if (inst != null) { + inst.setDataset(this); + add(inst); + return true; + } else { + return false; + } + } + + public void delete() { + this.instances = new ArrayList<>(); + } + + public void swap(int i, int j) { + Instance in = instances.get(i); + instances.set(i, instances.get(j)); + 
instances.set(j, in); + } + + private InstanceInformation instanceInformation() { + return this.instanceInformation; + } + + public Attribute attribute(String name) { + + for (int i = 0; i < numAttributes(); i++) { + if (attribute(i).name().equals(name)) { + return attribute(i); + } + } + return null; + } + + @Override + public String toString() { + StringBuilder text = new StringBuilder(); + + for (int i = 0; i < numInstances(); i++) { + text.append(instance(i).toString()); + if (i < numInstances() - 1) { + text.append('\n'); + } + } + return text.toString(); + } + + // toString() with header + public String toStringArff() { + StringBuilder text = new StringBuilder(); + + text.append(ARFF_RELATION).append(" ") + .append(Utils.quote(getRelationName())).append("\n\n"); + for (int i = 0; i < numAttributes(); i++) { + text.append(attribute(i).toString()).append("\n"); + } + text.append("\n").append(ARFF_DATA).append("\n"); + + text.append(toString()); + return text.toString(); + + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/org/apache/samoa/instances/InstancesHeader.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/InstancesHeader.java b/samoa-instances/src/main/java/org/apache/samoa/instances/InstancesHeader.java new file mode 100644 index 0000000..a5d5a74 --- /dev/null +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/InstancesHeader.java @@ -0,0 +1,123 @@ +package org.apache.samoa.instances; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +/** + * Class for storing the header or context of a data stream. It allows to know the number of attributes and classes. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public class InstancesHeader extends Instances { + + private static final long serialVersionUID = 1L; + + public InstancesHeader(Instances i) { + super(i, 0); + } + + public InstancesHeader() { + super(); + } + + /* + * @Override public boolean add(Instance i) { throw new + * UnsupportedOperationException(); } + * + * @Override public boolean readInstance(Reader r) throws IOException { throw + * new UnsupportedOperationException(); } + */ + + public static String getClassNameString(InstancesHeader context) { + if (context == null) { + return "[class]"; + } + return "[class:" + context.classAttribute().name() + "]"; + } + + public static String getClassLabelString(InstancesHeader context, + int classLabelIndex) { + if ((context == null) || (classLabelIndex >= context.numClasses())) { + return "<class " + (classLabelIndex + 1) + ">"; + } + return "<class " + (classLabelIndex + 1) + ":" + + context.classAttribute().value(classLabelIndex) + ">"; + } + + // is impervious to class index changes - attIndex is true attribute index + // regardless of class position + public static String getAttributeNameString(InstancesHeader context, + int attIndex) { + if ((context == null) || (attIndex >= context.numAttributes())) { + return "[att " + (attIndex + 1) + "]"; + } + int instAttIndex = attIndex < context.classIndex() ? 
attIndex + : attIndex + 1; + return "[att " + (attIndex + 1) + ":" + + context.attribute(instAttIndex).name() + "]"; + } + + // is impervious to class index changes - attIndex is true attribute index + // regardless of class position + public static String getNominalValueString(InstancesHeader context, + int attIndex, int valIndex) { + if (context != null) { + int instAttIndex = attIndex < context.classIndex() ? attIndex + : attIndex + 1; + if ((instAttIndex < context.numAttributes()) + && (valIndex < context.attribute(instAttIndex).numValues())) { + return "{val " + (valIndex + 1) + ":" + + context.attribute(instAttIndex).value(valIndex) + "}"; + } + } + return "{val " + (valIndex + 1) + "}"; + } + + // is impervious to class index changes - attIndex is true attribute index + // regardless of class position + public static String getNumericValueString(InstancesHeader context, + int attIndex, double value) { + if (context != null) { + int instAttIndex = attIndex < context.classIndex() ? attIndex + : attIndex + 1; + if (instAttIndex < context.numAttributes()) { + if (context.attribute(instAttIndex).isDate()) { + return context.attribute(instAttIndex).formatDate(value); + } + } + } + return Double.toString(value); + } + + // add autom. 
+ /* + * public int classIndex() { throw new + * UnsupportedOperationException("Not yet implemented"); } + * + * public int numAttributes() { throw new + * UnsupportedOperationException("Not yet implemented"); } + * + * @Override public Attribute attribute(int nPos) { throw new + * UnsupportedOperationException("Not yet implemented"); } + * + * public int numClasses() { return 0; } + */ +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/org/apache/samoa/instances/SingleClassInstanceData.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/SingleClassInstanceData.java b/samoa-instances/src/main/java/org/apache/samoa/instances/SingleClassInstanceData.java new file mode 100644 index 0000000..dfb8474 --- /dev/null +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/SingleClassInstanceData.java @@ -0,0 +1,86 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. + */ +package org.apache.samoa.instances; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ + +/** + * + * @author abifet + */ +public class SingleClassInstanceData implements InstanceData { + + protected double classValue; + + @Override + public int numAttributes() { + return 1; + } + + @Override + public double value(int instAttIndex) { + return classValue; + } + + @Override + public boolean isMissing(int indexAttribute) { + return Double.isNaN(this.value(indexAttribute)); + } + + @Override + public int numValues() { + return 1; + } + + @Override + public int index(int i) { + return 0; + } + + @Override + public double valueSparse(int i) { + return value(i); + } + + @Override + public boolean isMissingSparse(int indexAttribute) { + return Double.isNaN(this.value(indexAttribute)); + } + + /* + * @Override public double value(Attribute attribute) { return + * this.classValue; } + */ + + @Override + public double[] toDoubleArray() { + double[] array = { this.classValue }; + return array; + } + + @Override + public void setValue(int m_numAttributes, double d) { + this.classValue = d; + } + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/org/apache/samoa/instances/SingleLabelInstance.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/SingleLabelInstance.java b/samoa-instances/src/main/java/org/apache/samoa/instances/SingleLabelInstance.java new file mode 100644 index 0000000..4018d97 --- /dev/null +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/SingleLabelInstance.java @@ -0,0 +1,260 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. + */ +package org.apache.samoa.instances; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +/** + * + * @author abifet + */ +// public int[] m_AttValues; // for DataPoint + +public class SingleLabelInstance implements Instance { + + protected double weight; + + protected InstanceData instanceData; + + protected InstanceData classData; + + // Fast implementation without using Objects + // protected double[] attributeValues; + // protected double classValue; + + protected InstancesHeader instanceInformation; + + public SingleLabelInstance() { + // necessary for kryo serializer + } + + public SingleLabelInstance(SingleLabelInstance inst) { + this.weight = inst.weight; + this.instanceData = inst.instanceData; // copy + this.classData = inst.classData; // copy + // this.classValue = inst.classValue; + // this.attributeValues = inst.attributeValues; + this.instanceInformation = inst.instanceInformation; + } + + // Dense + public SingleLabelInstance(double weight, double[] res) { + this.weight = weight; + this.instanceData = new DenseInstanceData(res); + // this.attributeValues = res; + this.classData = new SingleClassInstanceData(); + // this.classValue = Double.NaN; + + } + + // Sparse + public SingleLabelInstance(double weight, double[] attributeValues, + int[] indexValues, int numberAttributes) { + this.weight = weight; + this.instanceData = new SparseInstanceData(attributeValues, + indexValues, numberAttributes); // ??? 
+ this.classData = new SingleClassInstanceData(); + // this.classValue = Double.NaN; + // this.instanceInformation = new InstancesHeader(); + + } + + public SingleLabelInstance(double weight, InstanceData instanceData) { + this.weight = weight; + this.instanceData = instanceData; // ??? + // this.classValue = Double.NaN; + this.classData = new SingleClassInstanceData(); + // this.instanceInformation = new InstancesHeader(); + } + + public SingleLabelInstance(int numAttributes) { + this.instanceData = new DenseInstanceData(new double[numAttributes]); + // m_AttValues = new double[numAttributes]; + /* + * for (int i = 0; i < m_AttValues.length; i++) { m_AttValues[i] = + * Utils.missingValue(); } + */ + this.weight = 1; + this.classData = new SingleClassInstanceData(); + this.instanceInformation = new InstancesHeader(); + } + + @Override + public double weight() { + return weight; + } + + @Override + public void setWeight(double weight) { + this.weight = weight; + } + + @Override + public Attribute attribute(int instAttIndex) { + return this.instanceInformation.attribute(instAttIndex); + } + + @Override + public void deleteAttributeAt(int i) { + // throw new UnsupportedOperationException("Not yet implemented"); + } + + @Override + public void insertAttributeAt(int i) { + throw new UnsupportedOperationException("Not yet implemented"); + } + + @Override + public int numAttributes() { + return this.instanceInformation.numAttributes(); + } + + @Override + public double value(int instAttIndex) { + return // attributeValues[instAttIndex]; // + this.instanceData.value(instAttIndex); + } + + @Override + public boolean isMissing(int instAttIndex) { + return // Double.isNaN(value(instAttIndex)); // + this.instanceData.isMissing(instAttIndex); + } + + @Override + public int numValues() { + return // this.attributeValues.length; // + this.instanceData.numValues(); + } + + @Override + public int index(int i) { + return // i; // + this.instanceData.index(i); + } + + @Override + 
public double valueSparse(int i) { + return this.instanceData.valueSparse(i); + } + + @Override + public boolean isMissingSparse(int p) { + return this.instanceData.isMissingSparse(p); + } + + @Override + public double value(Attribute attribute) { + // throw new UnsupportedOperationException("Not yet implemented"); + // //Predicates.java + return value(attribute.index()); + + } + + @Override + public String stringValue(int i) { + throw new UnsupportedOperationException("Not yet implemented"); + } + + @Override + public double[] toDoubleArray() { + return // this.attributeValues; // + this.instanceData.toDoubleArray(); + } + + @Override + public void setValue(int numAttribute, double d) { + this.instanceData.setValue(numAttribute, d); + // this.attributeValues[numAttribute] = d; + } + + @Override + public double classValue() { + return this.classData.value(0); + // return classValue; + } + + @Override + public int classIndex() { + return instanceInformation.classIndex(); + } + + @Override + public int numClasses() { + return this.instanceInformation.numClasses(); + } + + @Override + public boolean classIsMissing() { + return // Double.isNaN(this.classValue);// + this.classData.isMissing(0); + } + + @Override + public Attribute classAttribute() { + return this.instanceInformation.attribute(0); + } + + @Override + public void setClassValue(double d) { + this.classData.setValue(0, d); + // this.classValue = d; + } + + @Override + public Instance copy() { + SingleLabelInstance inst = new SingleLabelInstance(this); + return inst; + } + + @Override + public Instances dataset() { + return this.instanceInformation; + } + + @Override + public void setDataset(Instances dataset) { + this.instanceInformation = new InstancesHeader(dataset); + } + + public void addSparseValues(int[] indexValues, double[] attributeValues, + int numberAttributes) { + this.instanceData = new SparseInstanceData(attributeValues, + indexValues, numberAttributes); // ??? 
+ } + + @Override + public String toString() { + StringBuffer text = new StringBuffer(); + + for (int i = 0; i < this.numValues(); i++) { + if (i > 0) + text.append(","); + text.append(this.value(i)); + } + text.append(",").append(this.weight()); + + return text.toString(); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/org/apache/samoa/instances/SparseInstance.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/SparseInstance.java b/samoa-instances/src/main/java/org/apache/samoa/instances/SparseInstance.java new file mode 100644 index 0000000..54dd417 --- /dev/null +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/SparseInstance.java @@ -0,0 +1,50 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. + */ +package org.apache.samoa.instances; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ + +/** + * + * @author abifet + */ +public class SparseInstance extends SingleLabelInstance { + + public SparseInstance(double d, double[] res) { + super(d, res); + } + + public SparseInstance(SingleLabelInstance inst) { + super(inst); + } + + public SparseInstance(double numberAttributes) { + // super(1, new double[(int) numberAttributes-1]); + super(1, null, null, (int) numberAttributes); + } + + public SparseInstance(double weight, double[] attributeValues, int[] indexValues, int numberAttributes) { + super(weight, attributeValues, indexValues, numberAttributes); + } + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/org/apache/samoa/instances/SparseInstanceData.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/SparseInstanceData.java b/samoa-instances/src/main/java/org/apache/samoa/instances/SparseInstanceData.java new file mode 100644 index 0000000..9c14b8f --- /dev/null +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/SparseInstanceData.java @@ -0,0 +1,171 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. + */ +package org.apache.samoa.instances; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ + +/** + * + * @author abifet + */ +public class SparseInstanceData implements InstanceData { + + public SparseInstanceData(double[] attributeValues, int[] indexValues, int numberAttributes) { + this.attributeValues = attributeValues; + this.indexValues = indexValues; + this.numberAttributes = numberAttributes; + } + + public SparseInstanceData(int length) { + this.attributeValues = new double[length]; + this.indexValues = new int[length]; + } + + protected double[] attributeValues; + + public double[] getAttributeValues() { + return attributeValues; + } + + public void setAttributeValues(double[] attributeValues) { + this.attributeValues = attributeValues; + } + + public int[] getIndexValues() { + return indexValues; + } + + public void setIndexValues(int[] indexValues) { + this.indexValues = indexValues; + } + + public int getNumberAttributes() { + return numberAttributes; + } + + public void setNumberAttributes(int numberAttributes) { + this.numberAttributes = numberAttributes; + } + + protected int[] indexValues; + protected int numberAttributes; + + @Override + public int numAttributes() { + return this.numberAttributes; + } + + @Override + public double value(int indexAttribute) { + int location = locateIndex(indexAttribute); + // return location == -1 ? 
0 : this.attributeValues[location]; + // int index = locateIndex(attIndex); + if ((location >= 0) && (indexValues[location] == indexAttribute)) { + return attributeValues[location]; + } else { + return 0.0; + } + } + + @Override + public boolean isMissing(int indexAttribute) { + return Double.isNaN(this.value(indexAttribute)); + } + + @Override + public int numValues() { + return this.attributeValues.length; + } + + @Override + public int index(int indexAttribute) { + return this.indexValues[indexAttribute]; + } + + @Override + public double valueSparse(int indexAttribute) { + return this.attributeValues[indexAttribute]; + } + + @Override + public boolean isMissingSparse(int indexAttribute) { + return Double.isNaN(this.valueSparse(indexAttribute)); + } + + /* + * @Override public double value(Attribute attribute) { return + * value(attribute.index()); } + */ + + @Override + public double[] toDoubleArray() { + double[] array = new double[numAttributes()]; + for (int i = 0; i < numValues(); i++) { + array[index(i)] = valueSparse(i); + } + return array; + } + + @Override + public void setValue(int attributeIndex, double d) { + int index = locateIndex(attributeIndex); + if (index(index) == attributeIndex) { + this.attributeValues[index] = d; + } else { + // We need to add the value + } + } + + /** + * Locates the greatest index that is not greater than the given index. + * + * @return the internal index of the attribute index. 
Returns -1 if no index with this property could be found + */ + public int locateIndex(int index) { + + int min = 0; + int max = this.indexValues.length - 1; + + if (max == -1) { + return -1; + } + + // Binary search + while ((this.indexValues[min] <= index) && (this.indexValues[max] >= index)) { + int current = (max + min) / 2; + if (this.indexValues[current] > index) { + max = current - 1; + } else if (this.indexValues[current] < index) { + min = current + 1; + } else { + return current; + } + } + if (this.indexValues[max] < index) { + return max; + } else { + return min - 1; + } + } + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/org/apache/samoa/instances/Utils.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/Utils.java b/samoa-instances/src/main/java/org/apache/samoa/instances/Utils.java new file mode 100644 index 0000000..73990bb --- /dev/null +++ b/samoa-instances/src/main/java/org/apache/samoa/instances/Utils.java @@ -0,0 +1,91 @@ +package org.apache.samoa.instances; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/**
 * Small static helpers: index of the largest array element, and quoting / escaping of strings for ARFF
 * output.
 */
public class Utils {

  /** Characters that {@link #backQuoteChars(String)} escapes (the last one is U+001E). */
  private static final String ESCAPED_CHARS = "\n\r'\"\\\t%\u001E";

  /** Characters that force enclosing quotes without needing an escape. */
  private static final String QUOTE_ONLY_CHARS = "{}, ";

  /**
   * @param doubles the values to inspect
   * @return the position of the first element that is strictly greater than all elements before it and not
   *         exceeded later; 0 for an empty array
   */
  public static int maxIndex(double[] doubles) {
    int best = 0;
    for (int i = 1; i < doubles.length; i++) {
      if (doubles[i] > doubles[best]) {
        best = i;
      }
    }
    return best;
  }

  /**
   * Quotes a string when needed. A string containing one of the escapable characters is escaped with
   * {@link #backQuoteChars(String)} and enclosed in single quotes. A string that is empty, equal to
   * {@code "?"}, or that contains a brace, a comma or a space is enclosed in single quotes unchanged.
   * Any other string is returned as is.
   *
   * @param string the string to quote
   * @return the quoted string
   */
  public static String quote(String string) {
    if (containsAny(string, ESCAPED_CHARS)) {
      return "'" + backQuoteChars(string) + "'";
    }
    boolean needsQuotes = string.equals("?") || string.equals("")
        || containsAny(string, QUOTE_ONLY_CHARS);
    return needsQuotes ? "'" + string + "'" : string;
  }

  /**
   * Escapes backslash, single quote, tab, newline, carriage return, double quote, percent and U+001E by
   * replacing each with its backslash sequence.
   *
   * @param string the string to escape
   * @return the escaped string, or {@code string} itself when nothing had to be escaped
   */
  public static String backQuoteChars(String string) {
    StringBuilder escaped = new StringBuilder(string.length() + 8);
    boolean changed = false;

    // One pass is enough: no replacement text contains a character that a later rule would escape again,
    // apart from the backslash, whose rule is the first one and therefore already done.
    for (int pos = 0; pos < string.length(); pos++) {
      char c = string.charAt(pos);
      String replacement = escapeFor(c);
      if (replacement == null) {
        escaped.append(c);
      } else {
        escaped.append(replacement);
        changed = true;
      }
    }
    return changed ? escaped.toString() : string;
  }

  /** @return {@code true} when {@code text} contains at least one character of {@code chars} */
  private static boolean containsAny(String text, String chars) {
    for (int i = 0; i < chars.length(); i++) {
      if (text.indexOf(chars.charAt(i)) != -1) {
        return true;
      }
    }
    return false;
  }

  /** @return the escape sequence for {@code c}, or {@code null} when {@code c} is kept unchanged */
  private static String escapeFor(char c) {
    switch (c) {
    case '\\':
      return "\\\\";
    case '\'':
      return "\\'";
    case '\t':
      return "\\t";
    case '\n':
      return "\\n";
    case '\r':
      return "\\r";
    case '"':
      return "\\\"";
    case '%':
      return "\\%";
    case '\u001E':
      return "\\u001E";
    default:
      return null;
    }
  }
}
- * #L% - */ - -import org.junit.Before; -import org.junit.Test; - -import java.io.StringReader; - -import static org.junit.Assert.assertEquals; - -public class ArffLoaderTest { - - private ArffLoader loader; - - private StringReader reader; - - @Before - public void setUp() { - String inputString = "@relation test.txt\n" - + "\n" - + "@attribute Dur numeric\n" - + "@attribute Proto {udp,tcp,icmp,arp,ipx/spx,ipv6-icmp,pim,esp,igmp,rtcp,rtp,ipv6,udt}\n" - + "@attribute Dir {' <->',' <?>',' ->',' ?>',' who',' <-',' <?'}\n" - + "@attribute State {CON,PA_PA,PA_FRA, ...}\n" - + "@attribute sTos numeric\n" - + "@attribute dTos numeric\n" - + "@attribute TotPkts numeric\n" - + "@attribute TotBytes numeric\n" - + "@attribute SrcBytes numeric\n" - + "@attribute class {Background,Normal,Botnet}\n" - + "\n" - + "@data\n" - + "\n" - + "1065.731934,udp,' <->',...,0,0,2,252,145,Background\n" - + "1471.787109,udp,' <->',CON,0,0,2,252,145,Background"; - reader = new StringReader(inputString); - int size = 0; - int classAttribute = 10; - loader = new ArffLoader(reader, size, classAttribute); - - } - - @Test - public void testGetHeader() { - InstanceInformation header = loader.getStructure(); - assertEquals(10, header.numAttributes()); - assertEquals(9, header.classIndex()); - assertEquals(true, header.attribute(0).isNumeric()); - assertEquals(false, header.attribute(1).isNumeric()); - assertEquals(false, header.attribute(2).isNumeric()); - assertEquals(false, header.attribute(3).isNumeric()); - assertEquals(true, header.attribute(4).isNumeric()); - assertEquals(true, header.attribute(5).isNumeric()); - assertEquals(true, header.attribute(6).isNumeric()); - assertEquals(true, header.attribute(7).isNumeric()); - assertEquals(true, header.attribute(8).isNumeric()); - assertEquals(false, header.attribute(9).isNumeric()); - - assertEquals(7, header.attribute(2).numValues()); - assertEquals(" <->", header.attribute(2).value(0)); - assertEquals(" <?>", header.attribute(2).value(1)); - 
assertEquals(" ->", header.attribute(2).value(2)); - assertEquals(" ?>", header.attribute(2).value(3)); - assertEquals(" who", header.attribute(2).value(4)); - assertEquals(" <-", header.attribute(2).value(5)); - assertEquals(" <?", header.attribute(2).value(6)); - - assertEquals(3, header.attribute(9).numValues()); - assertEquals("Background", header.attribute(9).value(0)); - assertEquals("Normal", header.attribute(9).value(1)); - assertEquals("Botnet", header.attribute(9).value(2)); - - } - - @Test - public void testReadInstance() { - Instance instance = loader.readInstance(reader); - assertEquals(1065.731934, instance.value(0), 0); - assertEquals(0, instance.value(1), 0); - assertEquals(0, instance.value(2), 0); - assertEquals(3, instance.value(3), 0); - assertEquals(0, instance.value(4), 0); - assertEquals(0, instance.value(5), 0); - assertEquals(2, instance.value(6), 0); - assertEquals(252, instance.value(7), 0); - assertEquals(145, instance.value(8), 0); - assertEquals(0, instance.value(9), 0); - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/test/java/org/apache/samoa/instances/ArffLoaderTest.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/test/java/org/apache/samoa/instances/ArffLoaderTest.java b/samoa-instances/src/test/java/org/apache/samoa/instances/ArffLoaderTest.java new file mode 100644 index 0000000..b9dbd25 --- /dev/null +++ b/samoa-instances/src/test/java/org/apache/samoa/instances/ArffLoaderTest.java @@ -0,0 +1,111 @@ +package org.apache.samoa.instances; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.instances.ArffLoader; +import org.apache.samoa.instances.Instance; +import org.apache.samoa.instances.InstanceInformation; +import org.junit.Before; +import org.junit.Test; + +import java.io.StringReader; + +import static org.junit.Assert.assertEquals; + +public class ArffLoaderTest { + + private ArffLoader loader; + + private StringReader reader; + + @Before + public void setUp() { + String inputString = "@relation test.txt\n" + + "\n" + + "@attribute Dur numeric\n" + + "@attribute Proto {udp,tcp,icmp,arp,ipx/spx,ipv6-icmp,pim,esp,igmp,rtcp,rtp,ipv6,udt}\n" + + "@attribute Dir {' <->',' <?>',' ->',' ?>',' who',' <-',' <?'}\n" + + "@attribute State {CON,PA_PA,PA_FRA, ...}\n" + + "@attribute sTos numeric\n" + + "@attribute dTos numeric\n" + + "@attribute TotPkts numeric\n" + + "@attribute TotBytes numeric\n" + + "@attribute SrcBytes numeric\n" + + "@attribute class {Background,Normal,Botnet}\n" + + "\n" + + "@data\n" + + "\n" + + "1065.731934,udp,' <->',...,0,0,2,252,145,Background\n" + + "1471.787109,udp,' <->',CON,0,0,2,252,145,Background"; + reader = new StringReader(inputString); + int size = 0; + int classAttribute = 10; + loader = new ArffLoader(reader, size, classAttribute); + + } + + @Test + public void testGetHeader() { + InstanceInformation header = loader.getStructure(); + assertEquals(10, header.numAttributes()); + assertEquals(9, header.classIndex()); + assertEquals(true, header.attribute(0).isNumeric()); + assertEquals(false, header.attribute(1).isNumeric()); + assertEquals(false, 
header.attribute(2).isNumeric()); + assertEquals(false, header.attribute(3).isNumeric()); + assertEquals(true, header.attribute(4).isNumeric()); + assertEquals(true, header.attribute(5).isNumeric()); + assertEquals(true, header.attribute(6).isNumeric()); + assertEquals(true, header.attribute(7).isNumeric()); + assertEquals(true, header.attribute(8).isNumeric()); + assertEquals(false, header.attribute(9).isNumeric()); + + assertEquals(7, header.attribute(2).numValues()); + assertEquals(" <->", header.attribute(2).value(0)); + assertEquals(" <?>", header.attribute(2).value(1)); + assertEquals(" ->", header.attribute(2).value(2)); + assertEquals(" ?>", header.attribute(2).value(3)); + assertEquals(" who", header.attribute(2).value(4)); + assertEquals(" <-", header.attribute(2).value(5)); + assertEquals(" <?", header.attribute(2).value(6)); + + assertEquals(3, header.attribute(9).numValues()); + assertEquals("Background", header.attribute(9).value(0)); + assertEquals("Normal", header.attribute(9).value(1)); + assertEquals("Botnet", header.attribute(9).value(2)); + + } + + @Test + public void testReadInstance() { + Instance instance = loader.readInstance(reader); + assertEquals(1065.731934, instance.value(0), 0); + assertEquals(0, instance.value(1), 0); + assertEquals(0, instance.value(2), 0); + assertEquals(3, instance.value(3), 0); + assertEquals(0, instance.value(4), 0); + assertEquals(0, instance.value(5), 0); + assertEquals(2, instance.value(6), 0); + assertEquals(252, instance.value(7), 0); + assertEquals(145, instance.value(8), 0); + assertEquals(0, instance.value(9), 0); + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-local/pom.xml ---------------------------------------------------------------------- diff --git a/samoa-local/pom.xml b/samoa-local/pom.xml index e309625..edfb410 100644 --- a/samoa-local/pom.xml +++ b/samoa-local/pom.xml @@ -30,19 +30,19 @@ <description>Simple local engine for 
SAMOA</description> <artifactId>samoa-local</artifactId> <parent> - <groupId>com.yahoo.labs.samoa</groupId> + <groupId>org.apache.samoa</groupId> <artifactId>samoa</artifactId> <version>0.3.0-SNAPSHOT</version> </parent> <dependencies> <dependency> - <groupId>com.yahoo.labs.samoa</groupId> + <groupId>org.apache.samoa</groupId> <artifactId>samoa-api</artifactId> <version>${project.version}</version> </dependency> <dependency> - <groupId>com.yahoo.labs.samoa</groupId> + <groupId>org.apache.samoa</groupId> <artifactId>samoa-test</artifactId> <type>test-jar</type> <classifier>test-jar-with-dependencies</classifier> http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-local/src/main/java/com/yahoo/labs/samoa/LocalDoTask.java ---------------------------------------------------------------------- diff --git a/samoa-local/src/main/java/com/yahoo/labs/samoa/LocalDoTask.java b/samoa-local/src/main/java/com/yahoo/labs/samoa/LocalDoTask.java deleted file mode 100644 index 78f4ef5..0000000 --- a/samoa-local/src/main/java/com/yahoo/labs/samoa/LocalDoTask.java +++ /dev/null @@ -1,90 +0,0 @@ -package com.yahoo.labs.samoa; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * #L% - */ - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.github.javacliparser.ClassOption; -import com.github.javacliparser.FlagOption; -import com.github.javacliparser.IntOption; -import com.github.javacliparser.Option; -import com.yahoo.labs.samoa.tasks.Task; -import com.yahoo.labs.samoa.topology.impl.SimpleComponentFactory; -import com.yahoo.labs.samoa.topology.impl.SimpleEngine; - -/** - * The Class DoTask. - */ -public class LocalDoTask { - - // TODO: clean up this class for helping ML Developer in SAMOA - // TODO: clean up code from storm-impl - - // It seems that the 3 extra options are not used. - // Probably should remove them - private static final String SUPPRESS_STATUS_OUT_MSG = "Suppress the task status output. Normally it is sent to stderr."; - private static final String SUPPRESS_RESULT_OUT_MSG = "Suppress the task result output. Normally it is sent to stdout."; - private static final String STATUS_UPDATE_FREQ_MSG = "Wait time in milliseconds between status updates."; - private static final Logger logger = LoggerFactory.getLogger(LocalDoTask.class); - - /** - * The main method. 
- * - * @param args - * the arguments - */ - public static void main(String[] args) { - - // ArrayList<String> tmpArgs = new ArrayList<String>(Arrays.asList(args)); - - // args = tmpArgs.toArray(new String[0]); - - FlagOption suppressStatusOutOpt = new FlagOption("suppressStatusOut", 'S', SUPPRESS_STATUS_OUT_MSG); - - FlagOption suppressResultOutOpt = new FlagOption("suppressResultOut", 'R', SUPPRESS_RESULT_OUT_MSG); - - IntOption statusUpdateFreqOpt = new IntOption("statusUpdateFrequency", 'F', STATUS_UPDATE_FREQ_MSG, 1000, 0, - Integer.MAX_VALUE); - - Option[] extraOptions = new Option[] { suppressStatusOutOpt, suppressResultOutOpt, statusUpdateFreqOpt }; - - StringBuilder cliString = new StringBuilder(); - for (String arg : args) { - cliString.append(" ").append(arg); - } - logger.debug("Command line string = {}", cliString.toString()); - System.out.println("Command line string = " + cliString.toString()); - - Task task; - try { - task = ClassOption.cliStringToObject(cliString.toString(), Task.class, extraOptions); - logger.info("Successfully instantiating {}", task.getClass().getCanonicalName()); - } catch (Exception e) { - logger.error("Fail to initialize the task", e); - System.out.println("Fail to initialize the task" + e); - return; - } - task.setFactory(new SimpleComponentFactory()); - task.init(); - SimpleEngine.submitTopology(task.getTopology()); - } -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-local/src/main/java/com/yahoo/labs/samoa/topology/impl/SimpleComponentFactory.java ---------------------------------------------------------------------- diff --git a/samoa-local/src/main/java/com/yahoo/labs/samoa/topology/impl/SimpleComponentFactory.java b/samoa-local/src/main/java/com/yahoo/labs/samoa/topology/impl/SimpleComponentFactory.java deleted file mode 100644 index 754b735..0000000 --- a/samoa-local/src/main/java/com/yahoo/labs/samoa/topology/impl/SimpleComponentFactory.java +++ /dev/null @@ -1,53 +0,0 @@ -package 
com.yahoo.labs.samoa.topology.impl; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import com.yahoo.labs.samoa.core.EntranceProcessor; -import com.yahoo.labs.samoa.core.Processor; -import com.yahoo.labs.samoa.topology.ComponentFactory; -import com.yahoo.labs.samoa.topology.EntranceProcessingItem; -import com.yahoo.labs.samoa.topology.IProcessingItem; -import com.yahoo.labs.samoa.topology.ProcessingItem; -import com.yahoo.labs.samoa.topology.Stream; -import com.yahoo.labs.samoa.topology.Topology; - -public class SimpleComponentFactory implements ComponentFactory { - - public ProcessingItem createPi(Processor processor, int paralellism) { - return new SimpleProcessingItem(processor, paralellism); - } - - public ProcessingItem createPi(Processor processor) { - return this.createPi(processor, 1); - } - - public EntranceProcessingItem createEntrancePi(EntranceProcessor processor) { - return new SimpleEntranceProcessingItem(processor); - } - - public Stream createStream(IProcessingItem sourcePi) { - return new SimpleStream(sourcePi); - } - - public Topology createTopology(String topoName) { - return new SimpleTopology(topoName); - } -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-local/src/main/java/com/yahoo/labs/samoa/topology/impl/SimpleEngine.java ---------------------------------------------------------------------- diff 
--git a/samoa-local/src/main/java/com/yahoo/labs/samoa/topology/impl/SimpleEngine.java b/samoa-local/src/main/java/com/yahoo/labs/samoa/topology/impl/SimpleEngine.java deleted file mode 100644 index b737695..0000000 --- a/samoa-local/src/main/java/com/yahoo/labs/samoa/topology/impl/SimpleEngine.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ -package com.yahoo.labs.samoa.topology.impl; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * #L% - */ - -import com.yahoo.labs.samoa.topology.Topology; - -public class SimpleEngine { - - public static void submitTopology(Topology topology) { - SimpleTopology simpleTopology = (SimpleTopology) topology; - simpleTopology.run(); - // runs until completion - } - -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-local/src/main/java/com/yahoo/labs/samoa/topology/impl/SimpleEntranceProcessingItem.java ---------------------------------------------------------------------- diff --git a/samoa-local/src/main/java/com/yahoo/labs/samoa/topology/impl/SimpleEntranceProcessingItem.java b/samoa-local/src/main/java/com/yahoo/labs/samoa/topology/impl/SimpleEntranceProcessingItem.java deleted file mode 100644 index aaa2ddb..0000000 --- a/samoa-local/src/main/java/com/yahoo/labs/samoa/topology/impl/SimpleEntranceProcessingItem.java +++ /dev/null @@ -1,33 +0,0 @@ -package com.yahoo.labs.samoa.topology.impl; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * #L% - */ - -import com.yahoo.labs.samoa.core.EntranceProcessor; -import com.yahoo.labs.samoa.topology.LocalEntranceProcessingItem; - -class SimpleEntranceProcessingItem extends LocalEntranceProcessingItem { - public SimpleEntranceProcessingItem(EntranceProcessor processor) { - super(processor); - } - - // The default waiting time when there is no available events is 100ms - // Override waitForNewEvents() to change it -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-local/src/main/java/com/yahoo/labs/samoa/topology/impl/SimpleProcessingItem.java ---------------------------------------------------------------------- diff --git a/samoa-local/src/main/java/com/yahoo/labs/samoa/topology/impl/SimpleProcessingItem.java b/samoa-local/src/main/java/com/yahoo/labs/samoa/topology/impl/SimpleProcessingItem.java deleted file mode 100644 index 3dd9f0b..0000000 --- a/samoa-local/src/main/java/com/yahoo/labs/samoa/topology/impl/SimpleProcessingItem.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ -package com.yahoo.labs.samoa.topology.impl; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * #L% - */ - -import com.yahoo.labs.samoa.core.ContentEvent; -import com.yahoo.labs.samoa.core.Processor; -import com.yahoo.labs.samoa.topology.AbstractProcessingItem; -import com.yahoo.labs.samoa.topology.IProcessingItem; -import com.yahoo.labs.samoa.topology.ProcessingItem; -import com.yahoo.labs.samoa.topology.Stream; -import com.yahoo.labs.samoa.utils.PartitioningScheme; -import com.yahoo.labs.samoa.utils.StreamDestination; - -/** - * - * @author abifet - */ -class SimpleProcessingItem extends AbstractProcessingItem { - private IProcessingItem[] arrayProcessingItem; - - SimpleProcessingItem(Processor processor) { - super(processor); - } - - SimpleProcessingItem(Processor processor, int parallelism) { - super(processor); - this.setParallelism(parallelism); - } - - public IProcessingItem getProcessingItem(int i) { - return arrayProcessingItem[i]; - } - - @Override - protected ProcessingItem addInputStream(Stream inputStream, PartitioningScheme scheme) { - StreamDestination destination = new StreamDestination(this, this.getParallelism(), scheme); - ((SimpleStream) inputStream).addDestination(destination); - return this; - } - - public SimpleProcessingItem copy() { - Processor processor = this.getProcessor(); - return new SimpleProcessingItem(processor.newProcessor(processor)); - } - - public void processEvent(ContentEvent event, int counter) { - - int parallelism = this.getParallelism(); - // System.out.println("Process event "+event+" (isLast="+event.isLastEvent()+") with counter="+counter+" while parallelism="+parallelism); - if (this.arrayProcessingItem == null && parallelism > 0) { - // Init processing elements, the first time they are needed - this.arrayProcessingItem = new IProcessingItem[parallelism]; - for (int j = 0; j < parallelism; j++) { - arrayProcessingItem[j] = this.copy(); - arrayProcessingItem[j].getProcessor().onCreate(j); - } - } - if (this.arrayProcessingItem != null) { - IProcessingItem pi = this.getProcessingItem(counter); - Processor p = 
pi.getProcessor(); - // System.out.println("PI="+pi+", p="+p); - this.getProcessingItem(counter).getProcessor().process(event); - } - } -}
