package org.apache.samoa.flink.topology.impl;

/*
 * #%L
 * SAMOA
 * %%
 * Copyright (C) 2014 - 2015 Apache Software Foundation
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;

import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.IterativeDataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.samoa.flink.helpers.CircleDetection;
import org.apache.samoa.flink.helpers.Utils;
import org.apache.samoa.topology.AbstractTopology;
import org.apache.samoa.topology.EntranceProcessingItem;
import org.apache.samoa.utils.PartitioningScheme;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * A SAMOA topology on Apache Flink.
 *
 * A Samoa-Flink Streaming Topology is a DAG of ProcessingItems encapsulated within custom operators.
 * Streams are tagged and filtered in each operator's output so they can be routed to the right
 * operator respectively. Building a Flink topology from a Samoa task involves invoking all these
 * stream transformations and finally, marking and initiating loops in the graph. We have to do that
 * since Flink only allows explicit loops in the topology started with 'iterate()' and closed with
 * 'closeWith()'. Thus, when we build a flink topology we have to do it incrementally from the
 * sources, mark loops and initialize them with explicit iterations.
 */
public class FlinkTopology extends AbstractTopology {

  private static final Logger logger = LoggerFactory.getLogger(FlinkTopology.class);

  /** Selects the processing items that have not been initialised yet. */
  private static final Predicate<FlinkProcessingItem> NOT_INITIALISED = new Predicate<FlinkProcessingItem>() {
    @Override
    public boolean apply(FlinkProcessingItem flinkComponent) {
      return !flinkComponent.isInitialised();
    }
  };

  /**
   * Execution environment used by this topology. The field is static, so it is shared by every
   * {@code FlinkTopology} instance: constructing a new topology replaces the environment seen by
   * all of them.
   */
  public static StreamExecutionEnvironment env;

  /** Detected circles; the position of a circle in this list is its circle id. */
  public List<List<FlinkProcessingItem>> topologyLoops = new ArrayList<>();

  /** For every detected circle, the component id of the first item of that circle (its tail). */
  public List<Integer> backEdges = new ArrayList<Integer>();

  /**
   * Creates a topology bound to the given Flink execution environment.
   *
   * @param name name of the topology, passed on to {@link AbstractTopology}
   * @param env  the environment to store in the static {@link #env} field
   */
  public FlinkTopology(String name, StreamExecutionEnvironment env) {
    super(name);
    // Assigned through the class name to make it explicit that this is shared static state.
    FlinkTopology.env = env;
  }

  /**
   * @return the execution environment currently stored in the static {@link #env} field
   */
  public StreamExecutionEnvironment getEnvironment() {
    return env;
  }

  /**
   * Builds the Flink topology: marks the circles of the processing-item graph, initialises every
   * entrance processing item and then initialises all {@link FlinkProcessingItem}s.
   *
   * @throws IllegalStateException if some processing items can never be initialised
   */
  public void build() {
    markCircles();
    for (EntranceProcessingItem src : getEntranceProcessingItems()) {
      ((FlinkEntranceProcessingItem) src).initialise();
    }
    initComponents(ImmutableList.copyOf(Iterables.filter(getProcessingItems(), FlinkProcessingItem.class)));
  }

  /**
   * Initialises the given components pass by pass until none is left uninitialised. In each pass,
   * items outside any circle are initialised as soon as they report that they can be, and a circle
   * is initialised as a whole as soon as {@link #checkCircleReady(int)} accepts it.
   *
   * @param flinkComponents the components to initialise
   * @throws IllegalStateException if a full pass does not reduce the number of uninitialised
   *                               components; repeating the pass could not make progress either
   */
  private void initComponents(ImmutableList<FlinkProcessingItem> flinkComponents) {
    ImmutableList<FlinkProcessingItem> pending = flinkComponents;

    while (!pending.isEmpty()) {
      for (FlinkProcessingItem comp : pending) {
        if (comp.canBeInitialised() && !comp.isInitialised() && !comp.isPartOfCircle()) {
          comp.initialise();
          comp.initialiseStreams();
        } else if (comp.isPartOfCircle() && !comp.isInitialised()) {
          // the component is part of one or more circles
          for (Integer circle : comp.getCircleIds()) {
            if (checkCircleReady(circle)) {
              logger.debug("Circle: {} can be initialised", circle);
              initialiseCircle(circle);
            } else {
              logger.debug("Circle cannot be initialised");
            }
          }
        }
      }

      ImmutableList<FlinkProcessingItem> stillPending =
          ImmutableList.copyOf(Iterables.filter(pending, NOT_INITIALISED));
      if (stillPending.size() == pending.size()) {
        // Nothing was initialised in this pass, so the next pass would behave the same way.
        // Fail with a clear message instead of iterating (formerly: recursing) forever.
        throw new IllegalStateException("Cannot build topology '" + getName() + "': "
            + stillPending.size() + " processing item(s) can never be initialised");
      }
      pending = stillPending;
    }
  }

  /**
   * Builds the adjacency lists of the processing-item graph, detects its circles and records them
   * in {@link #topologyLoops} and {@link #backEdges}. Every item of a circle is told the id of the
   * circle it belongs to.
   */
  private void markCircles() {
    List<FlinkProcessingItem> pis = Lists.newArrayList(Iterables.filter(getProcessingItems(), FlinkProcessingItem.class));
    @SuppressWarnings("unchecked") // an array of a generic type cannot be created without a raw type
    List<Integer>[] graph = new List[pis.size()];
    FlinkProcessingItem[] processingItems = new FlinkProcessingItem[pis.size()];

    for (int i = 0; i < graph.length; i++) {
      graph[i] = new ArrayList<Integer>();
    }

    // construct the graph of the topology for the Processing Items (No entrance pi is included)
    // NOTE(review): component ids are used directly as array indices, so they are assumed to lie
    // in [0, pis.size()) -- confirm against the component id assignment.
    for (FlinkProcessingItem pi : pis) {
      processingItems[pi.getComponentId()] = pi;
      for (Tuple3<FlinkStream, PartitioningScheme, Integer> is : pi.getInputStreams()) {
        // f2 is the id of the item producing the stream; -1 presumably marks a stream that does
        // not originate from a processing item (e.g. an entrance item) -- TODO confirm
        if (is.f2 != -1) {
          graph[is.f2].add(pi.getComponentId());
        }
      }
    }
    for (List<Integer> successors : graph) {
      logger.debug("{}", successors);
    }

    CircleDetection detCircles = new CircleDetection();
    List<List<Integer>> circles = detCircles.getCircles(graph);

    // update PIs, regarding being part of a circle.
    for (List<Integer> c : circles) {
      List<FlinkProcessingItem> circle = new ArrayList<>();
      for (Integer it : c) {
        circle.add(processingItems[it]);
        processingItems[it].addPItoLoop(topologyLoops.size());
      }
      topologyLoops.add(circle);
      backEdges.add(circle.get(0).getComponentId());
    }
    logger.debug("Circles detected in the topology: {}", circles);
  }

  /**
   * Checks whether a circle can be initialised, i.e. whether every input stream of its items is
   * either already initialised, produced inside the circle itself, or produced by an item recorded
   * in {@link #backEdges}.
   *
   * @param circleId index of the circle in {@link #topologyLoops}
   * @return {@code true} if no input stream blocks the initialisation of the circle
   */
  private boolean checkCircleReady(int circleId) {
    List<FlinkProcessingItem> circle = topologyLoops.get(circleId);

    Set<Integer> circleIds = new HashSet<>();
    for (FlinkProcessingItem pi : circle) {
      circleIds.add(pi.getComponentId());
    }

    // check that all incoming to the circle streams are initialised
    for (FlinkProcessingItem procItem : circle) {
      for (Tuple3<FlinkStream, PartitioningScheme, Integer> inputStream : procItem.getInputStreams()) {
        // if a inputStream is not initialized AND source of inputStream is not in the circle or a tail of other circle
        if (!inputStream.f0.isInitialised()
            && !circleIds.contains(inputStream.f2)
            && !backEdges.contains(inputStream.f2)) {
          return false;
        }
      }
    }
    return true;
  }

  /**
   * Initialises all items of a circle and closes the iteration. The last item of the circle list is
   * treated as the head (starting point of the iteration) and the first one as the tail; the items
   * are initialised from the head backwards to the tail, and finally the head's input stream is
   * closed with the stream coming from the tail.
   *
   * @param circleId index of the circle in {@link #topologyLoops}
   */
  @SuppressWarnings({ "unchecked", "rawtypes" }) // the original code works on the raw IterativeDataStream type
  private void initialiseCircle(int circleId) {
    List<FlinkProcessingItem> circle = topologyLoops.get(circleId);

    // get the head and tail of circle
    FlinkProcessingItem tail = circle.get(0);
    FlinkProcessingItem head = circle.get(circle.size() - 1);

    // initialise source stream of the iteration, so as to use it for the iteration starting point
    if (!head.isInitialised()) {
      head.setOnIteration(true);
      head.initialise();
      head.initialiseStreams();
    }

    // initialise all nodes after head
    // NOTE(review): unlike the head, these nodes are initialised without an isInitialised() check;
    // confirm this is intended for items that belong to more than one circle.
    for (int node = circle.size() - 2; node >= 0; node--) {
      FlinkProcessingItem item = circle.get(node);
      item.initialise();
      item.initialiseStreams();
    }

    // NOTE(review): the cast presumably relies on setOnIteration(true) having made the head's
    // input stream an IterativeDataStream -- confirm for a head that was initialised earlier.
    ((IterativeDataStream) head.getInStream()).closeWith(head.getInputStreamBySourceID(tail.getComponentId()).getOutStream());
  }

}
http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-flink/src/main/java/org/apache/samoa/flink/topology/impl/SamoaType.java ---------------------------------------------------------------------- diff --git a/samoa-flink/src/main/java/org/apache/samoa/flink/topology/impl/SamoaType.java b/samoa-flink/src/main/java/org/apache/samoa/flink/topology/impl/SamoaType.java new file mode 100644 index 0000000..c0f223f --- /dev/null +++ b/samoa-flink/src/main/java/org/apache/samoa/flink/topology/impl/SamoaType.java @@ -0,0 +1,42 @@ +package org.apache.samoa.flink.topology.impl; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + + + + +import org.apache.flink.api.java.tuple.Tuple3; +import org.apache.samoa.core.ContentEvent; + +public class SamoaType extends Tuple3<String, ContentEvent, String> { + public SamoaType() { + super(); + } + + private SamoaType(String key, ContentEvent event, String streamId) { + super(key, event, streamId); + } + + public static SamoaType of(ContentEvent event, String streamId) { + String key = event.getKey() == null ? 
"none" : event.getKey(); + return new SamoaType(key, event, streamId); + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/pom.xml ---------------------------------------------------------------------- diff --git a/samoa-instances/pom.xml b/samoa-instances/pom.xml index b5372af..5415db9 100644 --- a/samoa-instances/pom.xml +++ b/samoa-instances/pom.xml @@ -31,7 +31,7 @@ <artifactId>samoa-instances</artifactId> <parent> - <groupId>com.yahoo.labs.samoa</groupId> + <groupId>org.apache.samoa</groupId> <artifactId>samoa</artifactId> <version>0.3.0-SNAPSHOT</version> </parent> http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/ArffLoader.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/ArffLoader.java b/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/ArffLoader.java deleted file mode 100644 index dc22bb8..0000000 --- a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/ArffLoader.java +++ /dev/null @@ -1,396 +0,0 @@ -package com.yahoo.labs.samoa.instances; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * #L% - */ - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.Reader; -import java.io.Serializable; -import java.io.StreamTokenizer; -import java.util.ArrayList; -import java.util.List; -import java.util.logging.Level; -import java.util.logging.Logger; - -/** - * @author abifet - */ -public class ArffLoader implements Serializable { - - protected InstanceInformation instanceInformation; - - transient protected StreamTokenizer streamTokenizer; - - protected Reader reader; - - protected int size; - - protected int classAttribute; - - public ArffLoader() { - } - - public ArffLoader(Reader reader, int size, int classAttribute) { - this.reader = reader; - this.size = size; - this.classAttribute = classAttribute; - initStreamTokenizer(reader); - } - - public InstanceInformation getStructure() { - return this.instanceInformation; - } - - public Instance readInstance(Reader reader) { - if (streamTokenizer == null) { - initStreamTokenizer(reader); - } - while (streamTokenizer.ttype == StreamTokenizer.TT_EOL) { - try { - streamTokenizer.nextToken(); - } catch (IOException ex) { - Logger.getLogger(ArffLoader.class.getName()).log(Level.SEVERE, null, ex); - } - } - if (streamTokenizer.ttype == '{') { - return readInstanceSparse(); - // return readDenseInstanceSparse(); - } else { - return readInstanceDense(); - } - - } - - public Instance readInstanceDense() { - Instance instance = new DenseInstance(this.instanceInformation.numAttributes() + 1); - // System.out.println(this.instanceInformation.numAttributes()); - int numAttribute = 0; - try { - while (numAttribute == 0 && streamTokenizer.ttype != StreamTokenizer.TT_EOF) { - // For each line - while (streamTokenizer.ttype != StreamTokenizer.TT_EOL - && streamTokenizer.ttype != StreamTokenizer.TT_EOF) { - // For each item - if (streamTokenizer.ttype == StreamTokenizer.TT_NUMBER) { - // System.out.println(streamTokenizer.nval + "Num "); - this.setValue(instance, numAttribute, streamTokenizer.nval, 
true); - //numAttribute++; - - } else if (streamTokenizer.sval != null && ( - streamTokenizer.ttype == StreamTokenizer.TT_WORD - || streamTokenizer.ttype == 34 || streamTokenizer.ttype == 39)) { - // System.out.println(streamTokenizer.sval + "Str"); - boolean isNumeric = attributes.get(numAttribute).isNumeric(); - double value; - if ("?".equals(streamTokenizer.sval)) { - value = Double.NaN; // Utils.missingValue(); - } else if (isNumeric == true) { - value = Double.valueOf(streamTokenizer.sval).doubleValue(); - } else { - value = this.instanceInformation.attribute(numAttribute).indexOfValue( - streamTokenizer.sval); - } - - this.setValue(instance, numAttribute, value, isNumeric); - //numAttribute++; - } - numAttribute++; - streamTokenizer.nextToken(); - } - streamTokenizer.nextToken(); - // System.out.println("EOL"); - } - - } catch (IOException ex) { - Logger.getLogger(ArffLoader.class.getName()).log(Level.SEVERE, null, ex); - } - //System.out.println(instance); - return (numAttribute > 0) ? 
instance : null; - } - - private void setValue(Instance instance, int numAttribute, double value, boolean isNumber) { - double valueAttribute; - if (this.instanceInformation.attribute(numAttribute).isNominal) { - valueAttribute = value; - //this.instanceInformation.attribute(numAttribute).indexOfValue(Double.toString(value)); - // System.out.println(value +"/"+valueAttribute+" "); - - } else { - valueAttribute = value; - // System.out.println(value +"/"+valueAttribute+" "); - } - if (this.instanceInformation.classIndex() == numAttribute) { - instance.setClassValue(valueAttribute); - // System.out.println(value - // +"<"+this.instanceInformation.classIndex()+">"); - } else { - instance.setValue(numAttribute, valueAttribute); - } - } - - private Instance readInstanceSparse() { - // Return a Sparse Instance - Instance instance = new SparseInstance(1.0, null); // (this.instanceInformation.numAttributes() - // + 1); - // System.out.println(this.instanceInformation.numAttributes()); - int numAttribute; - ArrayList<Double> attributeValues = new ArrayList<Double>(); - List<Integer> indexValues = new ArrayList<Integer>(); - try { - // while (streamTokenizer.ttype != StreamTokenizer.TT_EOF) { - streamTokenizer.nextToken(); // Remove the '{' char - // For each line - while (streamTokenizer.ttype != StreamTokenizer.TT_EOL - && streamTokenizer.ttype != StreamTokenizer.TT_EOF) { - while (streamTokenizer.ttype != '}') { - // For each item - // streamTokenizer.nextToken(); - // while (streamTokenizer.ttype != '}'){ - // System.out.println(streamTokenizer.nval +"-"+ - // streamTokenizer.sval); - // numAttribute = (int) streamTokenizer.nval; - if (streamTokenizer.ttype == StreamTokenizer.TT_NUMBER) { - numAttribute = (int) streamTokenizer.nval; - } else { - numAttribute = Integer.parseInt(streamTokenizer.sval); - } - streamTokenizer.nextToken(); - - if (streamTokenizer.ttype == StreamTokenizer.TT_NUMBER) { - // System.out.print(streamTokenizer.nval + " "); - 
this.setSparseValue(instance, indexValues, attributeValues, numAttribute, - streamTokenizer.nval, true); - // numAttribute++; - - } else if (streamTokenizer.sval != null && ( - streamTokenizer.ttype == StreamTokenizer.TT_WORD - || streamTokenizer.ttype == 34)) { - // System.out.print(streamTokenizer.sval + "-"); - if (attributes.get(numAttribute).isNumeric()) { - this.setSparseValue(instance, indexValues, attributeValues, numAttribute, - Double.valueOf(streamTokenizer.sval).doubleValue(), true); - } else { - this.setSparseValue(instance, indexValues, attributeValues, numAttribute, - this.instanceInformation - .attribute(numAttribute).indexOfValue(streamTokenizer.sval), - false); - } - } - streamTokenizer.nextToken(); - } - streamTokenizer.nextToken(); // Remove the '}' char - } - streamTokenizer.nextToken(); - // System.out.println("EOL"); - // } - - } catch (IOException ex) { - Logger.getLogger(ArffLoader.class.getName()).log(Level.SEVERE, null, ex); - } - int[] arrayIndexValues = new int[attributeValues.size()]; - double[] arrayAttributeValues = new double[attributeValues.size()]; - for (int i = 0; i < arrayIndexValues.length; i++) { - arrayIndexValues[i] = indexValues.get(i).intValue(); - arrayAttributeValues[i] = attributeValues.get(i).doubleValue(); - } - instance.addSparseValues(arrayIndexValues, arrayAttributeValues, - this.instanceInformation.numAttributes()); - return instance; - - } - - private void setSparseValue(Instance instance, List<Integer> indexValues, - List<Double> attributeValues, - int numAttribute, double value, boolean isNumber) { - double valueAttribute; - if (isNumber && this.instanceInformation.attribute(numAttribute).isNominal) { - valueAttribute = - this.instanceInformation.attribute(numAttribute).indexOfValue(Double.toString(value)); - } else { - valueAttribute = value; - } - if (this.instanceInformation.classIndex() == numAttribute) { - instance.setClassValue(valueAttribute); - } else { - // instance.setValue(numAttribute, 
valueAttribute); - indexValues.add(numAttribute); - attributeValues.add(valueAttribute); - } - // System.out.println(numAttribute+":"+valueAttribute+","+this.instanceInformation.classIndex()+","+value); - } - - private Instance readDenseInstanceSparse() { - // Returns a dense instance - Instance instance = new DenseInstance(this.instanceInformation.numAttributes() + 1); - // System.out.println(this.instanceInformation.numAttributes()); - int numAttribute; - try { - // while (streamTokenizer.ttype != StreamTokenizer.TT_EOF) { - streamTokenizer.nextToken(); // Remove the '{' char - // For each line - while (streamTokenizer.ttype != StreamTokenizer.TT_EOL - && streamTokenizer.ttype != StreamTokenizer.TT_EOF) { - while (streamTokenizer.ttype != '}') { - // For each item - // streamTokenizer.nextToken(); - // while (streamTokenizer.ttype != '}'){ - // System.out.print(streamTokenizer.nval+":"); - numAttribute = (int) streamTokenizer.nval; - streamTokenizer.nextToken(); - - if (streamTokenizer.ttype == StreamTokenizer.TT_NUMBER) { - // System.out.print(streamTokenizer.nval + " "); - this.setValue(instance, numAttribute, streamTokenizer.nval, true); - // numAttribute++; - - } else if (streamTokenizer.sval != null && ( - streamTokenizer.ttype == StreamTokenizer.TT_WORD - || streamTokenizer.ttype == 34)) { - // System.out.print(streamTokenizer.sval + - // "/"+this.instanceInformation.attribute(numAttribute).indexOfValue(streamTokenizer.sval)+" "); - if (attributes.get(numAttribute).isNumeric()) { - this.setValue(instance, numAttribute, - Double.valueOf(streamTokenizer.sval).doubleValue(), true); - } else { - this.setValue(instance, numAttribute, - this.instanceInformation.attribute(numAttribute) - .indexOfValue(streamTokenizer.sval), false); - // numAttribute++; - } - } - streamTokenizer.nextToken(); - } - streamTokenizer.nextToken(); // Remove the '}' char - } - streamTokenizer.nextToken(); - // System.out.println("EOL"); - // } - - } catch (IOException ex) { - 
Logger.getLogger(ArffLoader.class.getName()).log(Level.SEVERE, null, ex); - } - return instance; - } - - protected List<Attribute> attributes; - - private InstanceInformation getHeader() { - - String relation = "file stream"; - // System.out.println("RELATION " + relation); - attributes = new ArrayList<Attribute>(); - try { - streamTokenizer.nextToken(); - while (streamTokenizer.ttype != StreamTokenizer.TT_EOF) { - // For each line - // if (streamTokenizer.ttype == '@') { - if (streamTokenizer.ttype == StreamTokenizer.TT_WORD - && streamTokenizer.sval.startsWith("@") == true) { - // streamTokenizer.nextToken(); - String token = streamTokenizer.sval.toUpperCase(); - if (token.startsWith("@RELATION")) { - streamTokenizer.nextToken(); - relation = streamTokenizer.sval; - // System.out.println("RELATION " + relation); - } else if (token.startsWith("@ATTRIBUTE")) { - streamTokenizer.nextToken(); - String name = streamTokenizer.sval; - // System.out.println("* " + name); - if (name == null) { - name = Double.toString(streamTokenizer.nval); - } - streamTokenizer.nextToken(); - String type = streamTokenizer.sval; - // System.out.println("* " + name + ":" + type + " "); - if (streamTokenizer.ttype == '{') { - parseDoubleBrackests(name); - } else if (streamTokenizer.ttype == 10) {//for the buggy non-formal input arff file - streamTokenizer.nextToken(); - if (streamTokenizer.ttype == '{') { - parseDoubleBrackests(name); - } - } else { - // Add attribute - attributes.add(new Attribute(name)); - } - - } else if (token.startsWith("@DATA")) { - // System.out.print("END"); - streamTokenizer.nextToken(); - break; - } - } - streamTokenizer.nextToken(); - } - - } catch (IOException ex) { - Logger.getLogger(ArffLoader.class.getName()).log(Level.SEVERE, null, ex); - } - return new InstanceInformation(relation, attributes); - } - - private void parseDoubleBrackests(String name) throws IOException { - - streamTokenizer.nextToken(); - List<String> attributeLabels = new 
ArrayList<String>(); - while (streamTokenizer.ttype != '}') { - - if (streamTokenizer.sval != null) { - attributeLabels.add(streamTokenizer.sval); - // System.out.print(streamTokenizer.sval + ","); - } else { - attributeLabels.add(Double.toString(streamTokenizer.nval)); - // System.out.print(streamTokenizer.nval + ","); - } - - streamTokenizer.nextToken(); - } - // System.out.println(); - attributes.add(new Attribute(name, attributeLabels)); - - } - - private void initStreamTokenizer(Reader reader) { - BufferedReader br = new BufferedReader(reader); - - // Init streamTokenizer - streamTokenizer = new StreamTokenizer(br); - - streamTokenizer.resetSyntax(); - streamTokenizer.whitespaceChars(0, ' '); - streamTokenizer.wordChars(' ' + 1, '\u00FF'); - streamTokenizer.whitespaceChars(',', ','); - streamTokenizer.commentChar('%'); - streamTokenizer.quoteChar('"'); - streamTokenizer.quoteChar('\''); - streamTokenizer.ordinaryChar('{'); - streamTokenizer.ordinaryChar('}'); - streamTokenizer.eolIsSignificant(true); - - this.instanceInformation = this.getHeader(); - if (classAttribute < 0) { - this.instanceInformation.setClassIndex(this.instanceInformation.numAttributes() - 1); - // System.out.print(this.instanceInformation.classIndex()); - } else if (classAttribute > 0) { - this.instanceInformation.setClassIndex(classAttribute - 1); - } - } -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/Attribute.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/Attribute.java b/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/Attribute.java deleted file mode 100644 index 6ebd678..0000000 --- a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/Attribute.java +++ /dev/null @@ -1,211 +0,0 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. 
- */ -package com.yahoo.labs.samoa.instances; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import java.io.Serializable; -import java.text.SimpleDateFormat; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * @author abifet - */ -public class Attribute implements Serializable { - - public static final String ARFF_ATTRIBUTE = "@attribute"; - public static final String ARFF_ATTRIBUTE_NUMERIC = "NUMERIC"; - public static final String ARFF_ATTRIBUTE_NOMINAL = "NOMINAL"; - public static final String ARFF_ATTRIBUTE_DATE = "DATE"; - - /** - * - */ - protected boolean isNominal; - /** - * - */ - protected boolean isNumeric; - /** - * - */ - protected boolean isDate; - /** - * - */ - protected String name; - /** - * - */ - protected List<String> attributeValues; - - /** - * - * @return - */ - public List<String> getAttributeValues() { - return attributeValues; - } - - /** - * - */ - protected int index; - - /** - * - * @param string - */ - public Attribute(String string) { - this.name = string; - this.isNumeric = true; - } - - /** - * - * @param attributeName - * @param attributeValues - */ - public Attribute(String attributeName, List<String> attributeValues) { - this.name = attributeName; - this.attributeValues = attributeValues; - this.isNominal = true; - } - - /** - * - */ - public Attribute() { - 
this(""); - } - - /** - * - * @return - */ - public boolean isNominal() { - return this.isNominal; - } - - /** - * - * @return - */ - public String name() { - return this.name; - } - - /** - * - * @param value - * @return - */ - public String value(int value) { - return attributeValues.get(value); - } - - /** - * - * @return - */ - public boolean isNumeric() { - return isNumeric; - } - - /** - * - * @return - */ - public int numValues() { - if (isNumeric()) { - return 0; - } else { - return attributeValues.size(); - } - } - - /** - * - * @return - */ - public int index() { // RuleClassifier - return this.index; - } - - String formatDate(double value) { - SimpleDateFormat sdf = new SimpleDateFormat(); - return sdf.format(new Date((long) value)); - } - - boolean isDate() { - return isDate; - } - - private Map<String, Integer> valuesStringAttribute; - - /** - * - * @param value - * @return - */ - public final int indexOfValue(String value) { - - if (isNominal() == false) { - return -1; - } - if (this.valuesStringAttribute == null) { - this.valuesStringAttribute = new HashMap<String, Integer>(); - int count = 0; - for (String stringValue : attributeValues) { - this.valuesStringAttribute.put(stringValue, count); - count++; - } - } - Integer val = (Integer) this.valuesStringAttribute.get(value); - if (val == null) { - return -1; - } else { - return val.intValue(); - } - } - - @Override - public String toString() { - StringBuffer text = new StringBuffer(); - - text.append(ARFF_ATTRIBUTE).append(" ").append(Utils.quote(this.name)).append(" "); - - if (isNominal) { - text.append(ARFF_ATTRIBUTE_NOMINAL); - } else if (isNumeric) { - text.append(ARFF_ATTRIBUTE_NUMERIC); - } else if (isDate) { - text.append(ARFF_ATTRIBUTE_DATE); - } - - return text.toString(); - } -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/DenseInstance.java 
---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/DenseInstance.java b/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/DenseInstance.java deleted file mode 100644 index 57d1bfd..0000000 --- a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/DenseInstance.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ -package com.yahoo.labs.samoa.instances; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * #L% - */ - -/** - * @author abifet - */ -public class DenseInstance extends SingleLabelInstance { - - private static final long serialVersionUID = 280360594027716737L; - - public DenseInstance() { - // necessary for kryo serializer - } - - public DenseInstance(double weight, double[] res) { - super(weight, res); - } - - public DenseInstance(SingleLabelInstance inst) { - super(inst); - } - - public DenseInstance(Instance inst) { - super((SingleLabelInstance) inst); - } - - public DenseInstance(double numberAttributes) { - super((int) numberAttributes); - // super(1, new double[(int) numberAttributes-1]); - // Add missing values - // for (int i = 0; i < numberAttributes-1; i++) { - // //this.setValue(i, Double.NaN); - // } - - } - - @Override - public String toString() { - StringBuffer text = new StringBuffer(); - - for (int i = 0; i < this.instanceData.numAttributes(); i++) { - if (i > 0) { - text.append(","); - } - text.append(this.value(i)); - } - text.append(",").append(this.weight()); - - return text.toString(); - } -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/DenseInstanceData.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/DenseInstanceData.java b/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/DenseInstanceData.java deleted file mode 100644 index e5519e6..0000000 --- a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/DenseInstanceData.java +++ /dev/null @@ -1,97 +0,0 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ -package com.yahoo.labs.samoa.instances; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -/** - * - * @author abifet - */ -public class DenseInstanceData implements InstanceData { - - public DenseInstanceData(double[] array) { - this.attributeValues = array; - } - - public DenseInstanceData(int length) { - this.attributeValues = new double[length]; - } - - public DenseInstanceData() { - this(0); - } - - protected double[] attributeValues; - - @Override - public int numAttributes() { - return this.attributeValues.length; - } - - @Override - public double value(int indexAttribute) { - return this.attributeValues[indexAttribute]; - } - - @Override - public boolean isMissing(int indexAttribute) { - return Double.isNaN(this.value(indexAttribute)); - } - - @Override - public int numValues() { - return numAttributes(); - } - - @Override - public int index(int indexAttribute) { - return indexAttribute; - } - - @Override - public double valueSparse(int indexAttribute) { - return value(indexAttribute); - } - - @Override - public boolean isMissingSparse(int indexAttribute) { - return isMissing(indexAttribute); - } - - /* - * @Override public double value(Attribute attribute) { return - * value(attribute.index()); } - */ - - @Override - public double[] toDoubleArray() { - return attributeValues.clone(); - } - - @Override - public void setValue(int attributeIndex, double d) { - this.attributeValues[attributeIndex] = d; - } - -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/Instance.java 
---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/Instance.java b/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/Instance.java deleted file mode 100644 index f16443f..0000000 --- a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/Instance.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ -package com.yahoo.labs.samoa.instances; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * #L% - */ - -import java.io.Serializable; - -/** - * - * @author abifet - */ - -public interface Instance extends Serializable { - - double weight(); - - void setWeight(double weight); - - // Attributes - Attribute attribute(int instAttIndex); - - void deleteAttributeAt(int i); - - void insertAttributeAt(int i); - - int numAttributes(); - - public void addSparseValues(int[] indexValues, double[] attributeValues, int numberAttributes); - - // Values - int numValues(); - - String stringValue(int i); - - double value(int instAttIndex); - - double value(Attribute attribute); - - void setValue(int m_numAttributes, double d); - - boolean isMissing(int instAttIndex); - - int index(int i); - - double valueSparse(int i); - - boolean isMissingSparse(int p1); - - double[] toDoubleArray(); - - // Class - Attribute classAttribute(); - - int classIndex(); - - boolean classIsMissing(); - - double classValue(); - - int numClasses(); - - void setClassValue(double d); - - Instance copy(); - - // Dataset - void setDataset(Instances dataset); - - Instances dataset(); - - String toString(); -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/InstanceData.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/InstanceData.java b/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/InstanceData.java deleted file mode 100644 index 7b947a7..0000000 --- a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/InstanceData.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. 
- */ -package com.yahoo.labs.samoa.instances; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import java.io.Serializable; - -/** - * - * @author abifet - */ -public interface InstanceData extends Serializable { - - public int numAttributes(); - - public double value(int instAttIndex); - - public boolean isMissing(int instAttIndex); - - public int numValues(); - - public int index(int i); - - public double valueSparse(int i); - - public boolean isMissingSparse(int p1); - - // public double value(Attribute attribute); - - public double[] toDoubleArray(); - - public void setValue(int m_numAttributes, double d); - -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/InstanceInformation.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/InstanceInformation.java b/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/InstanceInformation.java deleted file mode 100644 index a4660df..0000000 --- a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/InstanceInformation.java +++ /dev/null @@ -1,108 +0,0 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. 
- */ -package com.yahoo.labs.samoa.instances; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import java.io.Serializable; -import java.util.List; - -/** - * - * @author abifet - */ -public class InstanceInformation implements Serializable { - - // Should we split Instances as a List of Instances, and InformationInstances - - /** The dataset's name. */ - protected String relationName; - - /** The attribute information. 
*/ - protected List<Attribute> attributes; - - protected int classIndex; - - public InstanceInformation(InstanceInformation chunk) { - this.relationName = chunk.relationName; - this.attributes = chunk.attributes; - this.classIndex = chunk.classIndex; - } - - public InstanceInformation(String st, List<Attribute> v) { - this.relationName = st; - this.attributes = v; - } - - public InstanceInformation() { - this.relationName = null; - this.attributes = null; - } - - // Information Instances - - public void setRelationName(String string) { - this.relationName = string; - } - - public String getRelationName() { - return this.relationName; - } - - public int classIndex() { - return classIndex; - } - - public void setClassIndex(int classIndex) { - this.classIndex = classIndex; - } - - public Attribute classAttribute() { - return this.attribute(this.classIndex()); - } - - public int numAttributes() { - return this.attributes.size(); - } - - public Attribute attribute(int w) { - return this.attributes.get(w); - } - - public int numClasses() { - return this.attributes.get(this.classIndex()).numValues(); - } - - public void deleteAttributeAt(Integer integer) { - throw new UnsupportedOperationException("Not yet implemented"); - } - - public void insertAttributeAt(Attribute attribute, int i) { - throw new UnsupportedOperationException("Not yet implemented"); - } - - public void setAttributes(List<Attribute> v) { - this.attributes = v; - } - -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/Instances.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/Instances.java b/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/Instances.java deleted file mode 100644 index 306f4c1..0000000 --- a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/Instances.java +++ /dev/null @@ -1,244 +0,0 @@ 
-/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ -package com.yahoo.labs.samoa.instances; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -import java.io.Reader; -import java.io.Serializable; -import java.io.StringReader; -import java.util.ArrayList; -import java.util.List; -import java.util.Random; - -/** - * - * @author abifet - */ -public class Instances implements Serializable { - - public static final String ARFF_RELATION = "@relation"; - public static final String ARFF_DATA = "@data"; - - protected InstanceInformation instanceInformation; - /** - * The instances. 
- */ - protected List<Instance> instances; - - transient protected ArffLoader arff; - - protected int classAttribute; - - public Instances(InstancesHeader modelContext) { - throw new UnsupportedOperationException("Not yet implemented"); - } - - public Instances(Instances chunk) { - this.instanceInformation = chunk.instanceInformation(); - // this.relationName = chunk.relationName; - // this.attributes = chunk.attributes; - this.instances = chunk.instances; - } - - public Instances() { - // this.instanceInformation = chunk.instanceInformation(); - // this.relationName = chunk.relationName; - // this.attributes = chunk.attributes; - // this.instances = chunk.instances; - } - - public Instances(Reader reader, int size, int classAttribute) { - this.classAttribute = classAttribute; - arff = new ArffLoader(reader, 0, classAttribute); - this.instanceInformation = arff.getStructure(); - this.instances = new ArrayList<>(); - } - - public Instances(Instances chunk, int capacity) { - this(chunk); - } - - public Instances(String st, List<Attribute> v, int capacity) { - - this.instanceInformation = new InstanceInformation(st, v); - this.instances = new ArrayList<>(); - } - - public Instances(Instances chunk, int i, int j) { - throw new UnsupportedOperationException("Not yet implemented"); - } - - public Instances(StringReader st, int v) { - throw new UnsupportedOperationException("Not yet implemented"); - } - - // Information Instances - public void setRelationName(String string) { - this.instanceInformation.setRelationName(string); - } - - public String getRelationName() { - return this.instanceInformation.getRelationName(); - } - - public int classIndex() { - return this.instanceInformation.classIndex(); - } - - public void setClassIndex(int classIndex) { - this.instanceInformation.setClassIndex(classIndex); - } - - public Attribute classAttribute() { - return this.instanceInformation.classAttribute(); - } - - public int numAttributes() { - return 
this.instanceInformation.numAttributes(); - } - - public Attribute attribute(int w) { - return this.instanceInformation.attribute(w); - } - - public int numClasses() { - return this.instanceInformation.numClasses(); - } - - public void deleteAttributeAt(Integer integer) { - this.instanceInformation.deleteAttributeAt(integer); - } - - public void insertAttributeAt(Attribute attribute, int i) { - this.instanceInformation.insertAttributeAt(attribute, i); - } - - // List of Instances - public Instance instance(int num) { - return this.instances.get(num); - } - - public int numInstances() { - return this.instances.size(); - } - - public void add(Instance inst) { - this.instances.add(inst.copy()); - } - - public void randomize(Random random) { - for (int j = numInstances() - 1; j > 0; j--) { - swap(j, random.nextInt(j + 1)); - } - } - - public void stratify(int numFolds) { - throw new UnsupportedOperationException("Not yet implemented"); - } - - public Instances trainCV(int numFolds, int n, Random random) { - throw new UnsupportedOperationException("Not yet implemented"); - } - - public Instances testCV(int numFolds, int n) { - throw new UnsupportedOperationException("Not yet implemented"); - } - - /* - * public Instances dataset() { throw new - * UnsupportedOperationException("Not yet implemented"); } - */ - public double meanOrMode(int j) { - throw new UnsupportedOperationException("Not yet implemented"); // CobWeb - } - - public boolean readInstance(Reader fileReader) { - - // ArffReader arff = new ArffReader(reader, this, m_Lines, 1); - if (arff == null) { - arff = new ArffLoader(fileReader, 0, this.classAttribute); - } - Instance inst = arff.readInstance(fileReader); - if (inst != null) { - inst.setDataset(this); - add(inst); - return true; - } else { - return false; - } - } - - public void delete() { - this.instances = new ArrayList<>(); - } - - public void swap(int i, int j) { - Instance in = instances.get(i); - instances.set(i, instances.get(j)); - 
instances.set(j, in); - } - - private InstanceInformation instanceInformation() { - return this.instanceInformation; - } - - public Attribute attribute(String name) { - - for (int i = 0; i < numAttributes(); i++) { - if (attribute(i).name().equals(name)) { - return attribute(i); - } - } - return null; - } - - @Override - public String toString() { - StringBuilder text = new StringBuilder(); - - for (int i = 0; i < numInstances(); i++) { - text.append(instance(i).toString()); - if (i < numInstances() - 1) { - text.append('\n'); - } - } - return text.toString(); - } - - // toString() with header - public String toStringArff() { - StringBuilder text = new StringBuilder(); - - text.append(ARFF_RELATION).append(" ") - .append(Utils.quote(getRelationName())).append("\n\n"); - for (int i = 0; i < numAttributes(); i++) { - text.append(attribute(i).toString()).append("\n"); - } - text.append("\n").append(ARFF_DATA).append("\n"); - - text.append(toString()); - return text.toString(); - - } -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/InstancesHeader.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/InstancesHeader.java b/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/InstancesHeader.java deleted file mode 100644 index cde079d..0000000 --- a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/InstancesHeader.java +++ /dev/null @@ -1,123 +0,0 @@ -package com.yahoo.labs.samoa.instances; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -/** - * Class for storing the header or context of a data stream. It allows to know the number of attributes and classes. - * - * @author Richard Kirkby ([email protected]) - * @version $Revision: 7 $ - */ -public class InstancesHeader extends Instances { - - private static final long serialVersionUID = 1L; - - public InstancesHeader(Instances i) { - super(i, 0); - } - - public InstancesHeader() { - super(); - } - - /* - * @Override public boolean add(Instance i) { throw new - * UnsupportedOperationException(); } - * - * @Override public boolean readInstance(Reader r) throws IOException { throw - * new UnsupportedOperationException(); } - */ - - public static String getClassNameString(InstancesHeader context) { - if (context == null) { - return "[class]"; - } - return "[class:" + context.classAttribute().name() + "]"; - } - - public static String getClassLabelString(InstancesHeader context, - int classLabelIndex) { - if ((context == null) || (classLabelIndex >= context.numClasses())) { - return "<class " + (classLabelIndex + 1) + ">"; - } - return "<class " + (classLabelIndex + 1) + ":" - + context.classAttribute().value(classLabelIndex) + ">"; - } - - // is impervious to class index changes - attIndex is true attribute index - // regardless of class position - public static String getAttributeNameString(InstancesHeader context, - int attIndex) { - if ((context == null) || (attIndex >= context.numAttributes())) { - return "[att " + (attIndex + 1) + "]"; - } - int instAttIndex = attIndex < context.classIndex() ? 
attIndex - : attIndex + 1; - return "[att " + (attIndex + 1) + ":" - + context.attribute(instAttIndex).name() + "]"; - } - - // is impervious to class index changes - attIndex is true attribute index - // regardless of class position - public static String getNominalValueString(InstancesHeader context, - int attIndex, int valIndex) { - if (context != null) { - int instAttIndex = attIndex < context.classIndex() ? attIndex - : attIndex + 1; - if ((instAttIndex < context.numAttributes()) - && (valIndex < context.attribute(instAttIndex).numValues())) { - return "{val " + (valIndex + 1) + ":" - + context.attribute(instAttIndex).value(valIndex) + "}"; - } - } - return "{val " + (valIndex + 1) + "}"; - } - - // is impervious to class index changes - attIndex is true attribute index - // regardless of class position - public static String getNumericValueString(InstancesHeader context, - int attIndex, double value) { - if (context != null) { - int instAttIndex = attIndex < context.classIndex() ? attIndex - : attIndex + 1; - if (instAttIndex < context.numAttributes()) { - if (context.attribute(instAttIndex).isDate()) { - return context.attribute(instAttIndex).formatDate(value); - } - } - } - return Double.toString(value); - } - - // add autom. 
- /* - * public int classIndex() { throw new - * UnsupportedOperationException("Not yet implemented"); } - * - * public int numAttributes() { throw new - * UnsupportedOperationException("Not yet implemented"); } - * - * @Override public Attribute attribute(int nPos) { throw new - * UnsupportedOperationException("Not yet implemented"); } - * - * public int numClasses() { return 0; } - */ -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/SingleClassInstanceData.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/SingleClassInstanceData.java b/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/SingleClassInstanceData.java deleted file mode 100644 index 317000e..0000000 --- a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/SingleClassInstanceData.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ -package com.yahoo.labs.samoa.instances; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * #L% - */ - -/** - * - * @author abifet - */ -public class SingleClassInstanceData implements InstanceData { - - protected double classValue; - - @Override - public int numAttributes() { - return 1; - } - - @Override - public double value(int instAttIndex) { - return classValue; - } - - @Override - public boolean isMissing(int indexAttribute) { - return Double.isNaN(this.value(indexAttribute)); - } - - @Override - public int numValues() { - return 1; - } - - @Override - public int index(int i) { - return 0; - } - - @Override - public double valueSparse(int i) { - return value(i); - } - - @Override - public boolean isMissingSparse(int indexAttribute) { - return Double.isNaN(this.value(indexAttribute)); - } - - /* - * @Override public double value(Attribute attribute) { return - * this.classValue; } - */ - - @Override - public double[] toDoubleArray() { - double[] array = { this.classValue }; - return array; - } - - @Override - public void setValue(int m_numAttributes, double d) { - this.classValue = d; - } - -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/SingleLabelInstance.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/SingleLabelInstance.java b/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/SingleLabelInstance.java deleted file mode 100644 index 96b9afe..0000000 --- a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/SingleLabelInstance.java +++ /dev/null @@ -1,260 +0,0 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ -package com.yahoo.labs.samoa.instances; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * #L% - */ - -/** - * - * @author abifet - */ -// public int[] m_AttValues; // for DataPoint - -public class SingleLabelInstance implements Instance { - - protected double weight; - - protected InstanceData instanceData; - - protected InstanceData classData; - - // Fast implementation without using Objects - // protected double[] attributeValues; - // protected double classValue; - - protected InstancesHeader instanceInformation; - - public SingleLabelInstance() { - // necessary for kryo serializer - } - - public SingleLabelInstance(SingleLabelInstance inst) { - this.weight = inst.weight; - this.instanceData = inst.instanceData; // copy - this.classData = inst.classData; // copy - // this.classValue = inst.classValue; - // this.attributeValues = inst.attributeValues; - this.instanceInformation = inst.instanceInformation; - } - - // Dense - public SingleLabelInstance(double weight, double[] res) { - this.weight = weight; - this.instanceData = new DenseInstanceData(res); - // this.attributeValues = res; - this.classData = new SingleClassInstanceData(); - // this.classValue = Double.NaN; - - } - - // Sparse - public SingleLabelInstance(double weight, double[] attributeValues, - int[] indexValues, int numberAttributes) { - this.weight = weight; - this.instanceData = new SparseInstanceData(attributeValues, - indexValues, numberAttributes); // ??? 
- this.classData = new SingleClassInstanceData(); - // this.classValue = Double.NaN; - // this.instanceInformation = new InstancesHeader(); - - } - - public SingleLabelInstance(double weight, InstanceData instanceData) { - this.weight = weight; - this.instanceData = instanceData; // ??? - // this.classValue = Double.NaN; - this.classData = new SingleClassInstanceData(); - // this.instanceInformation = new InstancesHeader(); - } - - public SingleLabelInstance(int numAttributes) { - this.instanceData = new DenseInstanceData(new double[numAttributes]); - // m_AttValues = new double[numAttributes]; - /* - * for (int i = 0; i < m_AttValues.length; i++) { m_AttValues[i] = - * Utils.missingValue(); } - */ - this.weight = 1; - this.classData = new SingleClassInstanceData(); - this.instanceInformation = new InstancesHeader(); - } - - @Override - public double weight() { - return weight; - } - - @Override - public void setWeight(double weight) { - this.weight = weight; - } - - @Override - public Attribute attribute(int instAttIndex) { - return this.instanceInformation.attribute(instAttIndex); - } - - @Override - public void deleteAttributeAt(int i) { - // throw new UnsupportedOperationException("Not yet implemented"); - } - - @Override - public void insertAttributeAt(int i) { - throw new UnsupportedOperationException("Not yet implemented"); - } - - @Override - public int numAttributes() { - return this.instanceInformation.numAttributes(); - } - - @Override - public double value(int instAttIndex) { - return // attributeValues[instAttIndex]; // - this.instanceData.value(instAttIndex); - } - - @Override - public boolean isMissing(int instAttIndex) { - return // Double.isNaN(value(instAttIndex)); // - this.instanceData.isMissing(instAttIndex); - } - - @Override - public int numValues() { - return // this.attributeValues.length; // - this.instanceData.numValues(); - } - - @Override - public int index(int i) { - return // i; // - this.instanceData.index(i); - } - - @Override - 
public double valueSparse(int i) { - return this.instanceData.valueSparse(i); - } - - @Override - public boolean isMissingSparse(int p) { - return this.instanceData.isMissingSparse(p); - } - - @Override - public double value(Attribute attribute) { - // throw new UnsupportedOperationException("Not yet implemented"); - // //Predicates.java - return value(attribute.index()); - - } - - @Override - public String stringValue(int i) { - throw new UnsupportedOperationException("Not yet implemented"); - } - - @Override - public double[] toDoubleArray() { - return // this.attributeValues; // - this.instanceData.toDoubleArray(); - } - - @Override - public void setValue(int numAttribute, double d) { - this.instanceData.setValue(numAttribute, d); - // this.attributeValues[numAttribute] = d; - } - - @Override - public double classValue() { - return this.classData.value(0); - // return classValue; - } - - @Override - public int classIndex() { - return instanceInformation.classIndex(); - } - - @Override - public int numClasses() { - return this.instanceInformation.numClasses(); - } - - @Override - public boolean classIsMissing() { - return // Double.isNaN(this.classValue);// - this.classData.isMissing(0); - } - - @Override - public Attribute classAttribute() { - return this.instanceInformation.attribute(0); - } - - @Override - public void setClassValue(double d) { - this.classData.setValue(0, d); - // this.classValue = d; - } - - @Override - public Instance copy() { - SingleLabelInstance inst = new SingleLabelInstance(this); - return inst; - } - - @Override - public Instances dataset() { - return this.instanceInformation; - } - - @Override - public void setDataset(Instances dataset) { - this.instanceInformation = new InstancesHeader(dataset); - } - - public void addSparseValues(int[] indexValues, double[] attributeValues, - int numberAttributes) { - this.instanceData = new SparseInstanceData(attributeValues, - indexValues, numberAttributes); // ??? 
- } - - @Override - public String toString() { - StringBuffer text = new StringBuffer(); - - for (int i = 0; i < this.numValues(); i++) { - if (i > 0) - text.append(","); - text.append(this.value(i)); - } - text.append(",").append(this.weight()); - - return text.toString(); - } - -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/SparseInstance.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/SparseInstance.java b/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/SparseInstance.java deleted file mode 100644 index d4d876c..0000000 --- a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/SparseInstance.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ -package com.yahoo.labs.samoa.instances; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * #L% - */ - -/** - * - * @author abifet - */ -public class SparseInstance extends SingleLabelInstance { - - public SparseInstance(double d, double[] res) { - super(d, res); - } - - public SparseInstance(SingleLabelInstance inst) { - super(inst); - } - - public SparseInstance(double numberAttributes) { - // super(1, new double[(int) numberAttributes-1]); - super(1, null, null, (int) numberAttributes); - } - - public SparseInstance(double weight, double[] attributeValues, int[] indexValues, int numberAttributes) { - super(weight, attributeValues, indexValues, numberAttributes); - } - -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/SparseInstanceData.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/SparseInstanceData.java b/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/SparseInstanceData.java deleted file mode 100644 index 658af34..0000000 --- a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/SparseInstanceData.java +++ /dev/null @@ -1,171 +0,0 @@ -/* - * To change this template, choose Tools | Templates - * and open the template in the editor. - */ -package com.yahoo.labs.samoa.instances; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * #L% - */ - -/** - * - * @author abifet - */ -public class SparseInstanceData implements InstanceData { - - public SparseInstanceData(double[] attributeValues, int[] indexValues, int numberAttributes) { - this.attributeValues = attributeValues; - this.indexValues = indexValues; - this.numberAttributes = numberAttributes; - } - - public SparseInstanceData(int length) { - this.attributeValues = new double[length]; - this.indexValues = new int[length]; - } - - protected double[] attributeValues; - - public double[] getAttributeValues() { - return attributeValues; - } - - public void setAttributeValues(double[] attributeValues) { - this.attributeValues = attributeValues; - } - - public int[] getIndexValues() { - return indexValues; - } - - public void setIndexValues(int[] indexValues) { - this.indexValues = indexValues; - } - - public int getNumberAttributes() { - return numberAttributes; - } - - public void setNumberAttributes(int numberAttributes) { - this.numberAttributes = numberAttributes; - } - - protected int[] indexValues; - protected int numberAttributes; - - @Override - public int numAttributes() { - return this.numberAttributes; - } - - @Override - public double value(int indexAttribute) { - int location = locateIndex(indexAttribute); - // return location == -1 ? 
0 : this.attributeValues[location]; - // int index = locateIndex(attIndex); - if ((location >= 0) && (indexValues[location] == indexAttribute)) { - return attributeValues[location]; - } else { - return 0.0; - } - } - - @Override - public boolean isMissing(int indexAttribute) { - return Double.isNaN(this.value(indexAttribute)); - } - - @Override - public int numValues() { - return this.attributeValues.length; - } - - @Override - public int index(int indexAttribute) { - return this.indexValues[indexAttribute]; - } - - @Override - public double valueSparse(int indexAttribute) { - return this.attributeValues[indexAttribute]; - } - - @Override - public boolean isMissingSparse(int indexAttribute) { - return Double.isNaN(this.valueSparse(indexAttribute)); - } - - /* - * @Override public double value(Attribute attribute) { return - * value(attribute.index()); } - */ - - @Override - public double[] toDoubleArray() { - double[] array = new double[numAttributes()]; - for (int i = 0; i < numValues(); i++) { - array[index(i)] = valueSparse(i); - } - return array; - } - - @Override - public void setValue(int attributeIndex, double d) { - int index = locateIndex(attributeIndex); - if (index(index) == attributeIndex) { - this.attributeValues[index] = d; - } else { - // We need to add the value - } - } - - /** - * Locates the greatest index that is not greater than the given index. - * - * @return the internal index of the attribute index. 
Returns -1 if no index with this property could be found - */ - public int locateIndex(int index) { - - int min = 0; - int max = this.indexValues.length - 1; - - if (max == -1) { - return -1; - } - - // Binary search - while ((this.indexValues[min] <= index) && (this.indexValues[max] >= index)) { - int current = (max + min) / 2; - if (this.indexValues[current] > index) { - max = current - 1; - } else if (this.indexValues[current] < index) { - min = current + 1; - } else { - return current; - } - } - if (this.indexValues[max] < index) { - return max; - } else { - return min - 1; - } - } - -} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/Utils.java ---------------------------------------------------------------------- diff --git a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/Utils.java b/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/Utils.java deleted file mode 100644 index 993186c..0000000 --- a/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/Utils.java +++ /dev/null @@ -1,91 +0,0 @@ -package com.yahoo.labs.samoa.instances; - -/* - * #%L - * SAMOA - * %% - * Copyright (C) 2014 - 2015 Apache Software Foundation - * %% - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * #L% - */ - -public class Utils { - public static int maxIndex(double[] doubles) { - - double maximum = 0; - int maxIndex = 0; - - for (int i = 0; i < doubles.length; i++) { - if ((i == 0) || (doubles[i] > maximum)) { - maxIndex = i; - maximum = doubles[i]; - } - } - - return maxIndex; - } - - public static String quote(String string) { - boolean quote = false; - - // backquote the following characters - if ((string.indexOf('\n') != -1) || (string.indexOf('\r') != -1) || (string.indexOf('\'') != -1) - || (string.indexOf('"') != -1) - || (string.indexOf('\\') != -1) || (string.indexOf('\t') != -1) || (string.indexOf('%') != -1) - || (string.indexOf('\u001E') != -1)) { - string = backQuoteChars(string); - quote = true; - } - - // Enclose the string in 's if the string contains a recently added - // backquote or contains one of the following characters. - if ((quote == true) || (string.indexOf('{') != -1) || (string.indexOf('}') != -1) || (string.indexOf(',') != -1) - || (string.equals("?")) - || (string.indexOf(' ') != -1) || (string.equals(""))) { - string = ("'".concat(string)).concat("'"); - } - - return string; - } - - public static String backQuoteChars(String string) { - - int index; - StringBuffer newStringBuffer; - - // replace each of the following characters with the backquoted version - char charsFind[] = { '\\', '\'', '\t', '\n', '\r', '"', '%', '\u001E' }; - String charsReplace[] = { "\\\\", "\\'", "\\t", "\\n", "\\r", "\\\"", "\\%", "\\u001E" }; - for (int i = 0; i < charsFind.length; i++) { - if (string.indexOf(charsFind[i]) != -1) { - newStringBuffer = new StringBuffer(); - while ((index = string.indexOf(charsFind[i])) != -1) { - if (index > 0) { - newStringBuffer.append(string.substring(0, index)); - } - newStringBuffer.append(charsReplace[i]); - if ((index + 1) < string.length()) { - string = string.substring(index + 1); - } else { - string = ""; - } - } - newStringBuffer.append(string); - string = newStringBuffer.toString(); - } - } - - return 
string; - } -}
