http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..d762ce3 --- /dev/null +++ b/pom.xml @@ -0,0 +1,286 @@ +<!-- + #%L + SAMOA + %% + Copyright (C) 2013 Yahoo! Inc. + %% + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + #L% + --> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <name>SAMOA</name> + <description>Scalable Advanced Massive Online Analysis</description> + <url>http://github.com/yahoo/samoa</url> + <modules> + <module>samoa-test</module> + </modules> + <packaging>pom</packaging> + + <groupId>com.yahoo.labs.samoa</groupId> + <artifactId>samoa</artifactId> + <version>0.3.0-SNAPSHOT</version> + + <organization> + <name>Yahoo Labs</name> + <url>http://labs.yahoo.com</url> + </organization> + + <scm> + <connection>scm:git:git@github.com:yahoo/samoa.git</connection> + <url>scm:git:git@github.com:yahoo/samoa.git</url> + <developerConnection>scm:git:git@github.com:yahoo/samoa.git</developerConnection> + <tag>HEAD</tag> + </scm> + + <profiles> + <profile> + <id>local</id> + <activation> + <activeByDefault>true</activeByDefault> + </activation> + <modules> + <module>samoa-instances</module> + <module>samoa-api</module> 
+ <module>samoa-local</module> + <module>samoa-test</module> + </modules> + </profile> + <profile> + <id>threads</id> + <modules> + <module>samoa-instances</module> + <module>samoa-api</module> + <module>samoa-threads</module> + <module>samoa-test</module> + </modules> + </profile> + <profile> + <id>storm</id> + <modules> + <module>samoa-instances</module> + <module>samoa-api</module> + <module>samoa-storm</module> + <module>samoa-test</module> + </modules> + </profile> + <profile> + <id>s4</id> + <modules> + <module>samoa-instances</module> + <module>samoa-api</module> + <module>samoa-s4</module> + <module>samoa-test</module> + </modules> + </profile> + <profile> + <id>samza</id> + <modules> + <module>samoa-instances</module> + <module>samoa-api</module> + <module>samoa-samza</module> + <module>samoa-test</module> + </modules> + </profile> + <profile> + <id>all</id> + <modules> + <module>samoa-instances</module> + <module>samoa-api</module> + <module>samoa-local</module> + <module>samoa-threads</module> + <module>samoa-storm</module> + <module>samoa-s4</module> + <module>samoa-samza</module> + <module>samoa-test</module> + </modules> + </profile> + </profiles> + + <properties> + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> + + <maven-dependency-plugin.version>2.7</maven-dependency-plugin.version> + <maven-assembly-plugin.version>2.4</maven-assembly-plugin.version> + + <commons-lang3.version>3.1</commons-lang3.version> + <guava.version>17.0</guava.version> + <hadoop.version>2.2.0</hadoop.version> + <javacliparser.version>0.5.0</javacliparser.version> + <jcip-annotations.version>1.0</jcip-annotations.version> + <jmockit.version>1.13</jmockit.version> + <junit.version>4.10</junit.version> + <kafka.version>0.8.1</kafka.version> + <kryo.version>2.17</kryo.version> + <metrics-core.version>2.2.0</metrics-core.version> + <miniball.version>1.0.3</miniball.version> + <s4.version>0.6.0-incubating</s4.version> + <samza.version>0.7.0</samza.version> + 
<slf4j-log4j12.version>1.7.2</slf4j-log4j12.version> + <slf4j-simple.version>1.7.5</slf4j-simple.version> + <maven-surefire-plugin.version>2.18</maven-surefire-plugin.version> + <storm.version>0.8.2</storm.version> + <!-- storm 0.8.2 loads zookeeper classes with hardcoded names from 3.3 version--> + <zookeeper.storm.version>3.3.6</zookeeper.storm.version> + </properties> + + <dependencies> + <dependency> + <groupId>org.jmockit</groupId> + <artifactId>jmockit</artifactId> + <version>${jmockit.version}</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>${junit.version}</version> + <scope>test</scope> + </dependency> + </dependencies> + + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-compiler-plugin</artifactId> + <version>3.1</version> + <configuration> + <source>1.7</source> + <target>1.7</target> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-release-plugin</artifactId> + <version>2.5.1</version> + <dependencies> + <dependency> + <groupId>org.apache.maven.scm</groupId> + <artifactId>maven-scm-provider-gitexe</artifactId> + <version>1.9.2</version> + </dependency> + </dependencies> + <configuration> + <tagNameFormat>v@{project.version}</tagNameFormat> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-javadoc-plugin</artifactId> + <version>2.9.1</version> + <configuration> + <aggregate>true</aggregate> + <show>public</show> + <nohelp>true</nohelp> + <header>SAMOA ${project.version}</header> + <footer>Scalable Advanced Massive Online Analysis, + ${project.version}</footer> + <doctitle>SAMOA API ${project.version}</doctitle> + <links> + <link>http://samoa-project.net/docs/api/</link> + </links> + </configuration> + </plugin> + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>license-maven-plugin</artifactId> 
+ <version>1.5</version> + <configuration> + <licenseName>apache_v2</licenseName> + <inceptionYear>2013</inceptionYear> + <organizationName>Yahoo! Inc.</organizationName> + <projectName>SAMOA</projectName> + <roots> + <root>samoa-api</root> + <root>samoa-instances</root> + <root>samoa-local</root> + <root>samoa-storm</root> + <root>samoa-s4</root> + <root>samoa-samza</root> + <root>bin</root> + </roots> + <excludes> + <exclude>**/target/**</exclude> + </excludes> + </configuration> + <executions> + <execution> + <id>first</id> + <goals> + <goal>update-file-header</goal> + </goals> + <phase>process-sources</phase> + </execution> + </executions> + </plugin> + </plugins> + <pluginManagement> + <plugins> + <!--This plugin's configuration is used to store Eclipse + m2e settings only. It has no influence on the Maven build itself. --> + <plugin> + <groupId>org.eclipse.m2e</groupId> + <artifactId>lifecycle-mapping</artifactId> + <version>1.0.0</version> + <configuration> + <lifecycleMappingMetadata> + <pluginExecutions> + <pluginExecution> + <pluginExecutionFilter> + <groupId> + org.codehaus.mojo + </groupId> + <artifactId> + license-maven-plugin + </artifactId> + <versionRange> + [1.5,) + </versionRange> + <goals> + <goal> + update-file-header + </goal> + </goals> + </pluginExecutionFilter> + <action> + <ignore /> + </action> + </pluginExecution> + </pluginExecutions> + </lifecycleMappingMetadata> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.rat</groupId> + <artifactId>apache-rat-plugin</artifactId> + <version>0.10</version> + <configuration> + <excludes> + <exclude>.git/**/*</exclude> + <exclude>**/*.iml</exclude> + <exclude>**/README.md</exclude> + </excludes> + </configuration> + </plugin> + </plugins> + </pluginManagement> + </build> + + <parent> + <groupId>org.sonatype.oss</groupId> + <artifactId>oss-parent</artifactId> + <version>7</version> + </parent> +</project>
http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/pom.xml ---------------------------------------------------------------------- diff --git a/samoa-api/pom.xml b/samoa-api/pom.xml new file mode 100644 index 0000000..c9e41fc --- /dev/null +++ b/samoa-api/pom.xml @@ -0,0 +1,127 @@ +<!-- + #%L + SAMOA + %% + Copyright (C) 2013 Yahoo! Inc. + %% + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + #L% + --> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <properties> + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> + </properties> + + <name>samoa-api</name> + <description>API and algorithms for SAMOA</description> + + <artifactId>samoa-api</artifactId> + <parent> + <groupId>com.yahoo.labs.samoa</groupId> + <artifactId>samoa</artifactId> + <version>0.3.0-SNAPSHOT</version> + </parent> + + <dependencies> + <dependency> + <groupId>com.yammer.metrics</groupId> + <artifactId>metrics-core</artifactId> + <version>${metrics-core.version}</version> + </dependency> + + <dependency> + <groupId>net.jcip</groupId> + <artifactId>jcip-annotations</artifactId> + <version>${jcip-annotations.version}</version> + </dependency> + + <dependency> + <groupId>org.apache.commons</groupId> + <artifactId>commons-lang3</artifactId> + <version>${commons-lang3.version}</version> 
+ </dependency> + + <dependency> + <groupId>com.github.javacliparser</groupId> + <artifactId>javacliparser</artifactId> + <version>${javacliparser.version}</version> + </dependency> + + <dependency> + <groupId>com.yahoo.labs.samoa</groupId> + <artifactId>samoa-instances</artifactId> + <version>${project.version}</version> + </dependency> + + <dependency> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + <version>${guava.version}</version> + </dependency> + + <dependency> + <groupId>com.esotericsoftware.kryo</groupId> + <artifactId>kryo</artifactId> + <version>${kryo.version}</version> + </dependency> + + <dependency> + <groupId>com.dreizak</groupId> + <artifactId>miniball</artifactId> + <version>${miniball.version}</version> + </dependency> + + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-common</artifactId> + <version>${hadoop.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-hdfs</artifactId> + <version>${hadoop.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-minicluster</artifactId> + <version>${hadoop.version}</version> + <scope>test</scope> + </dependency> + </dependencies> + + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-dependency-plugin</artifactId> + <version>${maven-dependency-plugin.version}</version> + <executions> + <execution> + <id>copy-dependencies</id> + <phase>package</phase> + <goals> + <goal>copy-dependencies</goal> + </goals> + <configuration> + <outputDirectory>${project.build.directory}/lib</outputDirectory> + <overWriteReleases>false</overWriteReleases> + <overWriteSnapshots>false</overWriteSnapshots> + <overWriteIfNewer>true</overWriteIfNewer> + </configuration> + </execution> + </executions> + </plugin> + </plugins> + </build> +</project> 
/**
 * Contract for a content event: a serializable object that carries a string
 * key and can report whether it is the last event.
 */
public interface ContentEvent extends java.io.Serializable {

    /**
     * Returns the key of this content event.
     *
     * @return the key
     */
    String getKey();

    /**
     * Assigns the key of this content event.
     *
     * @param key the key string
     */
    void setKey(String key);

    /**
     * Reports whether this event is the last event.
     *
     * @return {@code true} if this is the last event (presumably the final
     *         event of its stream; confirm against the implementations)
     */
    boolean isLastEvent();
}
+ * #L% + */ + +import java.util.Arrays; + +import com.google.common.primitives.Doubles; + +public class DoubleVector implements java.io.Serializable { + + /** + * + */ + private static final long serialVersionUID = 8243012708860261398L; + + private double[] doubleArray; + + public DoubleVector() { + this.doubleArray = new double[0]; + } + + public DoubleVector(double[] toCopy) { + this.doubleArray = new double[toCopy.length]; + System.arraycopy(toCopy, 0, this.doubleArray, 0, toCopy.length); + } + + public DoubleVector(DoubleVector toCopy) { + this(toCopy.getArrayRef()); + } + + public double[] getArrayRef() { + return this.doubleArray; + } + + public double[] getArrayCopy() { + return Doubles.concat(this.doubleArray); + } + + public int numNonZeroEntries() { + int count = 0; + for (double element : this.doubleArray) { + if (Double.compare(element, 0.0) != 0) { + count++; + } + } + return count; + } + + public void setValue(int index, double value) { + if (index >= doubleArray.length) { + this.doubleArray = Doubles.ensureCapacity(this.doubleArray, index + 1, 0); + } + this.doubleArray[index] = value; + } + + public void addToValue(int index, double value) { + if (index >= doubleArray.length) { + this.doubleArray = Doubles.ensureCapacity(this.doubleArray, index + 1, 0); + } + this.doubleArray[index] += value; + } + + public double sumOfValues() { + double sum = 0.0; + for (double element : this.doubleArray) { + sum += element; + } + return sum; + } + + public void getSingleLineDescription(StringBuilder out) { + out.append("{"); + out.append(Doubles.join("|", this.doubleArray)); + out.append("}"); + } + + @Override + public String toString() { + return "DoubleVector [doubleArray=" + Arrays.toString(doubleArray) + "]"; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + Arrays.hashCode(doubleArray); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + 
if (obj == null) + return false; + if (!(obj instanceof DoubleVector)) + return false; + DoubleVector other = (DoubleVector) obj; + return Arrays.equals(doubleArray, other.doubleArray); + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/core/EntranceProcessor.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/core/EntranceProcessor.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/core/EntranceProcessor.java new file mode 100644 index 0000000..e1bdc14 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/core/EntranceProcessor.java @@ -0,0 +1,59 @@ +package com.yahoo.labs.samoa.core; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 Yahoo! Inc. + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.io.Serializable; + +import com.github.javacliparser.Configurable; + +/** + * An EntranceProcessor is a specific kind of processor dedicated to providing events to inject in the topology. It can be connected to a single output stream. + */ +public interface EntranceProcessor extends Serializable, Configurable, Processor { + + /** + * Initializes the Processor. This method is called once after the topology is set up and before any call to the {@link nextTuple} method. + * + * @param the + * identifier of the processor. 
+ */ + public void onCreate(int id); + + /** + * Checks whether the source stream is finished/exhausted. + */ + public boolean isFinished(); + + /** + * Checks whether a new event is ready to be processed. + * + * @return true if the EntranceProcessor is ready to provide the next event, false otherwise. + */ + public boolean hasNext(); + + /** + * Provides the next tuple to be processed by the topology. This method is the entry point for external events into the topology. + * + * @return the next event to be processed. + */ + public ContentEvent nextEvent(); + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/core/Globals.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/core/Globals.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/core/Globals.java new file mode 100644 index 0000000..8e04016 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/core/Globals.java @@ -0,0 +1,59 @@ +package com.yahoo.labs.samoa.core; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 Yahoo! Inc. + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +/** + * License + */ + +import com.github.javacliparser.StringUtils; + +/** + * Class for storing global information about current version of SAMOA. 
+ * + * @author Albert Bifet + * @version $Revision: 7 $ + */ +public class Globals { + + public static final String workbenchTitle = "SAMOA: Scalable Advanced Massive Online Analysis Platform "; + + public static final String versionString = "0.0.1"; + + public static final String copyrightNotice = "Copyright Yahoo! Inc 2013"; + + public static final String webAddress = "http://github.com/yahoo/samoa"; + + public static String getWorkbenchInfoString() { + StringBuilder result = new StringBuilder(); + result.append(workbenchTitle); + StringUtils.appendNewline(result); + result.append("Version: "); + result.append(versionString); + StringUtils.appendNewline(result); + result.append("Copyright: "); + result.append(copyrightNotice); + StringUtils.appendNewline(result); + result.append("Web: "); + result.append(webAddress); + return result.toString(); + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/core/Processor.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/core/Processor.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/core/Processor.java new file mode 100644 index 0000000..2033fae --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/core/Processor.java @@ -0,0 +1,61 @@ +package com.yahoo.labs.samoa.core; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 Yahoo! Inc. + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
/**
 * Contract for a processor: it is initialized once through
 * {@link #onCreate(int)}, receives events through
 * {@link #process(ContentEvent)}, and can produce further instances of itself
 * through {@link #newProcessor(Processor)}.
 */
public interface Processor extends Serializable, Configurable {

    /**
     * Entry point for the {@link Processor} code. This method is called once for every event received.
     *
     * @param event
     *            the event to be processed.
     * @return true if successful, false otherwise.
     */
    boolean process(ContentEvent event);

    /**
     * Initializes the Processor.
     * This method is called once after the topology is set up and before any call to the
     * {@link #process(ContentEvent)} method.
     *
     * @param id
     *            the identifier of the processor.
     */
    void onCreate(int id);

    /**
     * Creates a copy of a processor.
     * This method is used to instantiate multiple instances of the same {@link Processor}.
     *
     * @param processor
     *            the processor to be copied.
     *
     * @return a new instance of the {@link Processor}.
     */
    Processor newProcessor(Processor processor); // FIXME there should be no need for the processor as a parameter
    // TODO can we substitute this with Cloneable?
}
+ * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.yahoo.labs.samoa.instances.DenseInstance; +import com.yahoo.labs.samoa.instances.Instance; + +/** + * License + */ + +//import weka.core.DenseInstance; +//import weka.core.Instance; + +/** + * The Class SerializableInstance. + * This class is needed for serialization of kryo + */ +public class SerializableInstance extends DenseInstance { + + /** The Constant serialVersionUID. */ + private static final long serialVersionUID = -3659459626274566468L; + + /** + * Instantiates a new serializable instance. + */ + public SerializableInstance() { + super(0); + } + + /** + * Instantiates a new serializable instance. + * + * @param arg0 the arg0 + */ + public SerializableInstance(int arg0) { + super(arg0); + } + + /** + * Instantiates a new serializable instance. 
+ * + * @param inst the inst + */ + public SerializableInstance(Instance inst) { + super(inst); + } + +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/BasicClassificationPerformanceEvaluator.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/BasicClassificationPerformanceEvaluator.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/BasicClassificationPerformanceEvaluator.java new file mode 100644 index 0000000..89a89c0 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/BasicClassificationPerformanceEvaluator.java @@ -0,0 +1,157 @@ +package com.yahoo.labs.samoa.evaluation; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 Yahoo! Inc. + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.yahoo.labs.samoa.moa.AbstractMOAObject; +import com.yahoo.labs.samoa.moa.core.Measurement; +import com.yahoo.labs.samoa.instances.Instance; +import com.yahoo.labs.samoa.instances.Utils; + +/** + * Classification evaluator that performs basic incremental evaluation. 
+ * + * @author Richard Kirkby ([email protected]) + * @author Albert Bifet (abifet at cs dot waikato dot ac dot nz) + * @version $Revision: 7 $ + */ +public class BasicClassificationPerformanceEvaluator extends AbstractMOAObject implements + ClassificationPerformanceEvaluator { + + private static final long serialVersionUID = 1L; + + protected double weightObserved; + + protected double weightCorrect; + + protected double[] columnKappa; + + protected double[] rowKappa; + + protected int numClasses; + + private double weightCorrectNoChangeClassifier; + + private int lastSeenClass; + + @Override + public void reset() { + reset(this.numClasses); + } + + public void reset(int numClasses) { + this.numClasses = numClasses; + this.rowKappa = new double[numClasses]; + this.columnKappa = new double[numClasses]; + for (int i = 0; i < this.numClasses; i++) { + this.rowKappa[i] = 0.0; + this.columnKappa[i] = 0.0; + } + this.weightObserved = 0.0; + this.weightCorrect = 0.0; + this.weightCorrectNoChangeClassifier = 0.0; + this.lastSeenClass = 0; + } + + @Override + public void addResult(Instance inst, double[] classVotes) { + double weight = inst.weight(); + int trueClass = (int) inst.classValue(); + if (weight > 0.0) { + if (this.weightObserved == 0) { + reset(inst.numClasses()); + } + this.weightObserved += weight; + int predictedClass = Utils.maxIndex(classVotes); + if (predictedClass == trueClass) { + this.weightCorrect += weight; + } + if(rowKappa.length > 0){ + this.rowKappa[predictedClass] += weight; + } + if (columnKappa.length > 0) { + this.columnKappa[trueClass] += weight; + } + } + if (this.lastSeenClass == trueClass) { + this.weightCorrectNoChangeClassifier += weight; + } + this.lastSeenClass = trueClass; + } + + @Override + public Measurement[] getPerformanceMeasurements() { + return new Measurement[]{ + new Measurement("classified instances", + getTotalWeightObserved()), + new Measurement("classifications correct (percent)", + getFractionCorrectlyClassified() * 
100.0), + new Measurement("Kappa Statistic (percent)", + getKappaStatistic() * 100.0), + new Measurement("Kappa Temporal Statistic (percent)", + getKappaTemporalStatistic() * 100.0) + }; + + } + + public double getTotalWeightObserved() { + return this.weightObserved; + } + + public double getFractionCorrectlyClassified() { + return this.weightObserved > 0.0 ? this.weightCorrect + / this.weightObserved : 0.0; + } + + public double getFractionIncorrectlyClassified() { + return 1.0 - getFractionCorrectlyClassified(); + } + + public double getKappaStatistic() { + if (this.weightObserved > 0.0) { + double p0 = getFractionCorrectlyClassified(); + double pc = 0.0; + for (int i = 0; i < this.numClasses; i++) { + pc += (this.rowKappa[i] / this.weightObserved) + * (this.columnKappa[i] / this.weightObserved); + } + return (p0 - pc) / (1.0 - pc); + } else { + return 0; + } + } + + public double getKappaTemporalStatistic() { + if (this.weightObserved > 0.0) { + double p0 = this.weightCorrect / this.weightObserved; + double pc = this.weightCorrectNoChangeClassifier / this.weightObserved; + + return (p0 - pc) / (1.0 - pc); + } else { + return 0; + } + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + Measurement.getMeasurementsDescription(getPerformanceMeasurements(), + sb, indent); + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/BasicRegressionPerformanceEvaluator.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/BasicRegressionPerformanceEvaluator.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/BasicRegressionPerformanceEvaluator.java new file mode 100644 index 0000000..d98fe72 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/BasicRegressionPerformanceEvaluator.java @@ -0,0 +1,134 @@ +package com.yahoo.labs.samoa.evaluation; + +/* + 
* #%L
 * SAMOA
 * %%
 * Copyright (C) 2013 Yahoo! Inc.
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import com.yahoo.labs.samoa.instances.Instance;
import com.yahoo.labs.samoa.moa.AbstractMOAObject;
import com.yahoo.labs.samoa.moa.core.Measurement;

/**
 * Regression evaluator that performs basic incremental evaluation.
 *
 * <p>Every accumulator is weighted by the instance weight, so the reported
 * means are weighted means. For streams whose instances all have weight 1
 * the results are the same as with unweighted sums.
 *
 * @author Albert Bifet (abifet at cs dot waikato dot ac dot nz)
 * @version $Revision: 7 $
 */
public class BasicRegressionPerformanceEvaluator extends AbstractMOAObject
        implements RegressionPerformanceEvaluator {

    private static final long serialVersionUID = 1L;

    /** Sum of the weights of all evaluated instances. */
    protected double weightObserved;

    /** Weighted sum of squared differences between target and prediction. */
    protected double squareError;

    /** Weighted sum of absolute differences between target and prediction. */
    protected double averageError;

    /** Weighted sum of the target values. */
    protected double sumTarget;

    /** Weighted sum of squared differences between target and the running target mean. */
    protected double squareTargetError;

    /** Weighted sum of absolute differences between target and the running target mean. */
    protected double averageTargetError;

    /** Sets every accumulator back to zero. */
    @Override
    public void reset() {
        this.weightObserved = 0.0;
        this.squareError = 0.0;
        this.averageError = 0.0;
        this.sumTarget = 0.0;
        this.averageTargetError = 0.0;
        this.squareTargetError = 0.0;
    }

    /**
     * Adds one prediction to the running statistics.
     *
     * <p>Instances with a non-positive weight and empty prediction arrays are
     * ignored. Only {@code prediction[0]} is used.
     *
     * @param inst the instance supplying the target value and the weight
     * @param prediction the predicted values; element 0 is the prediction
     */
    @Override
    public void addResult(Instance inst, double[] prediction) {
        double weight = inst.weight();
        double classValue = inst.classValue();
        if (weight <= 0.0 || prediction.length == 0) {
            return;
        }
        // Baseline predictor: mean of the targets seen before this instance.
        double meanTarget = this.weightObserved != 0
                ? this.sumTarget / this.weightObserved : 0.0;
        double error = classValue - prediction[0];
        double targetError = classValue - meanTarget;

        // The getters divide by the weight sum, so every numerator has to be
        // weighted as well; otherwise the means are wrong for weights != 1.
        this.squareError += weight * error * error;
        this.averageError += weight * Math.abs(error);
        this.squareTargetError += weight * targetError * targetError;
        this.averageTargetError += weight * Math.abs(targetError);
        this.sumTarget += weight * classValue;
        this.weightObserved += weight;
    }

    /**
     * Returns the observed weight together with the absolute and relative
     * mean absolute / root mean squared errors.
     *
     * @return a newly created array of five measurements
     */
    @Override
    public Measurement[] getPerformanceMeasurements() {
        return new Measurement[]{
                    new Measurement("classified instances",
                    getTotalWeightObserved()),
                    new Measurement("mean absolute error",
                    getMeanError()),
                    new Measurement("root mean squared error",
                    getSquareError()),
                    new Measurement("relative mean absolute error",
                    getRelativeMeanError()),
                    new Measurement("relative root mean squared error",
                    getRelativeSquareError())
                };
    }

    /** @return the sum of the weights of all evaluated instances */
    public double getTotalWeightObserved() {
        return this.weightObserved;
    }

    /** @return the mean absolute error, or 0.0 before any instance was evaluated */
    public double getMeanError() {
        return this.weightObserved > 0.0 ? this.averageError
                / this.weightObserved : 0.0;
    }

    /** @return the root mean squared error, or 0.0 before any instance was evaluated */
    public double getSquareError() {
        return Math.sqrt(this.weightObserved > 0.0 ? this.squareError
                / this.weightObserved : 0.0);
    }

    /** @return the mean absolute error of the running-mean baseline, or 0.0 before any instance was evaluated */
    public double getTargetMeanError() {
        return this.weightObserved > 0.0 ? this.averageTargetError
                / this.weightObserved : 0.0;
    }

    /** @return the root mean squared error of the running-mean baseline, or 0.0 before any instance was evaluated */
    public double getTargetSquareError() {
        return Math.sqrt(this.weightObserved > 0.0 ? this.squareTargetError
                / this.weightObserved : 0.0);
    }

    /**
     * Appends a textual description of the current measurements to {@code sb}.
     *
     * @param sb the builder to append to
     * @param indent the indentation passed through to the formatter
     */
    @Override
    public void getDescription(StringBuilder sb, int indent) {
        Measurement.getMeasurementsDescription(getPerformanceMeasurements(),
                sb, indent);
    }

    /** Ratio of the absolute error sum to the baseline's; 0.0 when the baseline sum is not positive. */
    private double getRelativeMeanError() {
        return this.averageTargetError > 0
                ? this.averageError / this.averageTargetError : 0.0;
    }

    /** Square root of the ratio of the squared error sum to the baseline's; 0.0 when the baseline sum is not positive. */
    private double getRelativeSquareError() {
        return Math.sqrt(this.squareTargetError > 0
                ? this.squareError / this.squareTargetError : 0.0);
    }
}
http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/ClassificationPerformanceEvaluator.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/ClassificationPerformanceEvaluator.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/ClassificationPerformanceEvaluator.java new file mode 100644 index 0000000..4d684f6 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/ClassificationPerformanceEvaluator.java @@ -0,0 +1,24 @@ +package com.yahoo.labs.samoa.evaluation; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 Yahoo! Inc. + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ * #L% + */ + +/** + * Marker interface: it declares no members of its own and only inherits those of {@link PerformanceEvaluator}. + * Presumably used to tag evaluators meant for classification tasks (inferred from the name; confirm against callers). + */ +public interface ClassificationPerformanceEvaluator extends PerformanceEvaluator { +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/ClusteringEvaluationContentEvent.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/ClusteringEvaluationContentEvent.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/ClusteringEvaluationContentEvent.java new file mode 100644 index 0000000..27fee6a --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/ClusteringEvaluationContentEvent.java @@ -0,0 +1,82 @@ +package com.yahoo.labs.samoa.evaluation; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 Yahoo! Inc. + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ +import com.yahoo.labs.samoa.core.*; +import com.yahoo.labs.samoa.instances.Instance; +import com.yahoo.labs.samoa.moa.cluster.Clustering; +import com.yahoo.labs.samoa.moa.core.DataPoint; + +/** + * License + */ +/** + * The Class Clustering ResultEvent. 
+ */
public final class ClusteringEvaluationContentEvent implements ContentEvent {

    // Event carrying evaluation input: an optional ground-truth clustering, an
    // optional data point, and a flag marking the last event.

    private static final long serialVersionUID = -7746983521296618922L;

    private final boolean isLast;
    private Clustering gtClustering;
    private DataPoint dataPoint;
    private String key = "0";

    /**
     * Creates an event without payload that is not the last event.
     */
    public ClusteringEvaluationContentEvent() {
        this(false);
    }

    /**
     * Creates an event without payload.
     *
     * @param isLast whether this is the last event
     */
    public ClusteringEvaluationContentEvent(boolean isLast) {
        this(null, null, isLast);
    }

    /**
     * Creates an event carrying a ground-truth clustering and a data point.
     *
     * @param clustering the ground-truth clustering, stored as is
     * @param instance the data point, stored as is
     * @param isLast whether this is the last event
     */
    public ClusteringEvaluationContentEvent(Clustering clustering, DataPoint instance, boolean isLast) {
        this.isLast = isLast;
        this.gtClustering = clustering;
        this.dataPoint = instance;
    }

    /** @return the key of this event; {@code "0"} unless changed via {@link #setKey(String)} */
    public String getKey() {
        return this.key;
    }

    /** @param key the new key of this event */
    public void setKey(String key) {
        this.key = key;
    }

    /** @return the flag given at construction time */
    public boolean isLastEvent() {
        return isLast;
    }

    /** @return the ground-truth clustering; {@code null} when none was supplied */
    Clustering getGTClustering() {
        return gtClustering;
    }

    /** @return the data point; {@code null} when none was supplied */
    DataPoint getDataPoint() {
        return dataPoint;
    }

}
http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/ClusteringEvaluatorProcessor.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/ClusteringEvaluatorProcessor.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/ClusteringEvaluatorProcessor.java new file mode 100644 index 0000000..2525a04 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/ClusteringEvaluatorProcessor.java @@ -0,0 +1,319 @@ +package com.yahoo.labs.samoa.evaluation; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 Yahoo! Inc. + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.yahoo.labs.samoa.core.ContentEvent; +import com.yahoo.labs.samoa.core.Processor; +import com.yahoo.labs.samoa.evaluation.measures.SSQ; +import com.yahoo.labs.samoa.evaluation.measures.StatisticalCollection; +import com.yahoo.labs.samoa.moa.cluster.Clustering; +import com.yahoo.labs.samoa.moa.clusterers.KMeans; +import com.yahoo.labs.samoa.moa.core.DataPoint; +import com.yahoo.labs.samoa.moa.core.Measurement; +import com.yahoo.labs.samoa.moa.evaluation.LearningCurve; +import com.yahoo.labs.samoa.moa.evaluation.LearningEvaluation; +import com.yahoo.labs.samoa.moa.evaluation.MeasureCollection; + +public class ClusteringEvaluatorProcessor implements Processor { + + /** + * + */ + private static final long serialVersionUID = -2778051819116753612L; + + private static final Logger logger = LoggerFactory.getLogger(EvaluatorProcessor.class); + + private static final String ORDERING_MEASUREMENT_NAME = "evaluation instances"; + + private final int samplingFrequency; + private final int decayHorizon; + private final File dumpFile; + private transient PrintStream immediateResultStream = null; + private transient boolean firstDump = true; + + private long totalCount = 0; + private long experimentStart = 0; + + private LearningCurve learningCurve; + + private 
MeasureCollection[] measures; + + private int id; + + protected Clustering gtClustering; + + protected ArrayList<DataPoint> points; + + private ClusteringEvaluatorProcessor(Builder builder) { + this.samplingFrequency = builder.samplingFrequency; + this.dumpFile = builder.dumpFile; + this.points = new ArrayList<>(); + this.decayHorizon = builder.decayHorizon; + } + + @Override + public boolean process(ContentEvent event) { + boolean ret = false; + if (event instanceof ClusteringResultContentEvent) { + ret = process((ClusteringResultContentEvent) event); + } + if (event instanceof ClusteringEvaluationContentEvent) { + ret = process((ClusteringEvaluationContentEvent) event); + } + return ret; + } + + private boolean process(ClusteringResultContentEvent result) { + // evaluate + Clustering clustering = KMeans.gaussianMeans(gtClustering, result.getClustering()); + for (MeasureCollection measure : measures) { + try { + measure.evaluateClusteringPerformance(clustering, gtClustering, points); + } catch (Exception ex) { + ex.printStackTrace(); + } + } + + this.addMeasurement(); + + if (result.isLastEvent()) { + this.concludeMeasurement(); + return true; + } + + totalCount += 1; + + if (totalCount == 1) { + experimentStart = System.nanoTime(); + } + + return false; + } + + private boolean process(ClusteringEvaluationContentEvent result) { + boolean ret = false; + if (result.getGTClustering() != null) { + gtClustering = result.getGTClustering(); + ret = true; + } + if (result.getDataPoint() != null) { + points.add(result.getDataPoint()); + if (points.size() > this.decayHorizon) { + points.remove(0); + } + ret = true; + } + return ret; + } + + @Override + public void onCreate(int id) { + this.id = id; + this.learningCurve = new LearningCurve(ORDERING_MEASUREMENT_NAME); + // create the measure collection + measures = getMeasures(getMeasureSelection()); + + if (this.dumpFile != null) { + try { + if (dumpFile.exists()) { + this.immediateResultStream = new PrintStream(new 
FileOutputStream(dumpFile, true), true); + } else { + this.immediateResultStream = new PrintStream(new FileOutputStream(dumpFile), true); + } + + } catch (FileNotFoundException e) { + this.immediateResultStream = null; + logger.error("File not found exception for {}:{}", this.dumpFile.getAbsolutePath(), e.toString()); + + } catch (Exception e) { + this.immediateResultStream = null; + logger.error("Exception when creating {}:{}", this.dumpFile.getAbsolutePath(), e.toString()); + } + } + + this.firstDump = true; + } + + private static ArrayList<Class> getMeasureSelection() { + ArrayList<Class> mclasses = new ArrayList<>(); + // mclasses.add(EntropyCollection.class); + // mclasses.add(F1.class); + // mclasses.add(General.class); + // *mclasses.add(CMM.class); + mclasses.add(SSQ.class); + // *mclasses.add(SilhouetteCoefficient.class); + mclasses.add(StatisticalCollection.class); + // mclasses.add(Separation.class); + + return mclasses; + } + + private static MeasureCollection[] getMeasures(ArrayList<Class> measure_classes) { + MeasureCollection[] measures = new MeasureCollection[measure_classes.size()]; + for (int i = 0; i < measure_classes.size(); i++) { + try { + MeasureCollection m = (MeasureCollection) measure_classes.get(i).newInstance(); + measures[i] = m; + + } catch (Exception ex) { + java.util.logging.Logger.getLogger("Couldn't create Instance for " + measure_classes.get(i).getName()); + ex.printStackTrace(); + } + } + return measures; + } + + @Override + public Processor newProcessor(Processor p) { + ClusteringEvaluatorProcessor originalProcessor = (ClusteringEvaluatorProcessor) p; + ClusteringEvaluatorProcessor newProcessor = new ClusteringEvaluatorProcessor.Builder(originalProcessor).build(); + + if (originalProcessor.learningCurve != null) { + newProcessor.learningCurve = originalProcessor.learningCurve; + } + + return newProcessor; + } + + @Override + public String toString() { + StringBuilder report = new StringBuilder(); + + 
report.append(EvaluatorProcessor.class.getCanonicalName()); + report.append("id = ").append(this.id); + report.append('\n'); + + if (learningCurve.numEntries() > 0) { + report.append(learningCurve.toString()); + report.append('\n'); + } + return report.toString(); + } + + private void addMeasurement() { + // printMeasures(); + List<Measurement> measurements = new ArrayList<>(); + measurements.add(new Measurement(ORDERING_MEASUREMENT_NAME, totalCount * this.samplingFrequency)); + + addClusteringPerformanceMeasurements(measurements); + Measurement[] finalMeasurements = measurements.toArray(new Measurement[measurements.size()]); + + LearningEvaluation learningEvaluation = new LearningEvaluation(finalMeasurements); + learningCurve.insertEntry(learningEvaluation); + logger.debug("evaluator id = {}", this.id); + // logger.info(learningEvaluation.toString()); + + if (immediateResultStream != null) { + if (firstDump) { + immediateResultStream.println(learningCurve.headerToString()); + firstDump = false; + } + + immediateResultStream.println(learningCurve.entryToString(learningCurve.numEntries() - 1)); + immediateResultStream.flush(); + } + } + + private void addClusteringPerformanceMeasurements(List<Measurement> measurements) { + for (MeasureCollection measure : measures) { + for (int j = 0; j < measure.getNumMeasures(); j++) { + Measurement measurement = new Measurement(measure.getName(j), measure.getLastValue(j)); + measurements.add(measurement); + } + } + } + + private void concludeMeasurement() { + logger.info("last event is received!"); + logger.info("total count: {}", this.totalCount); + + String learningCurveSummary = this.toString(); + logger.info(learningCurveSummary); + + long experimentEnd = System.nanoTime(); + long totalExperimentTime = TimeUnit.SECONDS.convert(experimentEnd - experimentStart, TimeUnit.NANOSECONDS); + logger.info("total evaluation time: {} seconds for {} instances", totalExperimentTime, totalCount); + // logger.info("average throughput rate: 
{} instances/seconds", (totalCount/totalExperimentTime)); + } + + private void printMeasures() { + StringBuilder sb = new StringBuilder(); + for (MeasureCollection measure : measures) { + + sb.append("Mean ").append(measure.getClass().getSimpleName()).append(":").append(measure.getNumMeasures()).append("\n"); + for (int j = 0; j < measure.getNumMeasures(); j++) { + sb.append("[").append(measure.getName(j)).append("=").append(measure.getLastValue(j)).append("] \n"); + + } + sb.append("\n"); + } + + logger.debug("\n MEASURES: \n\n {}", sb.toString()); + System.out.println(sb.toString()); + } + + public static class Builder { + + private int samplingFrequency = 1000; + private File dumpFile = null; + private int decayHorizon = 1000; + + public Builder(int samplingFrequency) { + this.samplingFrequency = samplingFrequency; + } + + public Builder(ClusteringEvaluatorProcessor oldProcessor) { + this.samplingFrequency = oldProcessor.samplingFrequency; + this.dumpFile = oldProcessor.dumpFile; + this.decayHorizon = oldProcessor.decayHorizon; + } + + public Builder samplingFrequency(int samplingFrequency) { + this.samplingFrequency = samplingFrequency; + return this; + } + + public Builder decayHorizon(int decayHorizon) { + this.decayHorizon = decayHorizon; + return this; + } + + public Builder dumpFile(File file) { + this.dumpFile = file; + return this; + } + + public ClusteringEvaluatorProcessor build() { + return new ClusteringEvaluatorProcessor(this); + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/ClusteringResultContentEvent.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/ClusteringResultContentEvent.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/ClusteringResultContentEvent.java new file mode 100644 index 0000000..1a5610e --- /dev/null +++ 
b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/ClusteringResultContentEvent.java @@ -0,0 +1,73 @@
package com.yahoo.labs.samoa.evaluation;

/*
 * #%L
 * SAMOA
 * %%
 * Copyright (C) 2013 Yahoo! Inc.
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import com.yahoo.labs.samoa.core.ContentEvent;
import com.yahoo.labs.samoa.moa.cluster.Clustering;

/**
 * Event carrying a clustering result, together with a flag marking the last
 * event.
 */
public final class ClusteringResultContentEvent implements ContentEvent {

    private static final long serialVersionUID = -7746983521296618922L;

    private final boolean isLast;
    private Clustering clustering;
    private String key = "0";

    /**
     * Creates an event without a clustering that is not the last event.
     */
    public ClusteringResultContentEvent() {
        this(false);
    }

    /**
     * Creates an event without a clustering.
     *
     * @param isLast whether this is the last result
     */
    public ClusteringResultContentEvent(boolean isLast) {
        this(null, isLast);
    }

    /**
     * Creates an event carrying a clustering result.
     *
     * @param clustering the clustering result, stored as is
     * @param isLast whether this is the last result
     */
    public ClusteringResultContentEvent(Clustering clustering, boolean isLast) {
        this.isLast = isLast;
        this.clustering = clustering;
    }

    /** @return the key of this event; {@code "0"} unless changed via {@link #setKey(String)} */
    public String getKey() {
        return this.key;
    }

    /** @param key the new key of this event */
    public void setKey(String key) {
        this.key = key;
    }

    /** @return the flag given at construction time */
    public boolean isLastEvent() {
        return isLast;
    }

    /** @return the clustering result; {@code null} when none was supplied */
    public Clustering getClustering() {
        return clustering;
    }
}
http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/EvaluatorProcessor.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/EvaluatorProcessor.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/EvaluatorProcessor.java new file mode 100755 index 0000000..f110872 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/EvaluatorProcessor.java @@ -0,0 +1,234 @@ +package com.yahoo.labs.samoa.evaluation; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 Yahoo! Inc. + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
* #L%
 */

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.util.Collections;
import java.util.List;
import java.util.Vector;
import java.util.concurrent.TimeUnit;

import com.yahoo.labs.samoa.moa.core.Measurement;
import com.yahoo.labs.samoa.moa.evaluation.LearningCurve;
import com.yahoo.labs.samoa.moa.evaluation.LearningEvaluation;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.yahoo.labs.samoa.core.ContentEvent;
import com.yahoo.labs.samoa.core.Processor;
import com.yahoo.labs.samoa.learners.ResultContentEvent;

/**
 * Processor that feeds {@link ResultContentEvent}s into a
 * {@link PerformanceEvaluator}.
 *
 * <p>Whenever the number of evaluated results is a positive multiple of the
 * sampling frequency, the evaluator's measurements are appended to a
 * {@link LearningCurve} and, when a dump file is configured, written to it.
 */
public class EvaluatorProcessor implements Processor {

    private static final long serialVersionUID = -2778051819116753612L;

    private static final Logger logger =
            LoggerFactory.getLogger(EvaluatorProcessor.class);

    private static final String ORDERING_MEASUREMENT_NAME = "evaluation instances";

    private final PerformanceEvaluator evaluator;
    private final int samplingFrequency;
    private final File dumpFile;
    private transient PrintStream immediateResultStream = null;
    private transient boolean firstDump = true;

    private long totalCount = 0;
    private long experimentStart = 0;

    private long sampleStart = 0;

    private LearningCurve learningCurve;
    private int id;

    private EvaluatorProcessor(Builder builder) {
        this.evaluator = builder.evaluator;
        this.samplingFrequency = builder.samplingFrequency;
        this.dumpFile = builder.dumpFile;
    }

    /**
     * Handles one result event.
     *
     * <p>The sampling check runs before the event is added, so the measurement
     * for a full sample is recorded when the next event arrives. The last
     * event only concludes the run; it is not passed to the evaluator.
     *
     * @param event the incoming event; must be a {@link ResultContentEvent}
     * @return {@code true} only when {@code event} is the last event
     */
    @Override
    public boolean process(ContentEvent event) {

        ResultContentEvent result = (ResultContentEvent) event;

        if ((totalCount > 0) && (totalCount % samplingFrequency) == 0) {
            long sampleEnd = System.nanoTime();
            long sampleDuration = TimeUnit.SECONDS.convert(sampleEnd - sampleStart, TimeUnit.NANOSECONDS);
            sampleStart = sampleEnd;

            logger.info("{} seconds for {} instances", sampleDuration, samplingFrequency);
            this.addMeasurement();
        }

        if (result.isLastEvent()) {
            this.concludeMeasurement();
            return true;
        }

        evaluator.addResult(result.getInstance(), result.getClassVotes());
        totalCount += 1;

        if (totalCount == 1) {
            // The first evaluated result starts both timers.
            sampleStart = System.nanoTime();
            experimentStart = sampleStart;
        }

        return false;
    }

    /**
     * Initializes the learning curve and, when a dump file is configured, the
     * output stream (opened in append mode).
     *
     * @param id the identifier of this processor instance
     */
    @Override
    public void onCreate(int id) {
        this.id = id;
        this.learningCurve = new LearningCurve(ORDERING_MEASUREMENT_NAME);

        if (this.dumpFile != null) {
            try {
                // Append mode also creates the file when it does not exist
                // yet, so a single call covers both cases.
                this.immediateResultStream = new PrintStream(
                        new FileOutputStream(dumpFile, true), true);
            } catch (FileNotFoundException e) {
                this.immediateResultStream = null;
                logger.error("File not found exception for {}:{}", this.dumpFile.getAbsolutePath(), e.toString());
            } catch (Exception e) {
                this.immediateResultStream = null;
                logger.error("Exception when creating {}:{}", this.dumpFile.getAbsolutePath(), e.toString());
            }
        }

        this.firstDump = true;
    }

    /**
     * Creates a copy of {@code p} with the same configuration. The copy shares
     * the learning curve instance of the original when one exists.
     *
     * @param p the processor to copy; must be an {@code EvaluatorProcessor}
     * @return the new processor
     */
    @Override
    public Processor newProcessor(Processor p) {
        EvaluatorProcessor originalProcessor = (EvaluatorProcessor) p;
        EvaluatorProcessor newProcessor = new EvaluatorProcessor.Builder(originalProcessor).build();

        if (originalProcessor.learningCurve != null) {
            newProcessor.learningCurve = originalProcessor.learningCurve;
        }

        return newProcessor;
    }

    /**
     * Returns the class name and id of this processor, followed by the
     * learning curve when it has at least one entry.
     */
    @Override
    public String toString() {
        StringBuilder report = new StringBuilder();

        report.append(EvaluatorProcessor.class.getCanonicalName());
        report.append("id = ").append(this.id);
        report.append('\n');

        // learningCurve is only created in onCreate.
        if (learningCurve != null && learningCurve.numEntries() > 0) {
            report.append(learningCurve.toString());
            report.append('\n');
        }
        return report.toString();
    }

    /** Appends the evaluator's current measurements to the learning curve and the dump file. */
    private void addMeasurement() {
        // Slot 0 holds the ordering measurement; the evaluator's own
        // measurements follow in their original order.
        Measurement[] evaluatorMeasurements = evaluator.getPerformanceMeasurements();
        Measurement[] finalMeasurements = new Measurement[evaluatorMeasurements.length + 1];
        finalMeasurements[0] = new Measurement(ORDERING_MEASUREMENT_NAME, totalCount);
        System.arraycopy(evaluatorMeasurements, 0, finalMeasurements, 1, evaluatorMeasurements.length);

        LearningEvaluation learningEvaluation = new LearningEvaluation(finalMeasurements);
        learningCurve.insertEntry(learningEvaluation);
        logger.debug("evaluator id = {}", this.id);
        logger.info(learningEvaluation.toString());

        if (immediateResultStream != null) {
            if (firstDump) {
                immediateResultStream.println(learningCurve.headerToString());
                firstDump = false;
            }

            immediateResultStream.println(learningCurve.entryToString(learningCurve.numEntries() - 1));
            immediateResultStream.flush();
        }
    }

    /** Logs the summary and total evaluation time and marks the dump file as completed. */
    private void concludeMeasurement() {
        logger.info("last event is received!");
        logger.info("total count: {}", this.totalCount);

        String learningCurveSummary = this.toString();
        logger.info(learningCurveSummary);

        long experimentEnd = System.nanoTime();
        long totalExperimentTime = TimeUnit.SECONDS.convert(experimentEnd - experimentStart, TimeUnit.NANOSECONDS);
        logger.info("total evaluation time: {} seconds for {} instances", totalExperimentTime, totalCount);

        if (immediateResultStream != null) {
            immediateResultStream.println("# COMPLETED");
            immediateResultStream.flush();
        }
    }

    /** Builder for {@link EvaluatorProcessor}. */
    public static class Builder {

        private final PerformanceEvaluator evaluator;
        private int samplingFrequency = 100000;
        private File dumpFile = null;

        /** @param evaluator the evaluator that receives every result */
        public Builder(PerformanceEvaluator evaluator) {
            this.evaluator = evaluator;
        }

        /** @param oldProcessor the processor whose configuration is copied */
        public Builder(EvaluatorProcessor oldProcessor) {
            this.evaluator = oldProcessor.evaluator;
            this.samplingFrequency = oldProcessor.samplingFrequency;
            this.dumpFile = oldProcessor.dumpFile;
        }

        /** @return this builder, with the number of results between two measurements replaced */
        public Builder samplingFrequency(int samplingFrequency) {
            this.samplingFrequency = samplingFrequency;
            return this;
        }

        /** @return this builder, with the dump file replaced ({@code null} disables dumping) */
        public Builder dumpFile(File file) {
            this.dumpFile = file;
            return this;
        }

        /** @return a new processor configured from this builder */
        public EvaluatorProcessor build() {
            return new EvaluatorProcessor(this);
        }
    }
}
http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/PerformanceEvaluator.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/PerformanceEvaluator.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/PerformanceEvaluator.java new file mode 100644 index 0000000..b88e87a --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/PerformanceEvaluator.java @@ -0,0 +1,62 @@ +package com.yahoo.labs.samoa.evaluation; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 Yahoo! Inc. + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.yahoo.labs.samoa.moa.MOAObject; +import com.yahoo.labs.samoa.moa.core.Measurement; + +import com.yahoo.labs.samoa.instances.Instance; + +/** + * Interface implemented by learner evaluators to monitor the results of the + * learning process. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public interface PerformanceEvaluator extends MOAObject { + + /** + * Resets this evaluator. It must be similar to starting a new evaluator + * from scratch. + * + */ + public void reset(); + + /** + * Adds a learning result to this evaluator. 
+ * + * @param inst + * the instance to be classified + * @param classVotes + * an array containing the estimated membership probabilities of + * the test instance in each class + */ + public void addResult(Instance inst, double[] classVotes); + + /** + * Gets the current measurements monitored by this evaluator. + * + * @return an array of measurements monitored by this evaluator + */ + public Measurement[] getPerformanceMeasurements(); +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/RegressionPerformanceEvaluator.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/RegressionPerformanceEvaluator.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/RegressionPerformanceEvaluator.java new file mode 100644 index 0000000..d230cd0 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/RegressionPerformanceEvaluator.java @@ -0,0 +1,25 @@ +package com.yahoo.labs.samoa.evaluation; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 Yahoo! Inc. + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * #L% + */ + +/** + * Marker interface: it declares no members of its own and only inherits those of {@link PerformanceEvaluator}. + * Presumably used to tag evaluators meant for regression tasks (inferred from the name; confirm against callers). + */ +public interface RegressionPerformanceEvaluator extends PerformanceEvaluator { + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/787864b6/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/WindowClassificationPerformanceEvaluator.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/WindowClassificationPerformanceEvaluator.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/WindowClassificationPerformanceEvaluator.java new file mode 100644 index 0000000..8b1f394 --- /dev/null +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/WindowClassificationPerformanceEvaluator.java @@ -0,0 +1,220 @@ +package com.yahoo.labs.samoa.evaluation; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2013 Yahoo! Inc. + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import com.github.javacliparser.IntOption; +import com.yahoo.labs.samoa.moa.AbstractMOAObject; +import com.yahoo.labs.samoa.moa.core.Measurement; +import com.yahoo.labs.samoa.instances.Instance; +import com.yahoo.labs.samoa.instances.Utils; + +/** + * Classification evaluator that updates evaluation results using a sliding + * window. 
*
 * @author Albert Bifet (abifet at cs dot waikato dot ac dot nz)
 * @version $Revision: 7 $
 */
public class WindowClassificationPerformanceEvaluator extends AbstractMOAObject implements
        ClassificationPerformanceEvaluator {

    // Implementation notes:
    // - Every statistic is kept in an Estimator, a fixed-size circular buffer
    //   with a running sum. All estimators used by the getters receive exactly
    //   one value per evaluated instance (0 when the instance does not count),
    //   which keeps their windows aligned.
    // - The estimators are only created by reset(int), which addResult calls
    //   for the first positively weighted instance. The getters therefore
    //   treat a missing estimator as an empty window.

    private static final long serialVersionUID = 1L;

    public IntOption widthOption = new IntOption("width",
            'w', "Size of Window", 1000);

    /** Total weight of all evaluated instances since the last reset (not windowed). */
    protected double TotalweightObserved = 0;

    /** Windowed instance weights. */
    protected Estimator weightObserved;

    /** Windowed weights of correctly classified instances. */
    protected Estimator weightCorrect;

    /** Windowed weights of instances whose class equals the previously seen class. */
    protected Estimator weightCorrectNoChangeClassifier;

    /** Class value of the previously evaluated instance; 0 after a reset. */
    protected double lastSeenClass;

    /** Per class: windowed weights of instances whose true class is that class. */
    protected Estimator[] columnKappa;

    /** Per class: windowed weights of instances predicted as that class. */
    protected Estimator[] rowKappa;

    /** Per class: updated only for instances of that class; not read by any getter in this class. */
    protected Estimator[] classAccuracy;

    protected int numClasses;

    /**
     * Running sum over the most recent {@code sizeWindow} added values,
     * backed by a circular buffer.
     */
    public class Estimator {

        protected double[] window;

        /** Index of the slot that the next {@link #add(double)} overwrites. */
        protected int posWindow;

        /** Number of values currently held; at most {@code SizeWindow}. */
        protected int lenWindow;

        protected int SizeWindow;

        protected double sum;

        /** @param sizeWindow the number of values the window holds */
        public Estimator(int sizeWindow) {
            window = new double[sizeWindow];
            SizeWindow = sizeWindow;
            posWindow = 0;
            lenWindow = 0;
        }

        /**
         * Adds a value, replacing the oldest one once the window is full.
         *
         * @param value the value to add
         */
        public void add(double value) {
            // The slot being overwritten holds the oldest value (or 0 while
            // the window is still filling up).
            sum -= window[posWindow];
            sum += value;
            window[posWindow] = value;
            posWindow++;
            if (posWindow == SizeWindow) {
                posWindow = 0;
            }
            if (lenWindow < SizeWindow) {
                lenWindow++;
            }
        }

        /** @return the sum of the values currently in the window */
        public double total() {
            return sum;
        }

        /** @return the number of values currently in the window */
        public double length() {
            return lenWindow;
        }

    }

    /** Resets all statistics, keeping the current number of classes. */
    @Override
    public void reset() {
        reset(this.numClasses);
    }

    /**
     * Resets all statistics and recreates the estimators with the window
     * size currently set in {@code widthOption}.
     *
     * @param numClasses the number of classes to track
     */
    public void reset(int numClasses) {
        int width = this.widthOption.getValue();
        this.numClasses = numClasses;
        this.rowKappa = new Estimator[numClasses];
        this.columnKappa = new Estimator[numClasses];
        this.classAccuracy = new Estimator[numClasses];
        for (int i = 0; i < this.numClasses; i++) {
            this.rowKappa[i] = new Estimator(width);
            this.columnKappa[i] = new Estimator(width);
            this.classAccuracy[i] = new Estimator(width);
        }
        this.weightCorrect = new Estimator(width);
        this.weightCorrectNoChangeClassifier = new Estimator(width);
        this.weightObserved = new Estimator(width);
        this.TotalweightObserved = 0;
        this.lastSeenClass = 0;
    }

    /**
     * Adds one classification result. Instances with a non-positive weight
     * are ignored. The predicted class is the index of the largest vote.
     *
     * @param inst the instance supplying true class, weight and class count
     * @param classVotes the votes per class
     */
    @Override
    public void addResult(Instance inst, double[] classVotes) {
        double weight = inst.weight();
        int trueClass = (int) inst.classValue();
        if (weight <= 0.0) {
            return;
        }
        if (TotalweightObserved == 0) {
            // First evaluated instance: size the estimators for its class count.
            reset(inst.numClasses());
        }
        this.TotalweightObserved += weight;
        this.weightObserved.add(weight);

        int predictedClass = Utils.maxIndex(classVotes);
        boolean correct = predictedClass == trueClass;
        this.weightCorrect.add(correct ? weight : 0);

        //Add Kappa statistic information
        for (int i = 0; i < this.numClasses; i++) {
            this.rowKappa[i].add(i == predictedClass ? weight : 0);
            this.columnKappa[i].add(i == trueClass ? weight : 0);
        }
        this.weightCorrectNoChangeClassifier.add(this.lastSeenClass == trueClass ? weight : 0);
        this.classAccuracy[trueClass].add(correct ? weight : 0.0);
        this.lastSeenClass = trueClass;
    }

    /**
     * Returns the total observed weight (not windowed) and the windowed
     * accuracy, Kappa and temporal Kappa, the latter three in percent.
     *
     * @return a newly created array of four measurements
     */
    @Override
    public Measurement[] getPerformanceMeasurements() {
        return new Measurement[]{
                    new Measurement("classified instances",
                    this.TotalweightObserved),
                    new Measurement("classifications correct (percent)",
                    getFractionCorrectlyClassified() * 100.0),
                    new Measurement("Kappa Statistic (percent)",
                    getKappaStatistic() * 100.0),
                    new Measurement("Kappa Temporal Statistic (percent)",
                    getKappaTemporalStatistic() * 100.0)
                };

    }

    /** @return the weight held in the current window; 0.0 before the first evaluated instance */
    public double getTotalWeightObserved() {
        return windowWeight();
    }

    /** @return the windowed fraction of correctly classified weight; 0.0 for an empty window */
    public double getFractionCorrectlyClassified() {
        double observed = windowWeight();
        return observed > 0.0 ? this.weightCorrect.total() / observed : 0.0;
    }

    /**
     * Computes the windowed Kappa statistic {@code (p0 - pc) / (1 - pc)},
     * where {@code pc} is the chance agreement derived from the row and
     * column marginals.
     *
     * @return the Kappa statistic; 0 for an empty window; 1 when the chance
     *         agreement reaches 1 (the plain formula would otherwise evaluate
     *         0/0 and report NaN)
     */
    public double getKappaStatistic() {
        double observed = windowWeight();
        if (observed <= 0.0) {
            return 0;
        }
        double p0 = this.weightCorrect.total() / observed;
        double pc = 0;
        for (int i = 0; i < this.numClasses; i++) {
            pc += (this.rowKappa[i].total() / observed)
                    * (this.columnKappa[i].total() / observed);
        }
        // pc == 1 only happens when every prediction and every label in the
        // window fall in the same single class; follow Weka's
        // Evaluation.kappa() and report perfect agreement.
        if (pc >= 1) {
            return 1;
        }
        return (p0 - pc) / (1 - pc);
    }

    /**
     * Computes the windowed temporal Kappa statistic {@code (p0 - pc) / (1 - pc)},
     * where {@code pc} is the accuracy of always predicting the previously
     * seen class.
     *
     * @return the statistic, or 0 for an empty window
     */
    public double getKappaTemporalStatistic() {
        double observed = windowWeight();
        if (observed <= 0.0) {
            return 0;
        }
        double p0 = this.weightCorrect.total() / observed;
        double pc = this.weightCorrectNoChangeClassifier.total() / observed;

        // NOTE(review): when pc == 1 this yields NaN or -Infinity; the intended
        // value for that case is not evident from this file, so it is left as is.
        return (p0 - pc) / (1 - pc);
    }

    /** @return {@code 1.0 - getFractionCorrectlyClassified()} */
    public double getFractionIncorrectlyClassified() {
        return 1.0 - getFractionCorrectlyClassified();
    }

    /**
     * Appends a textual description of the current measurements to {@code sb}.
     *
     * @param sb the builder to append to
     * @param indent the indentation passed through to the formatter
     */
    @Override
    public void getDescription(StringBuilder sb, int indent) {
        Measurement.getMeasurementsDescription(getPerformanceMeasurements(),
                sb, indent);
    }

    /** Weight held in the current window; 0.0 while the estimators have not been created yet. */
    private double windowWeight() {
        return this.weightObserved == null ? 0.0 : this.weightObserved.total();
    }

}
