package org.apache.samoa.learners.classifiers.rules.centralized;

/*
 * #%L
 * SAMOA
 * %%
 * Copyright (C) 2014 - 2015 Apache Software Foundation
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

import org.apache.samoa.core.ContentEvent;
import org.apache.samoa.core.Processor;
import org.apache.samoa.instances.Instance;
import org.apache.samoa.instances.Instances;
import org.apache.samoa.learners.InstanceContentEvent;
import org.apache.samoa.learners.ResultContentEvent;
import org.apache.samoa.learners.classifiers.rules.common.ActiveRule;
import org.apache.samoa.learners.classifiers.rules.common.Perceptron;
import org.apache.samoa.learners.classifiers.rules.common.RuleActiveRegressionNode;
import org.apache.samoa.moa.classifiers.rules.core.attributeclassobservers.FIMTDDNumericAttributeClassLimitObserver;
import org.apache.samoa.moa.classifiers.rules.core.voting.ErrorWeightedVote;
import org.apache.samoa.topology.Stream;

/**
 * AMRules Regressor Processor is the main (and only) processor for the AMRulesRegressor task.
 * It is adapted from the AMRules implementation in MOA.
 *
 * <p>The processor maintains an ordered or unordered set of {@link ActiveRule}s plus a default
 * rule. Testing instances are predicted with an error-weighted vote over covering rules;
 * training instances update the covering rules (or the default rule when none covers), with
 * Page-Hinckley change detection, anomaly filtering and periodic rule expansion.
 *
 * @author Anh Thu Vu
 */
public class AMRulesRegressorProcessor implements Processor {

  private static final long serialVersionUID = 1L;

  private int processorId;

  // Rules & default rule
  protected List<ActiveRule> ruleSet;
  protected ActiveRule defaultRule;
  protected int ruleNumberID;
  protected double[] statistics;

  // SAMOA output stream (predictions)
  private Stream resultStream;

  // Options
  protected int pageHinckleyThreshold;
  protected double pageHinckleyAlpha;
  protected boolean driftDetection;
  protected int predictionFunction; // Adaptive=0 Perceptron=1 TargetMean=2
  protected boolean constantLearningRatioDecay;
  protected double learningRatio;

  protected double splitConfidence;
  protected double tieThreshold;
  protected int gracePeriod;

  protected boolean noAnomalyDetection;
  protected double multivariateAnomalyProbabilityThreshold;
  protected double univariateAnomalyprobabilityThreshold;
  protected int anomalyNumInstThreshold;

  protected boolean unorderedRules;

  protected FIMTDDNumericAttributeClassLimitObserver numericObserver;

  protected ErrorWeightedVote voteType;

  /**
   * Constructs the processor from a configured {@link Builder}.
   */
  public AMRulesRegressorProcessor(Builder builder) {
    this.pageHinckleyThreshold = builder.pageHinckleyThreshold;
    this.pageHinckleyAlpha = builder.pageHinckleyAlpha;
    this.driftDetection = builder.driftDetection;
    this.predictionFunction = builder.predictionFunction;
    this.constantLearningRatioDecay = builder.constantLearningRatioDecay;
    this.learningRatio = builder.learningRatio;
    this.splitConfidence = builder.splitConfidence;
    this.tieThreshold = builder.tieThreshold;
    this.gracePeriod = builder.gracePeriod;

    this.noAnomalyDetection = builder.noAnomalyDetection;
    this.multivariateAnomalyProbabilityThreshold = builder.multivariateAnomalyProbabilityThreshold;
    this.univariateAnomalyprobabilityThreshold = builder.univariateAnomalyprobabilityThreshold;
    this.anomalyNumInstThreshold = builder.anomalyNumInstThreshold;
    this.unorderedRules = builder.unorderedRules;

    this.numericObserver = builder.numericObserver;
    this.voteType = builder.voteType;
  }

  /**
   * Processes one {@link InstanceContentEvent}: predicts on testing instances and trains on
   * training instances (an event may be both).
   */
  @Override
  public boolean process(ContentEvent event) {
    InstanceContentEvent instanceEvent = (InstanceContentEvent) event;

    if (instanceEvent.isTesting()) {
      this.predictOnInstance(instanceEvent);
    }

    if (instanceEvent.isTraining()) {
      this.trainOnInstance(instanceEvent);
    }

    return true;
  }

  /*
   * Prediction
   */
  private void predictOnInstance(InstanceContentEvent instanceEvent) {
    double[] prediction = getVotesForInstance(instanceEvent.getInstance());
    ResultContentEvent rce = newResultContentEvent(prediction, instanceEvent);
    resultStream.put(rce);
  }

  /**
   * Helper method to generate a new ResultContentEvent based on an instance and its prediction
   * result.
   *
   * @param prediction
   *          The predicted value(s) from the rule model.
   * @param inEvent
   *          The associated instance content event
   * @return ResultContentEvent to be sent into Evaluator PI or other destination PI.
   */
  private ResultContentEvent newResultContentEvent(double[] prediction, InstanceContentEvent inEvent) {
    ResultContentEvent rce = new ResultContentEvent(inEvent.getInstanceIndex(), inEvent.getInstance(),
        inEvent.getClassId(), prediction, inEvent.isLastEvent());
    rce.setClassifierIndex(this.processorId);
    rce.setEvaluationIndex(inEvent.getEvaluationIndex());
    return rce;
  }

  /**
   * Returns the prediction for the given instance as an error-weighted vote over the rules
   * covering it. With ordered rules only the first covering rule votes; when no rule covers the
   * instance, the default rule votes. Extension of getVotesForInstance in moa.classifier.java;
   * called in EvaluateModelRegression.
   */
  private double[] getVotesForInstance(Instance instance) {
    ErrorWeightedVote errorWeightedVote = newErrorWeightedVote();
    int numberOfRulesCovering = 0;

    for (ActiveRule rule : ruleSet) {
      if (rule.isCovering(instance)) {
        numberOfRulesCovering++;
        double[] vote = rule.getPrediction(instance);
        double error = rule.getCurrentError();
        errorWeightedVote.addVote(vote, error);
        if (!this.unorderedRules) { // Ordered rules: only the first covering rule votes.
          break;
        }
      }
    }

    if (numberOfRulesCovering == 0) {
      // No rule covers the instance: fall back to the default rule.
      double[] vote = defaultRule.getPrediction(instance);
      double error = defaultRule.getCurrentError();
      errorWeightedVote.addVote(vote, error);
    }
    return errorWeightedVote.computeWeightedVote();
  }

  /** Returns a fresh copy of the configured vote-combination strategy. */
  public ErrorWeightedVote newErrorWeightedVote() {
    return voteType.getACopy();
  }

  /*
   * Training
   */
  private void trainOnInstance(InstanceContentEvent instanceEvent) {
    this.trainOnInstanceImpl(instanceEvent.getInstance());
  }

  /**
   * Trains the model on a single instance.
   *
   * <pre>
   * AMRules Algorithm
   *
   * For each rule in the rule set
   *   If rule covers the instance
   *     If the instance is not an anomaly
   *       Update Change Detection Tests
   *         Compute prediction error
   *         Call PHTest
   *         If change is detected then
   *           Remove rule
   *         Else
   *           Update sufficient statistics of rule
   *           If number of examples in rule > Nmin
   *             Expand rule
   *       If ordered set then break
   * If none of the rules covers the instance
   *   Update sufficient statistics of default rule
   *   If number of examples in default rule is multiple of Nmin
   *     Expand default rule and add it to the set of rules
   *     Reset the default rule
   * </pre>
   */
  public void trainOnInstanceImpl(Instance instance) {
    boolean rulesCoveringInstance = false;
    Iterator<ActiveRule> ruleIterator = this.ruleSet.iterator();
    while (ruleIterator.hasNext()) {
      ActiveRule rule = ruleIterator.next();
      if (rule.isCovering(instance)) {
        rulesCoveringInstance = true;
        if (!isAnomaly(instance, rule)) {
          // Update change detection: use adaptive-mode prediction error for the PH test.
          double error = rule.computeError(instance);
          boolean changeDetected = ((RuleActiveRegressionNode) rule.getLearningNode()).updateChangeDetection(error);
          if (changeDetected) {
            // Concept drift detected on this rule: drop it (Iterator.remove is safe mid-loop).
            ruleIterator.remove();
          } else {
            rule.updateStatistics(instance);
            // Fixed: compare the remainder against the int literal 0, not the double 0.0.
            if (rule.getInstancesSeen() % this.gracePeriod == 0) {
              if (rule.tryToExpand(this.splitConfidence, this.tieThreshold)) {
                rule.split();
              }
            }
          }
          if (!this.unorderedRules) {
            break;
          }
        }
      }
    }

    if (!rulesCoveringInstance) {
      defaultRule.updateStatistics(instance);
      if (defaultRule.getInstancesSeen() % this.gracePeriod == 0) {
        if (defaultRule.tryToExpand(this.splitConfidence, this.tieThreshold)) {
          // The other branch of the split seeds the next default rule.
          ActiveRule newDefaultRule = newRule(defaultRule.getRuleNumberID(),
              (RuleActiveRegressionNode) defaultRule.getLearningNode(),
              ((RuleActiveRegressionNode) defaultRule.getLearningNode()).getStatisticsOtherBranchSplit());
          defaultRule.split();
          defaultRule.setRuleNumberID(++ruleNumberID);
          // The expanded (old) default rule joins the rule set; the other branch becomes
          // the new default rule.
          this.ruleSet.add(this.defaultRule);

          defaultRule = newDefaultRule;
        }
      }
    }
  }

  /**
   * Method to verify if the instance is an anomaly.
   *
   * @param instance
   *          the instance to check
   * @param rule
   *          the covering rule whose statistics are used for the anomaly test
   * @return true iff anomaly detection is enabled, the rule has seen enough instances, and the
   *         rule flags the instance as anomalous
   */
  private boolean isAnomaly(Instance instance, ActiveRule rule) {
    // AMRules is equipped with anomaly detection. If on, compute the anomaly value.
    boolean isAnomaly = false;
    if (!this.noAnomalyDetection) {
      if (rule.getInstancesSeen() >= this.anomalyNumInstThreshold) {
        isAnomaly = rule.isAnomaly(instance,
            this.univariateAnomalyprobabilityThreshold,
            this.multivariateAnomalyProbabilityThreshold,
            this.anomalyNumInstThreshold);
      }
    }
    return isAnomaly;
  }

  /**
   * Creates a new rule seeded from an existing learning node and/or its split statistics
   * (used when the default rule expands).
   */
  // TODO check this after finish rule, LN
  private ActiveRule newRule(int ID, RuleActiveRegressionNode node, double[] statistics) {
    ActiveRule r = newRule(ID);

    if (node != null) {
      if (node.getPerceptron() != null) {
        r.getLearningNode().setPerceptron(new Perceptron(node.getPerceptron()));
        r.getLearningNode().getPerceptron().setLearningRatio(this.learningRatio);
      }
      if (statistics == null) {
        // Seed the target mean from the node's own statistics (count in [0], sum in [1]).
        double mean;
        if (node.getNodeStatistics().getValue(0) > 0) {
          mean = node.getNodeStatistics().getValue(1) / node.getNodeStatistics().getValue(0);
          r.getLearningNode().getTargetMean().reset(mean, 1);
        }
      }
    }
    if (statistics != null && ((RuleActiveRegressionNode) r.getLearningNode()).getTargetMean() != null) {
      double mean;
      if (statistics[0] > 0) {
        mean = statistics[1] / statistics[0];
        ((RuleActiveRegressionNode) r.getLearningNode()).getTargetMean().reset(mean, (long) statistics[0]);
      }
    }
    return r;
  }

  /**
   * Creates a new empty rule configured with this processor's options.
   */
  private ActiveRule newRule(int ID) {
    return new ActiveRule.Builder()
        .threshold(this.pageHinckleyThreshold)
        .alpha(this.pageHinckleyAlpha)
        .changeDetection(this.driftDetection)
        .predictionFunction(this.predictionFunction)
        // Fixed: this configured option was previously never forwarded to the rule, so
        // constant learning-ratio decay was silently ignored.
        .constantLearningRatioDecay(this.constantLearningRatioDecay)
        .statistics(new double[3])
        .learningRatio(this.learningRatio)
        .numericObserver(numericObserver)
        .id(ID)
        .build();
  }

  /*
   * Init processor
   */
  @Override
  public void onCreate(int id) {
    this.processorId = id;
    this.statistics = new double[] { 0.0, 0, 0 };
    this.ruleNumberID = 0;
    this.defaultRule = newRule(++this.ruleNumberID);

    this.ruleSet = new LinkedList<ActiveRule>();
  }

  /*
   * Clone processor
   */
  @Override
  public Processor newProcessor(Processor p) {
    AMRulesRegressorProcessor oldProcessor = (AMRulesRegressorProcessor) p;
    Builder builder = new Builder(oldProcessor);
    AMRulesRegressorProcessor newProcessor = builder.build();
    newProcessor.resultStream = oldProcessor.resultStream;
    return newProcessor;
  }

  /*
   * Output stream
   */
  public void setResultStream(Stream resultStream) {
    this.resultStream = resultStream;
  }

  public Stream getResultStream() {
    return this.resultStream;
  }

  /*
   * Others
   */
  public boolean isRandomizable() {
    return true;
  }

  /**
   * Fluent builder for {@link AMRulesRegressorProcessor}.
   */
  public static class Builder {
    private int pageHinckleyThreshold;
    private double pageHinckleyAlpha;
    private boolean driftDetection;
    private int predictionFunction; // Adaptive=0 Perceptron=1 TargetMean=2
    private boolean constantLearningRatioDecay;
    private double learningRatio;
    private double splitConfidence;
    private double tieThreshold;
    private int gracePeriod;

    private boolean noAnomalyDetection;
    private double multivariateAnomalyProbabilityThreshold;
    private double univariateAnomalyprobabilityThreshold;
    private int anomalyNumInstThreshold;

    private boolean unorderedRules;

    private FIMTDDNumericAttributeClassLimitObserver numericObserver;
    private ErrorWeightedVote voteType;

    private Instances dataset;

    public Builder(Instances dataset) {
      this.dataset = dataset;
    }

    /** Copy-constructor used by {@link AMRulesRegressorProcessor#newProcessor(Processor)}. */
    public Builder(AMRulesRegressorProcessor processor) {
      this.pageHinckleyThreshold = processor.pageHinckleyThreshold;
      this.pageHinckleyAlpha = processor.pageHinckleyAlpha;
      this.driftDetection = processor.driftDetection;
      this.predictionFunction = processor.predictionFunction;
      this.constantLearningRatioDecay = processor.constantLearningRatioDecay;
      this.learningRatio = processor.learningRatio;
      this.splitConfidence = processor.splitConfidence;
      this.tieThreshold = processor.tieThreshold;
      this.gracePeriod = processor.gracePeriod;

      this.noAnomalyDetection = processor.noAnomalyDetection;
      this.multivariateAnomalyProbabilityThreshold = processor.multivariateAnomalyProbabilityThreshold;
      this.univariateAnomalyprobabilityThreshold = processor.univariateAnomalyprobabilityThreshold;
      this.anomalyNumInstThreshold = processor.anomalyNumInstThreshold;
      this.unorderedRules = processor.unorderedRules;

      this.numericObserver = processor.numericObserver;
      this.voteType = processor.voteType;
    }

    public Builder threshold(int threshold) {
      this.pageHinckleyThreshold = threshold;
      return this;
    }

    public Builder alpha(double alpha) {
      this.pageHinckleyAlpha = alpha;
      return this;
    }

    public Builder changeDetection(boolean changeDetection) {
      this.driftDetection = changeDetection;
      return this;
    }

    public Builder predictionFunction(int predictionFunction) {
      this.predictionFunction = predictionFunction;
      return this;
    }

    public Builder constantLearningRatioDecay(boolean constantDecay) {
      this.constantLearningRatioDecay = constantDecay;
      return this;
    }

    public Builder learningRatio(double learningRatio) {
      this.learningRatio = learningRatio;
      return this;
    }

    public Builder splitConfidence(double splitConfidence) {
      this.splitConfidence = splitConfidence;
      return this;
    }

    public Builder tieThreshold(double tieThreshold) {
      this.tieThreshold = tieThreshold;
      return this;
    }

    public Builder gracePeriod(int gracePeriod) {
      this.gracePeriod = gracePeriod;
      return this;
    }

    public Builder noAnomalyDetection(boolean noAnomalyDetection) {
      this.noAnomalyDetection = noAnomalyDetection;
      return this;
    }

    public Builder multivariateAnomalyProbabilityThreshold(double mAnomalyThreshold) {
      this.multivariateAnomalyProbabilityThreshold = mAnomalyThreshold;
      return this;
    }

    public Builder univariateAnomalyProbabilityThreshold(double uAnomalyThreshold) {
      this.univariateAnomalyprobabilityThreshold = uAnomalyThreshold;
      return this;
    }

    public Builder anomalyNumberOfInstancesThreshold(int anomalyNumInstThreshold) {
      this.anomalyNumInstThreshold = anomalyNumInstThreshold;
      return this;
    }

    public Builder unorderedRules(boolean unorderedRules) {
      this.unorderedRules = unorderedRules;
      return this;
    }

    public Builder numericObserver(FIMTDDNumericAttributeClassLimitObserver numericObserver) {
      this.numericObserver = numericObserver;
      return this;
    }

    public Builder voteType(ErrorWeightedVote voteType) {
      this.voteType = voteType;
      return this;
    }

    public AMRulesRegressorProcessor build() {
      return new AMRulesRegressorProcessor(this);
    }
  }
}
package org.apache.samoa.learners.classifiers.rules.common;

/*
 * #%L
 * SAMOA
 * %%
 * Copyright (C) 2014 - 2015 Apache Software Foundation
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import java.io.Serializable;

import org.apache.samoa.moa.classifiers.core.conditionaltests.InstanceConditionalTest;
import org.apache.samoa.moa.classifiers.core.conditionaltests.NumericAttributeBinaryTest;
import org.apache.samoa.moa.classifiers.rules.core.attributeclassobservers.FIMTDDNumericAttributeClassLimitObserver;
import org.apache.samoa.moa.classifiers.rules.core.conditionaltests.NumericAttributeBinaryRulePredicate;

/**
 * ActiveRule is a LearningRule that actively updates its LearningNode with incoming instances.
 *
 * @author Anh Thu Vu
 */
public class ActiveRule extends LearningRule {

  private static final long serialVersionUID = 1L;

  // Statistics of the branch NOT taken at the last split (used to seed the next default rule).
  private double[] statisticsOtherBranchSplit;

  // Configuration used to create replacement learning nodes after a split.
  private Builder builder;

  private RuleActiveRegressionNode learningNode;

  // Most recently appended split node; exposed for distributed variants.
  private RuleSplitNode lastUpdatedRuleSplitNode;

  /** Creates an unconfigured rule (no builder, no learning node, ID 0). */
  public ActiveRule() {
    super();
    this.builder = null;
    this.learningNode = null;
    this.ruleNumberID = 0;
  }

  /** Creates a rule configured by the given builder; the rule ID comes from the builder. */
  public ActiveRule(Builder builder) {
    super();
    this.setBuilder(builder);
    this.learningNode = newRuleActiveLearningNode(builder);
    // JD - use builder ID to set ruleNumberID
    this.ruleNumberID = builder.id;
  }

  private RuleActiveRegressionNode newRuleActiveLearningNode(Builder builder) {
    return new RuleActiveRegressionNode(builder);
  }

  /*
   * Setters & getters
   */
  public Builder getBuilder() {
    return builder;
  }

  public void setBuilder(Builder builder) {
    this.builder = builder;
  }

  @Override
  public RuleRegressionNode getLearningNode() {
    return this.learningNode;
  }

  @Override
  public void setLearningNode(RuleRegressionNode learningNode) {
    this.learningNode = (RuleActiveRegressionNode) learningNode;
  }

  public double[] statisticsOtherBranchSplit() {
    return this.statisticsOtherBranchSplit;
  }

  public RuleSplitNode getLastUpdatedRuleSplitNode() {
    return this.lastUpdatedRuleSplitNode;
  }

  /**
   * Fluent builder carrying the configuration shared by the rule and its learning node.
   */
  public static class Builder implements Serializable {

    private static final long serialVersionUID = 1712887264918475622L;
    protected boolean changeDetection;
    protected boolean usePerceptron;
    protected double threshold;
    protected double alpha;
    protected int predictionFunction;
    protected boolean constantLearningRatioDecay;
    protected double learningRatio;

    protected double[] statistics;

    protected FIMTDDNumericAttributeClassLimitObserver numericObserver;

    protected double lastTargetMean;

    public int id;

    public Builder() {
    }

    public Builder changeDetection(boolean changeDetection) {
      this.changeDetection = changeDetection;
      return this;
    }

    public Builder threshold(double threshold) {
      this.threshold = threshold;
      return this;
    }

    public Builder alpha(double alpha) {
      this.alpha = alpha;
      return this;
    }

    public Builder predictionFunction(int predictionFunction) {
      this.predictionFunction = predictionFunction;
      return this;
    }

    public Builder statistics(double[] statistics) {
      this.statistics = statistics;
      return this;
    }

    public Builder constantLearningRatioDecay(boolean constantLearningRatioDecay) {
      this.constantLearningRatioDecay = constantLearningRatioDecay;
      return this;
    }

    public Builder learningRatio(double learningRatio) {
      this.learningRatio = learningRatio;
      return this;
    }

    public Builder numericObserver(FIMTDDNumericAttributeClassLimitObserver numericObserver) {
      this.numericObserver = numericObserver;
      return this;
    }

    public Builder id(int id) {
      this.id = id;
      return this;
    }

    public ActiveRule build() {
      return new ActiveRule(this);
    }
  }

  /**
   * Asks the learning node whether this rule should expand.
   *
   * @param splitConfidence confidence bound for the split decision
   * @param tieThreshold    tie-breaking threshold
   * @return true iff the rule should split
   */
  public boolean tryToExpand(double splitConfidence, double tieThreshold) {
    return this.learningNode.tryToExpand(splitConfidence, tieThreshold);
  }

  /**
   * Expands this rule with the best split found by the learning node.
   * JD: only call after {@link #tryToExpand(double, double)} returned true.
   *
   * @throws UnsupportedOperationException if the best split is not on a numeric attribute
   */
  public void split() {
    int splitIndex = this.learningNode.getSplitIndex();
    InstanceConditionalTest conditionalTest = this.learningNode.getBestSuggestion().splitTest;
    if (!(conditionalTest instanceof NumericAttributeBinaryTest)) {
      throw new UnsupportedOperationException("AMRules (currently) only supports numerical attributes.");
    }
    NumericAttributeBinaryTest numericTest = (NumericAttributeBinaryTest) conditionalTest;
    NumericAttributeBinaryRulePredicate predicate = new NumericAttributeBinaryRulePredicate(
        numericTest.getAttsTestDependsOn()[0], numericTest.getSplitValue(),
        splitIndex + 1);
    lastUpdatedRuleSplitNode = new RuleSplitNode(predicate, this.learningNode.getStatisticsBranchSplit());
    if (this.nodeListAdd(lastUpdatedRuleSplitNode)) {
      // Replace the learning node with a fresh one seeded from the taken branch.
      RuleActiveRegressionNode freshNode = newRuleActiveLearningNode(this.getBuilder().statistics(
          this.learningNode.getStatisticsNewRuleActiveLearningNode()));
      freshNode.initialize(this.learningNode);
      this.learningNode = freshNode;
    }
  }

  /**
   * MOA GUI output (intentionally empty).
   */
  @Override
  public void getDescription(StringBuilder sb, int indent) {
  }
}
a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/LearningRule.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/LearningRule.java new file mode 100644 index 0000000..5c41215 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/LearningRule.java @@ -0,0 +1,122 @@ +package org.apache.samoa.learners.classifiers.rules.common; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.instances.Instance; +import org.apache.samoa.moa.core.DoubleVector; +import org.apache.samoa.moa.core.StringUtils; + +/** + * Rule with LearningNode (statistical data). + * + * @author Anh Thu Vu + * + */ +public abstract class LearningRule extends Rule { + + /** + * + */ + private static final long serialVersionUID = 1L; + + /* + * Constructor + */ + public LearningRule() { + super(); + } + + /* + * LearningNode + */ + public abstract RuleRegressionNode getLearningNode(); + + public abstract void setLearningNode(RuleRegressionNode learningNode); + + /* + * No. 
of instances seen + */ + public long getInstancesSeen() { + return this.getLearningNode().getInstancesSeen(); + } + + /* + * Error and change detection + */ + public double computeError(Instance instance) { + return this.getLearningNode().computeError(instance); + } + + /* + * Prediction + */ + public double[] getPrediction(Instance instance, int mode) { + return this.getLearningNode().getPrediction(instance, mode); + } + + public double[] getPrediction(Instance instance) { + return this.getLearningNode().getPrediction(instance); + } + + public double getCurrentError() { + return this.getLearningNode().getCurrentError(); + } + + /* + * Anomaly detection + */ + public boolean isAnomaly(Instance instance, + double uniVariateAnomalyProbabilityThreshold, + double multiVariateAnomalyProbabilityThreshold, + int numberOfInstanceesForAnomaly) { + return this.getLearningNode().isAnomaly(instance, uniVariateAnomalyProbabilityThreshold, + multiVariateAnomalyProbabilityThreshold, + numberOfInstanceesForAnomaly); + } + + /* + * Update + */ + public void updateStatistics(Instance instance) { + this.getLearningNode().updateStatistics(instance); + } + + public String printRule() { + StringBuilder out = new StringBuilder(); + int indent = 1; + StringUtils.appendIndented(out, indent, "Rule Nr." 
+ this.ruleNumberID + " Instances seen:" + + this.getLearningNode().getInstancesSeen() + "\n"); // AC + for (RuleSplitNode node : nodeList) { + StringUtils.appendIndented(out, indent, node.getSplitTest().toString()); + StringUtils.appendIndented(out, indent, " "); + StringUtils.appendIndented(out, indent, node.toString()); + } + DoubleVector pred = new DoubleVector(this.getLearningNode().getSimplePrediction()); + StringUtils.appendIndented(out, 0, " --> y: " + pred.toString()); + StringUtils.appendNewline(out); + + if (getLearningNode().perceptron != null) { + ((RuleActiveRegressionNode) this.getLearningNode()).perceptron.getModelDescription(out, 0); + StringUtils.appendNewline(out); + } + return (out.toString()); + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/NonLearningRule.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/NonLearningRule.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/NonLearningRule.java new file mode 100644 index 0000000..9069bd4 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/NonLearningRule.java @@ -0,0 +1,51 @@ +package org.apache.samoa.learners.classifiers.rules.common; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +/** + * The most basic rule: inherit from Rule the ID and list of features. + * + * @author Anh Thu Vu + * + */ +/* + * This branch (Non-learning rule) was created for an old implementation. + * Probably should remove None-Learning and Learning Rule classes, merge Rule + * with LearningRule. + */ +public class NonLearningRule extends Rule { + + /** + * + */ + private static final long serialVersionUID = -1210907339230307784L; + + public NonLearningRule(ActiveRule rule) { + this.nodeList = rule.nodeList; + this.ruleNumberID = rule.ruleNumberID; + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + // do nothing + } + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/PassiveRule.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/PassiveRule.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/PassiveRule.java new file mode 100644 index 0000000..f5309a7 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/PassiveRule.java @@ -0,0 +1,70 @@ +package org.apache.samoa.learners.classifiers.rules.common; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.util.LinkedList; + +/** + * PassiveRule is a LearningRule that update its LearningNode with the received new LearningNode. + * + * @author Anh Thu Vu + * + */ +public class PassiveRule extends LearningRule { + + /** + * + */ + private static final long serialVersionUID = -5551571895910530275L; + + private RulePassiveRegressionNode learningNode; + + /* + * Constructor to turn an ActiveRule into a PassiveRule + */ + public PassiveRule(ActiveRule rule) { + this.nodeList = new LinkedList<>(); + for (RuleSplitNode node : rule.nodeList) { + this.nodeList.add(node.getACopy()); + } + + this.learningNode = new RulePassiveRegressionNode(rule.getLearningNode()); + this.ruleNumberID = rule.ruleNumberID; + } + + @Override + public RuleRegressionNode getLearningNode() { + return this.learningNode; + } + + @Override + public void setLearningNode(RuleRegressionNode learningNode) { + this.learningNode = (RulePassiveRegressionNode) learningNode; + } + + /* + * MOA GUI + */ + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/Perceptron.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/Perceptron.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/Perceptron.java new file mode 100644 index 0000000..1b6c9d2 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/Perceptron.java @@ -0,0 +1,487 @@ +package org.apache.samoa.learners.classifiers.rules.common; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under 
/**
 * Prediction scheme using Perceptron: predictions are computed according to a
 * linear function of the (normalized) attributes; the last weight slot is the
 * bias term. Trained online with a (optionally decaying) learning ratio and a
 * fading-factor error estimate.
 *
 * @author Anh Thu Vu
 *
 */
public class Perceptron extends AbstractClassifier implements Regressor {

  private final double SD_THRESHOLD = 0.0000001; // THRESHOLD for normalizing attribute and target values

  private static final long serialVersionUID = 1L;

  // Former MOA CLI options, kept for reference:
  // public FlagOption constantLearningRatioDecayOption = new FlagOption(
  // "learningRatio_Decay_set_constant", 'd',
  // "Learning Ratio Decay in Perceptron set to be constant. (The next parameter).");
  //
  // public FloatOption learningRatioOption = new FloatOption(
  // "learningRatio", 'l',
  // "Constante Learning Ratio to use for training the Perceptrons in the leaves.",
  // 0.01);
  //
  // public FloatOption learningRateDecayOption = new FloatOption(
  // "learningRateDecay", 'm',
  // " Learning Rate decay to use for training the Perceptron.", 0.001);
  //
  // public FloatOption fadingFactorOption = new FloatOption(
  // "fadingFactor", 'e',
  // "Fading factor for the Perceptron accumulated error", 0.99, 0, 1);

  // When true, learningRatio stays fixed at originalLearningRatio; otherwise it
  // decays with perceptronInstancesSeen (see trainOnInstanceImpl).
  protected boolean constantLearningRatioDecay;
  protected double originalLearningRatio;

  // Fading-factor-weighted error count; pairs with accumulatedError in getCurrentError().
  private double nError;
  protected double fadingFactor = 0.99;
  private double learningRatio;
  protected double learningRateDecay = 0.001;

  // The Perception weights; index numAttributes-1 holds the bias.
  protected double[] weightAttribute;

  // Statistics used for error calculations: per-attribute running sums and
  // sums of squares, used for z-score normalization of inputs.
  public DoubleVector perceptronattributeStatistics = new DoubleVector();
  public DoubleVector squaredperceptronattributeStatistics = new DoubleVector();

  // The number of instances contributing to this model
  protected int perceptronInstancesSeen;
  protected int perceptronYSeen;

  // Fading accumulated absolute prediction error.
  protected double accumulatedError;

  // If the model (weights) should be reset or not
  protected boolean initialisePerceptron;

  // Running sum and squared sum of target values, for y normalization.
  protected double perceptronsumY;
  protected double squaredperceptronsumY;

  public Perceptron() {
    this.initialisePerceptron = true;
  }

  /*
   * Copy constructors.
   */
  public Perceptron(Perceptron p) {
    this(p, false);
  }

  // Copies all learning state from p; the accumulated error is copied only on request.
  public Perceptron(Perceptron p, boolean copyAccumulatedError) {
    super();
    // this.constantLearningRatioDecayOption =
    // p.constantLearningRatioDecayOption;
    // this.learningRatioOption = p.learningRatioOption;
    // this.learningRateDecayOption=p.learningRateDecayOption;
    // this.fadingFactorOption = p.fadingFactorOption;
    this.constantLearningRatioDecay = p.constantLearningRatioDecay;
    this.originalLearningRatio = p.originalLearningRatio;
    if (copyAccumulatedError)
      this.accumulatedError = p.accumulatedError;
    this.nError = p.nError;
    this.fadingFactor = p.fadingFactor;
    this.learningRatio = p.learningRatio;
    this.learningRateDecay = p.learningRateDecay;
    if (p.weightAttribute != null)
      this.weightAttribute = p.weightAttribute.clone();

    this.perceptronattributeStatistics = new DoubleVector(p.perceptronattributeStatistics);
    this.squaredperceptronattributeStatistics = new DoubleVector(p.squaredperceptronattributeStatistics);
    this.perceptronInstancesSeen = p.perceptronInstancesSeen;

    this.initialisePerceptron = p.initialisePerceptron;
    this.perceptronsumY = p.perceptronsumY;
    this.squaredperceptronsumY = p.squaredperceptronsumY;
    this.perceptronYSeen = p.perceptronYSeen;
  }

  // Reconstruction from the Kryo-serializable snapshot (see PerceptronData).
  public Perceptron(PerceptronData p) {
    super();
    this.constantLearningRatioDecay = p.constantLearningRatioDecay;
    this.originalLearningRatio = p.originalLearningRatio;
    this.nError = p.nError;
    this.fadingFactor = p.fadingFactor;
    this.learningRatio = p.learningRatio;
    this.learningRateDecay = p.learningRateDecay;
    if (p.weightAttribute != null)
      this.weightAttribute = p.weightAttribute.clone();

    this.perceptronattributeStatistics = new DoubleVector(p.perceptronattributeStatistics);
    this.squaredperceptronattributeStatistics = new DoubleVector(p.squaredperceptronattributeStatistics);
    this.perceptronInstancesSeen = p.perceptronInstancesSeen;

    this.initialisePerceptron = p.initialisePerceptron;
    this.perceptronsumY = p.perceptronsumY;
    this.squaredperceptronsumY = p.squaredperceptronsumY;
    this.perceptronYSeen = p.perceptronYSeen;
    this.accumulatedError = p.accumulatedError;
  }

  // Debug helper, disabled:
  // private void printPerceptron() {
  // System.out.println("Learning Ratio:"+this.learningRatio+" ("+this.originalLearningRatio+")");
  // System.out.println("Constant Learning Ratio Decay:"+this.constantLearningRatioDecay+" ("+this.learningRateDecay+")");
  // System.out.println("Error:"+this.accumulatedError+"/"+this.nError);
  // System.out.println("Fading factor:"+this.fadingFactor);
  // System.out.println("Perceptron Y:"+this.perceptronsumY+"/"+this.squaredperceptronsumY+"/"+this.perceptronYSeen);
  // }

  /*
   * Weights
   */
  public void setWeights(double[] w) {
    this.weightAttribute = w;
  }

  public double[] getWeights() {
    return this.weightAttribute;
  }

  /*
   * No. of instances seen
   */
  public int getInstancesSeen() {
    return perceptronInstancesSeen;
  }

  public void setInstancesSeen(int pInstancesSeen) {
    this.perceptronInstancesSeen = pInstancesSeen;
  }

  /**
   * A method to reset the model. Marks the weights for re-initialization on
   * the next training instance (the instance is needed to size the array).
   */
  public void resetLearningImpl() {
    this.initialisePerceptron = true;
    this.reset();
  }

  // Clears all running statistics; note the weights themselves are NOT cleared
  // here — they are overwritten lazily by trainOnInstanceImpl.
  public void reset() {
    this.nError = 0.0;
    this.accumulatedError = 0.0;
    this.perceptronInstancesSeen = 0;
    this.perceptronattributeStatistics = new DoubleVector();
    this.squaredperceptronattributeStatistics = new DoubleVector();
    this.perceptronsumY = 0.0;
    this.squaredperceptronsumY = 0.0;
    this.perceptronYSeen = 0;
  }

  public void resetError() {
    this.nError = 0.0;
    this.accumulatedError = 0.0;
  }

  /**
   * Update the model using the provided instance. The fading error is updated
   * BEFORE training (and even before first-time weight initialization), i.e.
   * it is a prequential (test-then-train) error.
   */
  public void trainOnInstanceImpl(Instance inst) {
    accumulatedError = Math.abs(this.prediction(inst) - inst.classValue()) + fadingFactor * accumulatedError;
    nError = 1 + fadingFactor * nError;
    // Initialise Perceptron if necessary
    if (this.initialisePerceptron) {
      // this.fadingFactor=this.fadingFactorOption.getValue();
      // this.classifierRandom.setSeed(randomSeedOption.getValue());
      this.classifierRandom.setSeed(randomSeed);
      this.initialisePerceptron = false; // not in resetLearningImpl() because it needs Instance!
      this.weightAttribute = new double[inst.numAttributes()];
      // random weights in [-1, 1)
      for (int j = 0; j < inst.numAttributes(); j++) {
        weightAttribute[j] = 2 * this.classifierRandom.nextDouble() - 1;
      }
      // Update Learning Rate
      learningRatio = originalLearningRatio;
      // this.learningRateDecay = learningRateDecayOption.getValue();

    }

    // Update attribute statistics
    this.perceptronInstancesSeen++;
    this.perceptronYSeen++;

    for (int j = 0; j < inst.numAttributes() - 1; j++)
    {
      perceptronattributeStatistics.addToValue(j, inst.value(j));
      squaredperceptronattributeStatistics.addToValue(j, inst.value(j) * inst.value(j));
    }
    this.perceptronsumY += inst.classValue();
    this.squaredperceptronsumY += inst.classValue() * inst.classValue();

    // Decaying learning ratio: shrinks as more instances are seen.
    if (!constantLearningRatioDecay) {
      learningRatio = originalLearningRatio / (1 + perceptronInstancesSeen * learningRateDecay);
    }

    this.updateWeights(inst, learningRatio);
    // this.printPerceptron();
  }

  /**
   * Output the prediction made by this perceptron on the given instance
   * (normalize input, predict in normalized space, denormalize the result).
   */
  private double prediction(Instance inst)
  {
    double[] normalizedInstance = normalizedInstance(inst);
    double normalizedPrediction = prediction(normalizedInstance);
    return denormalizedPrediction(normalizedPrediction);
  }

  // Prediction in normalized target space (no denormalization).
  public double normalizedPrediction(Instance inst)
  {
    double[] normalizedInstance = normalizedInstance(inst);
    return prediction(normalizedInstance);
  }

  // Maps a normalized prediction back to the original target scale using the
  // running mean/sd of y. Skips the sd factor when sd is ~0.
  private double denormalizedPrediction(double normalizedPrediction) {
    if (!this.initialisePerceptron) {
      double meanY = perceptronsumY / perceptronYSeen;
      double sdY = computeSD(squaredperceptronsumY, perceptronsumY, perceptronYSeen);
      if (sdY > SD_THRESHOLD)
        return normalizedPrediction * sdY + meanY;
      else
        return normalizedPrediction + meanY;
    }
    else
      return normalizedPrediction; // Perceptron may have been "reseted". Use old weights to predict

  }

  // Dot product of weights with the normalized input; the last weight is the
  // bias. Returns 0 while the perceptron is uninitialized.
  public double prediction(double[] instanceValues)
  {
    double prediction = 0.0;
    if (!this.initialisePerceptron)
    {
      for (int j = 0; j < instanceValues.length - 1; j++) {
        prediction += this.weightAttribute[j] * instanceValues[j];
      }
      prediction += this.weightAttribute[instanceValues.length - 1];
    }
    return prediction;
  }

  // Z-score normalization per attribute (mean/sd from running statistics);
  // the last array slot is left at 0 (it corresponds to the bias weight).
  public double[] normalizedInstance(Instance inst) {
    // Normalize Instance
    double[] normalizedInstance = new double[inst.numAttributes()];
    for (int j = 0; j < inst.numAttributes() - 1; j++) {
      int instAttIndex = modelAttIndexToInstanceAttIndex(j);
      double mean = perceptronattributeStatistics.getValue(j) / perceptronYSeen;
      double sd = computeSD(squaredperceptronattributeStatistics.getValue(j),
          perceptronattributeStatistics.getValue(j), perceptronYSeen);
      if (sd > SD_THRESHOLD)
        normalizedInstance[j] = (inst.value(instAttIndex) - mean) / sd;
      else
        normalizedInstance[j] = inst.value(instAttIndex) - mean;
    }
    return normalizedInstance;
  }

  // Sample standard deviation from sum and sum-of-squares; 0 for size <= 1.
  public double computeSD(double squaredVal, double val, int size) {
    if (size > 1) {
      return Math.sqrt((squaredVal - ((val * val) / size)) / (size - 1.0));
    }
    return 0.0;
  }

  /**
   * One gradient step on the normalized instance; numeric attributes only.
   * If the L1 norm of the weights exceeds the attribute count, weights are
   * shrunk (Lasso-style regularization). Returns the (denormalized)
   * pre-update prediction.
   */
  public double updateWeights(Instance inst, double learningRatio) {
    // Normalize Instance
    double[] normalizedInstance = normalizedInstance(inst);
    // Compute the Normalized Prediction of Perceptron
    double normalizedPredict = prediction(normalizedInstance);
    double normalizedY = normalizeActualClassValue(inst);
    double sumWeights = 0.0;
    double delta = normalizedY - normalizedPredict;

    for (int j = 0; j < inst.numAttributes() - 1; j++) {
      int instAttIndex = modelAttIndexToInstanceAttIndex(j);
      if (inst.attribute(instAttIndex).isNumeric()) {
        this.weightAttribute[j] += learningRatio * delta * normalizedInstance[j];
        sumWeights += Math.abs(this.weightAttribute[j]);
      }
    }
    this.weightAttribute[inst.numAttributes() - 1] += learningRatio * delta;
    sumWeights += Math.abs(this.weightAttribute[inst.numAttributes() - 1]);
    if (sumWeights > inst.numAttributes()) { // Lasso regression
      for (int j = 0; j < inst.numAttributes() - 1; j++) {
        int instAttIndex = modelAttIndexToInstanceAttIndex(j);
        if (inst.attribute(instAttIndex).isNumeric()) {
          this.weightAttribute[j] = this.weightAttribute[j] / sumWeights;
        }
      }
      this.weightAttribute[inst.numAttributes() - 1] = this.weightAttribute[inst.numAttributes() - 1] / sumWeights;
    }

    return denormalizedPrediction(normalizedPredict);
  }

  // Scales all weights so their absolute values sum to 1.
  // NOTE(review): if all weights are exactly 0 this divides by zero — assumed
  // unreachable after random initialization; confirm with callers.
  public void normalizeWeights() {
    double sumWeights = 0.0;

    for (double aWeightAttribute : this.weightAttribute) {
      sumWeights += Math.abs(aWeightAttribute);
    }
    for (int j = 0; j < this.weightAttribute.length; j++) {
      this.weightAttribute[j] = this.weightAttribute[j] / sumWeights;
    }
  }

  // Z-score normalization of the target value using running y statistics.
  private double normalizeActualClassValue(Instance inst) {
    double meanY = perceptronsumY / perceptronYSeen;
    double sdY = computeSD(squaredperceptronsumY, perceptronsumY, perceptronYSeen);

    double normalizedY;
    if (sdY > SD_THRESHOLD) {
      normalizedY = (inst.classValue() - meanY) / sdY;
    } else {
      normalizedY = inst.classValue() - meanY;
    }
    return normalizedY;
  }

  @Override
  public boolean isRandomizable() {
    return true;
  }

  @Override
  public double[] getVotesForInstance(Instance inst) {
    return new double[] { this.prediction(inst) };
  }

  @Override
  protected Measurement[] getModelMeasurementsImpl() {
    return null;
  }

  // Renders the linear model as e.g. "w0 X0 +w1 X1 ... +bias" (MOA GUI).
  @Override
  public void getModelDescription(StringBuilder out, int indent) {
    if (this.weightAttribute != null) {
      for (int i = 0; i < this.weightAttribute.length - 1; ++i)
      {
        if (this.weightAttribute[i] >= 0 && i > 0)
          out.append(" +" + Math.round(this.weightAttribute[i] * 1000) / 1000.0 + " X" + i);
        else
          out.append(" " + Math.round(this.weightAttribute[i] * 1000) / 1000.0 + " X" + i);
      }
      if (this.weightAttribute[this.weightAttribute.length - 1] >= 0)
        out.append(" +" + Math.round(this.weightAttribute[this.weightAttribute.length - 1] * 1000) / 1000.0);
      else
        out.append(" " + Math.round(this.weightAttribute[this.weightAttribute.length - 1] * 1000) / 1000.0);
    }
  }

  public void setLearningRatio(double learningRatio) {
    this.learningRatio = learningRatio;

  }

  // Fading average absolute error; MAX_VALUE before any error was recorded.
  public double getCurrentError()
  {
    if (nError > 0)
      return accumulatedError / nError;
    else
      return Double.MAX_VALUE;
  }

  /**
   * Plain serializable snapshot of a Perceptron's state, used by the Kryo
   * serializer below.
   */
  public static class PerceptronData implements Serializable {
    /**
     *
     */
    private static final long serialVersionUID = 6727623208744105082L;

    private boolean constantLearningRatioDecay;
    // If the model (weights) should be reset or not
    private boolean initialisePerceptron;

    private double nError;
    private double fadingFactor;
    private double originalLearningRatio;
    private double learningRatio;
    private double learningRateDecay;
    private double accumulatedError;
    private double perceptronsumY;
    private double squaredperceptronsumY;

    // The Perception weights
    private double[] weightAttribute;

    // Statistics used for error calculations
    private DoubleVector perceptronattributeStatistics;
    private DoubleVector squaredperceptronattributeStatistics;

    // The number of instances contributing to this model
    private int perceptronInstancesSeen;
    private int perceptronYSeen;

    public PerceptronData() {

    }

    // Shallow snapshot: weightAttribute and the DoubleVectors are shared with
    // the source Perceptron (the Perceptron(PerceptronData) constructor makes
    // its own copies when rebuilding).
    public PerceptronData(Perceptron p) {
      this.constantLearningRatioDecay = p.constantLearningRatioDecay;
      this.initialisePerceptron = p.initialisePerceptron;
      this.nError = p.nError;
      this.fadingFactor = p.fadingFactor;
      this.originalLearningRatio = p.originalLearningRatio;
      this.learningRatio = p.learningRatio;
      this.learningRateDecay = p.learningRateDecay;
      this.accumulatedError = p.accumulatedError;
      this.perceptronsumY = p.perceptronsumY;
      this.squaredperceptronsumY = p.squaredperceptronsumY;
      this.weightAttribute = p.weightAttribute;
      this.perceptronattributeStatistics = p.perceptronattributeStatistics;
      this.squaredperceptronattributeStatistics = p.squaredperceptronattributeStatistics;
      this.perceptronInstancesSeen = p.perceptronInstancesSeen;
      this.perceptronYSeen = p.perceptronYSeen;
    }

    public Perceptron build() {
      return new Perceptron(this);
    }

  }

  /**
   * Kryo serializer that round-trips a Perceptron through PerceptronData.
   */
  public static final class PerceptronSerializer extends Serializer<Perceptron> {

    @Override
    public void write(Kryo kryo, Output output, Perceptron p) {
      kryo.writeObjectOrNull(output, new PerceptronData(p), PerceptronData.class);
    }

    // NOTE(review): readObjectOrNull may return null, which would NPE on
    // build(); assumes write() never serializes a null Perceptron — confirm.
    @Override
    public Perceptron read(Kryo kryo, Input input, Class<Perceptron> type) {
      PerceptronData perceptronData = kryo.readObjectOrNull(input, PerceptronData.class);
      return perceptronData.build();
    }
  }

}
+ * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.util.LinkedList; +import java.util.List; + +import org.apache.samoa.instances.Instance; +import org.apache.samoa.moa.AbstractMOAObject; +import org.apache.samoa.moa.classifiers.rules.core.conditionaltests.NumericAttributeBinaryRulePredicate; + +/** + * The base class for "rule". Represents the most basic rule with and ID and a list of features (nodeList). + * + * @author Anh Thu Vu + * + */ +public abstract class Rule extends AbstractMOAObject { + private static final long serialVersionUID = 1L; + + protected int ruleNumberID; + + protected List<RuleSplitNode> nodeList; + + /* + * Constructor + */ + public Rule() { + this.nodeList = new LinkedList<RuleSplitNode>(); + } + + /* + * Rule ID + */ + public int getRuleNumberID() { + return ruleNumberID; + } + + public void setRuleNumberID(int ruleNumberID) { + this.ruleNumberID = ruleNumberID; + } + + /* + * RuleSplitNode list + */ + public List<RuleSplitNode> getNodeList() { + return nodeList; + } + + public void setNodeList(List<RuleSplitNode> nodeList) { + this.nodeList = nodeList; + } + + /* + * Covering + */ + public boolean isCovering(Instance inst) { + boolean isCovering = true; + for (RuleSplitNode node : nodeList) { + if (node.evaluate(inst) == false) { + isCovering = false; + break; + } + } + return isCovering; + } + + /* + * Add RuleSplitNode + */ + public boolean nodeListAdd(RuleSplitNode ruleSplitNode) { + // Check that the node is not already in the list + boolean isIncludedInNodeList = false; + boolean isUpdated = false; + for (RuleSplitNode node : nodeList) { + NumericAttributeBinaryRulePredicate nodeTest = (NumericAttributeBinaryRulePredicate) node.getSplitTest(); + NumericAttributeBinaryRulePredicate ruleSplitNodeTest = (NumericAttributeBinaryRulePredicate) ruleSplitNode + .getSplitTest(); + if (nodeTest.isUsingSameAttribute(ruleSplitNodeTest)) { + isIncludedInNodeList = 
true; + if (nodeTest.isIncludedInRuleNode(ruleSplitNodeTest) == true) { // remove this line to keep the most recent attribute value + // replace the value + nodeTest.setAttributeValue(ruleSplitNodeTest); + isUpdated = true; // if is updated (i.e. an expansion happened) a new learning node should be created + } + } + } + if (isIncludedInNodeList == false) { + this.nodeList.add(ruleSplitNode); + } + return (!isIncludedInNodeList || isUpdated); + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RuleActiveLearningNode.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RuleActiveLearningNode.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RuleActiveLearningNode.java new file mode 100644 index 0000000..f934c17 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RuleActiveLearningNode.java @@ -0,0 +1,33 @@ +package org.apache.samoa.learners.classifiers.rules.common; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +/** + * Interface for Rule's LearningNode that updates both statistics for expanding rule and computing predictions. 
/**
 * Interface for Rule's LearningNode that updates both statistics for expanding rule and computing predictions.
 *
 * @author Anh Thu Vu
 *
 */
public interface RuleActiveLearningNode extends RulePassiveLearningNode {

  /**
   * Attempts to expand (split) this node using its accumulated statistics.
   *
   * @param splitConfidence
   *          Hoeffding-bound confidence parameter (delta)
   * @param tieThreshold
   *          bound below which two competing splits are treated as a tie
   * @return true if a split should be performed
   */
  public boolean tryToExpand(double splitConfidence, double tieThreshold);

}
+ * #L% + */ + +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; + +import org.apache.samoa.instances.Instance; +import org.apache.samoa.moa.classifiers.core.AttributeSplitSuggestion; +import org.apache.samoa.moa.classifiers.core.attributeclassobservers.AttributeClassObserver; +import org.apache.samoa.moa.classifiers.core.attributeclassobservers.FIMTDDNumericAttributeClassObserver; +import org.apache.samoa.moa.classifiers.core.splitcriteria.SplitCriterion; +import org.apache.samoa.moa.classifiers.rules.core.attributeclassobservers.FIMTDDNumericAttributeClassLimitObserver; +import org.apache.samoa.moa.classifiers.rules.core.splitcriteria.SDRSplitCriterionAMRules; +import org.apache.samoa.moa.classifiers.rules.driftdetection.PageHinkleyFading; +import org.apache.samoa.moa.classifiers.rules.driftdetection.PageHinkleyTest; +import org.apache.samoa.moa.core.AutoExpandVector; +import org.apache.samoa.moa.core.DoubleVector; + +/** + * LearningNode for regression rule that updates both statistics for expanding rule and computing predictions. 
+ * + * @author Anh Thu Vu + * + */ +public class RuleActiveRegressionNode extends RuleRegressionNode implements RuleActiveLearningNode { + + /** + * + */ + private static final long serialVersionUID = 519854943188168546L; + + protected int splitIndex = 0; + + protected PageHinkleyTest pageHinckleyTest; + protected boolean changeDetection; + + protected double[] statisticsNewRuleActiveLearningNode = null; + protected double[] statisticsBranchSplit = null; + protected double[] statisticsOtherBranchSplit; + + protected AttributeSplitSuggestion bestSuggestion = null; + + protected AutoExpandVector<AttributeClassObserver> attributeObservers = new AutoExpandVector<>(); + private FIMTDDNumericAttributeClassLimitObserver numericObserver; + + /* + * Simple setters & getters + */ + public int getSplitIndex() { + return splitIndex; + } + + public void setSplitIndex(int splitIndex) { + this.splitIndex = splitIndex; + } + + public double[] getStatisticsOtherBranchSplit() { + return statisticsOtherBranchSplit; + } + + public void setStatisticsOtherBranchSplit(double[] statisticsOtherBranchSplit) { + this.statisticsOtherBranchSplit = statisticsOtherBranchSplit; + } + + public double[] getStatisticsBranchSplit() { + return statisticsBranchSplit; + } + + public void setStatisticsBranchSplit(double[] statisticsBranchSplit) { + this.statisticsBranchSplit = statisticsBranchSplit; + } + + public double[] getStatisticsNewRuleActiveLearningNode() { + return statisticsNewRuleActiveLearningNode; + } + + public void setStatisticsNewRuleActiveLearningNode( + double[] statisticsNewRuleActiveLearningNode) { + this.statisticsNewRuleActiveLearningNode = statisticsNewRuleActiveLearningNode; + } + + public AttributeSplitSuggestion getBestSuggestion() { + return bestSuggestion; + } + + public void setBestSuggestion(AttributeSplitSuggestion bestSuggestion) { + this.bestSuggestion = bestSuggestion; + } + + /* + * Constructor with builder + */ + public RuleActiveRegressionNode() { + super(); + } + + 
public RuleActiveRegressionNode(ActiveRule.Builder builder) { + super(builder.statistics); + this.changeDetection = builder.changeDetection; + if (!builder.changeDetection) { + this.pageHinckleyTest = new PageHinkleyFading(builder.threshold, builder.alpha); + } + this.predictionFunction = builder.predictionFunction; + this.learningRatio = builder.learningRatio; + this.ruleNumberID = builder.id; + this.numericObserver = builder.numericObserver; + + this.perceptron = new Perceptron(); + this.perceptron.prepareForUse(); + this.perceptron.originalLearningRatio = builder.learningRatio; + this.perceptron.constantLearningRatioDecay = builder.constantLearningRatioDecay; + + if (this.predictionFunction != 1) + { + this.targetMean = new TargetMean(); + if (builder.statistics[0] > 0) + this.targetMean.reset(builder.statistics[1] / builder.statistics[0], (long) builder.statistics[0]); + } + this.predictionFunction = builder.predictionFunction; + if (builder.statistics != null) + this.nodeStatistics = new DoubleVector(builder.statistics); + } + + /* + * Update with input instance + */ + public boolean updatePageHinckleyTest(double error) { + boolean changeDetected = false; + if (!this.changeDetection) { + changeDetected = pageHinckleyTest.update(error); + } + return changeDetected; + } + + public boolean updateChangeDetection(double error) { + return !changeDetection && pageHinckleyTest.update(error); + } + + @Override + public void updateStatistics(Instance inst) { + // Update the statistics for this node + // number of instances passing through the node + nodeStatistics.addToValue(0, 1); + // sum of y values + nodeStatistics.addToValue(1, inst.classValue()); + // sum of squared y values + nodeStatistics.addToValue(2, inst.classValue() * inst.classValue()); + + for (int i = 0; i < inst.numAttributes() - 1; i++) { + int instAttIndex = modelAttIndexToInstanceAttIndex(i, inst); + + AttributeClassObserver obs = this.attributeObservers.get(i); + if (obs == null) { + // At this 
stage all nominal attributes are ignored + if (inst.attribute(instAttIndex).isNumeric()) // instAttIndex + { + obs = newNumericClassObserver(); + this.attributeObservers.set(i, obs); + } + } + if (obs != null) { + ((FIMTDDNumericAttributeClassObserver) obs).observeAttributeClass(inst.value(instAttIndex), inst.classValue(), + inst.weight()); + } + } + + this.perceptron.trainOnInstance(inst); + if (this.predictionFunction != 1) { // Train target mean if prediction function is not Perceptron + this.targetMean.trainOnInstance(inst); + } + } + + protected AttributeClassObserver newNumericClassObserver() { + // return new FIMTDDNumericAttributeClassObserver(); + // return new FIMTDDNumericAttributeClassLimitObserver(); + // return + // (AttributeClassObserver)((AttributeClassObserver)this.numericObserverOption.getPreMaterializedObject()).copy(); + FIMTDDNumericAttributeClassLimitObserver newObserver = new FIMTDDNumericAttributeClassLimitObserver(); + newObserver.setMaxNodes(numericObserver.getMaxNodes()); + return newObserver; + } + + /* + * Init after being split from oldLearningNode + */ + public void initialize(RuleRegressionNode oldLearningNode) { + if (oldLearningNode.perceptron != null) + { + this.perceptron = new Perceptron(oldLearningNode.perceptron); + this.perceptron.resetError(); + this.perceptron.setLearningRatio(oldLearningNode.learningRatio); + } + + if (oldLearningNode.targetMean != null) + { + this.targetMean = new TargetMean(oldLearningNode.targetMean); + this.targetMean.resetError(); + } + // reset statistics + this.nodeStatistics.setValue(0, 0); + this.nodeStatistics.setValue(1, 0); + this.nodeStatistics.setValue(2, 0); + } + + /* + * Expand + */ + @Override + public boolean tryToExpand(double splitConfidence, double tieThreshold) { + + // splitConfidence. Hoeffding Bound test parameter. + // tieThreshold. Hoeffding Bound test parameter. 
+ SplitCriterion splitCriterion = new SDRSplitCriterionAMRules(); + // SplitCriterion splitCriterion = new SDRSplitCriterionAMRulesNode();//JD + // for assessing only best branch + + // Using this criterion, find the best split per attribute and rank the + // results + AttributeSplitSuggestion[] bestSplitSuggestions = this.getBestSplitSuggestions(splitCriterion); + Arrays.sort(bestSplitSuggestions); + // Declare a variable to determine if any of the splits should be performed + boolean shouldSplit = false; + + // If only one split was returned, use it + if (bestSplitSuggestions.length < 2) { + shouldSplit = ((bestSplitSuggestions.length > 0) && (bestSplitSuggestions[0].merit > 0)); + bestSuggestion = bestSplitSuggestions[bestSplitSuggestions.length - 1]; + } // Otherwise, consider which of the splits proposed may be worth trying + else { + // Determine the hoeffding bound value, used to select how many instances + // should be used to make a test decision + // to feel reasonably confident that the test chosen by this sample is the + // same as what would be chosen using infinite examples + double hoeffdingBound = computeHoeffdingBound(1, splitConfidence, getInstancesSeen()); + // Determine the top two ranked splitting suggestions + bestSuggestion = bestSplitSuggestions[bestSplitSuggestions.length - 1]; + AttributeSplitSuggestion secondBestSuggestion = bestSplitSuggestions[bestSplitSuggestions.length - 2]; + + // If the upper bound of the sample mean for the ratio of SDR(best + // suggestion) to SDR(second best suggestion), + // as determined using the hoeffding bound, is less than 1, then the true + // mean is also less than 1, and thus at this + // particular moment of observation the bestSuggestion is indeed the best + // split option with confidence 1-delta, and + // splitting should occur. 
+ // Alternatively, if two or more splits are very similar or identical in + // terms of their splits, then a threshold limit + // (default 0.05) is applied to the hoeffding bound; if the hoeffding + // bound is smaller than this limit then the two + // competing attributes are equally good, and the split will be made on + // the one with the higher SDR value. + + if (bestSuggestion.merit > 0) { + if ((((secondBestSuggestion.merit / bestSuggestion.merit) + hoeffdingBound) < 1) + || (hoeffdingBound < tieThreshold)) { + shouldSplit = true; + } + } + } + + if (shouldSplit) { + AttributeSplitSuggestion splitDecision = bestSplitSuggestions[bestSplitSuggestions.length - 1]; + double minValue = Double.MAX_VALUE; + double[] branchMerits = SDRSplitCriterionAMRules + .computeBranchSplitMerits(bestSuggestion.resultingClassDistributions); + + for (int i = 0; i < bestSuggestion.numSplits(); i++) { + double value = branchMerits[i]; + if (value < minValue) { + minValue = value; + splitIndex = i; + statisticsNewRuleActiveLearningNode = bestSuggestion.resultingClassDistributionFromSplit(i); + } + } + statisticsBranchSplit = splitDecision.resultingClassDistributionFromSplit(splitIndex); + statisticsOtherBranchSplit = bestSuggestion.resultingClassDistributionFromSplit(splitIndex == 0 ? 
1 : 0);
+
+ }
+ return shouldSplit;
+ }
+
+ // Accessor for the per-attribute observers that back the split-suggestion search.
+ public AutoExpandVector<AttributeClassObserver> getAttributeObservers() {
+ return this.attributeObservers;
+ }
+
+ // Builds the best split suggestion for each attribute under the given criterion.
+ // Only numeric attributes (FIMTDD observers) are considered; others are skipped.
+ public AttributeSplitSuggestion[] getBestSplitSuggestions(SplitCriterion criterion) {
+
+ List<AttributeSplitSuggestion> bestSuggestions = new LinkedList<AttributeSplitSuggestion>();
+
+ // Set the nodeStatistics up as the preSplitDistribution, rather than the
+ // observedClassDistribution
+ double[] nodeSplitDist = this.nodeStatistics.getArrayCopy();
+ for (int i = 0; i < this.attributeObservers.size(); i++) {
+ AttributeClassObserver obs = this.attributeObservers.get(i);
+ if (obs != null) {
+
+ // AT THIS STAGE NON-NUMERIC ATTRIBUTES ARE IGNORED
+ AttributeSplitSuggestion bestSuggestion = null;
+ if (obs instanceof FIMTDDNumericAttributeClassObserver) {
+ bestSuggestion = obs.getBestEvaluatedSplitSuggestion(criterion, nodeSplitDist, i, true);
+ }
+
+ if (bestSuggestion != null) {
+ bestSuggestions.add(bestSuggestion);
+ }
+ }
+ }
+ return bestSuggestions.toArray(new AttributeSplitSuggestion[bestSuggestions.size()]);
+ }
+
+} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RulePassiveLearningNode.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RulePassiveLearningNode.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RulePassiveLearningNode.java new file mode 100644 index 0000000..c956fb6 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RulePassiveLearningNode.java @@ -0,0 +1,32 @@ +package org.apache.samoa.learners.classifiers.rules.common;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ * Copyright (C) 2014 - 2015 Apache Software Foundation
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use
/**
 * Marker interface for a rule LearningNode that never updates the statistics
 * used to decide whether its rule should expand; such a node only maintains
 * the statistics required to compute predictions.
 *
 * @author Anh Thu Vu
 */
public interface RulePassiveLearningNode {

}
+ * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.instances.Instance; +import org.apache.samoa.moa.core.DoubleVector; + +/** + * LearningNode for regression rule that does not update statistics for expanding rule. It only updates statistics for + * computing predictions. + * + * @author Anh Thu Vu + * + */ +public class RulePassiveRegressionNode extends RuleRegressionNode implements RulePassiveLearningNode { + + /** + * + */ + private static final long serialVersionUID = 3720878438856489690L; + + public RulePassiveRegressionNode(double[] statistics) { + super(statistics); + } + + public RulePassiveRegressionNode() { + super(); + } + + public RulePassiveRegressionNode(RuleRegressionNode activeLearningNode) { + this.predictionFunction = activeLearningNode.predictionFunction; + this.ruleNumberID = activeLearningNode.ruleNumberID; + this.nodeStatistics = new DoubleVector(activeLearningNode.nodeStatistics); + this.learningRatio = activeLearningNode.learningRatio; + this.perceptron = new Perceptron(activeLearningNode.perceptron, true); + this.targetMean = new TargetMean(activeLearningNode.targetMean); + } + + /* + * Update with input instance + */ + @Override + public void updateStatistics(Instance inst) { + // Update the statistics for this node + // number of instances passing through the node + nodeStatistics.addToValue(0, 1); + // sum of y values + nodeStatistics.addToValue(1, inst.classValue()); + // sum of squared y values + nodeStatistics.addToValue(2, inst.classValue() * inst.classValue()); + + this.perceptron.trainOnInstance(inst); + if (this.predictionFunction != 1) { // Train target mean if prediction function is not Perceptron + this.targetMean.trainOnInstance(inst); + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RuleRegressionNode.java 
---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RuleRegressionNode.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RuleRegressionNode.java new file mode 100644 index 0000000..512ebab --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RuleRegressionNode.java @@ -0,0 +1,294 @@ +package org.apache.samoa.learners.classifiers.rules.common; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.io.Serializable; + +import org.apache.samoa.instances.Instance; +import org.apache.samoa.moa.core.DoubleVector; + +/** + * The base class for LearningNode for regression rule. 
+ * + * @author Anh Thu Vu + * + */ +public abstract class RuleRegressionNode implements Serializable { + + private static final long serialVersionUID = 9129659494380381126L; + + protected int predictionFunction; + protected int ruleNumberID; + // The statistics for this node: + // Number of instances that have reached it + // Sum of y values + // Sum of squared y values + protected DoubleVector nodeStatistics; + + protected Perceptron perceptron; + protected TargetMean targetMean; + protected double learningRatio; + + /* + * Simple setters & getters + */ + public Perceptron getPerceptron() { + return perceptron; + } + + public void setPerceptron(Perceptron perceptron) { + this.perceptron = perceptron; + } + + public TargetMean getTargetMean() { + return targetMean; + } + + public void setTargetMean(TargetMean targetMean) { + this.targetMean = targetMean; + } + + /* + * Create a new RuleRegressionNode + */ + public RuleRegressionNode(double[] initialClassObservations) { + this.nodeStatistics = new DoubleVector(initialClassObservations); + } + + public RuleRegressionNode() { + this(new double[0]); + } + + /* + * Update statistics with input instance + */ + public abstract void updateStatistics(Instance instance); + + /* + * Predictions + */ + public double[] getPrediction(Instance instance) { + int predictionMode = this.getLearnerToUse(this.predictionFunction); + return getPrediction(instance, predictionMode); + } + + public double[] getSimplePrediction() { + if (this.targetMean != null) + return this.targetMean.getVotesForInstance(); + else + return new double[] { 0 }; + } + + public double[] getPrediction(Instance instance, int predictionMode) { + double[] ret; + if (predictionMode == 1) + ret = this.perceptron.getVotesForInstance(instance); + else + ret = this.targetMean.getVotesForInstance(instance); + return ret; + } + + public double getNormalizedPrediction(Instance instance) { + double res; + double[] aux; + switch (this.predictionFunction) { + // perceptron 
- 1 + case 1: + res = this.perceptron.normalizedPrediction(instance); + break; + // target mean - 2 + case 2: + aux = this.targetMean.getVotesForInstance(); + res = normalize(aux[0]); + break; + // adaptive - 0 + case 0: + int predictionMode = this.getLearnerToUse(0); + if (predictionMode == 1) + { + res = this.perceptron.normalizedPrediction(instance); + } + else { + aux = this.targetMean.getVotesForInstance(instance); + res = normalize(aux[0]); + } + break; + default: + throw new UnsupportedOperationException("Prediction mode not in range."); + } + return res; + } + + /* + * Get learner mode + */ + public int getLearnerToUse(int predMode) { + int predictionMode = predMode; + if (predictionMode == 0) { + double perceptronError = this.perceptron.getCurrentError(); + double meanTargetError = this.targetMean.getCurrentError(); + if (perceptronError < meanTargetError) + predictionMode = 1; // PERCEPTRON + else + predictionMode = 2; // TARGET MEAN + } + return predictionMode; + } + + /* + * Error and change detection + */ + public double computeError(Instance instance) { + double normalizedPrediction = getNormalizedPrediction(instance); + double normalizedClassValue = normalize(instance.classValue()); + return Math.abs(normalizedClassValue - normalizedPrediction); + } + + public double getCurrentError() { + double error; + if (this.perceptron != null) { + if (targetMean == null) + error = perceptron.getCurrentError(); + else { + double errorP = perceptron.getCurrentError(); + double errorTM = targetMean.getCurrentError(); + error = (errorP < errorTM) ? errorP : errorTM; + } + } + else + error = Double.MAX_VALUE; + return error; + } + + /* + * no. 
of instances seen + */ + public long getInstancesSeen() { + if (nodeStatistics != null) { + return (long) this.nodeStatistics.getValue(0); + } else { + return 0; + } + } + + public DoubleVector getNodeStatistics() { + return this.nodeStatistics; + } + + /* + * Anomaly detection + */ + public boolean isAnomaly(Instance instance, + double uniVariateAnomalyProbabilityThreshold, + double multiVariateAnomalyProbabilityThreshold, + int numberOfInstanceesForAnomaly) { + // AMRUles is equipped with anomaly detection. If on, compute the anomaly + // value. + long perceptronIntancesSeen = this.perceptron.getInstancesSeen(); + if (perceptronIntancesSeen >= numberOfInstanceesForAnomaly) { + double attribSum; + double attribSquaredSum; + double D = 0.0; + double N = 0.0; + double anomaly; + + for (int x = 0; x < instance.numAttributes() - 1; x++) { + // Perceptron is initialized each rule. + // this is a local anomaly. + int instAttIndex = modelAttIndexToInstanceAttIndex(x, instance); + attribSum = this.perceptron.perceptronattributeStatistics.getValue(x); + attribSquaredSum = this.perceptron.squaredperceptronattributeStatistics.getValue(x); + double mean = attribSum / perceptronIntancesSeen; + double sd = computeSD(attribSquaredSum, attribSum, perceptronIntancesSeen); + double probability = computeProbability(mean, sd, instance.value(instAttIndex)); + + if (probability > 0.0) { + D = D + Math.abs(Math.log(probability)); + if (probability < uniVariateAnomalyProbabilityThreshold) {// 0.10 + N = N + Math.abs(Math.log(probability)); + } + } + } + + anomaly = 0.0; + if (D != 0.0) { + anomaly = N / D; + } + if (anomaly >= multiVariateAnomalyProbabilityThreshold) { + // debuganomaly(instance, + // uniVariateAnomalyProbabilityThreshold, + // multiVariateAnomalyProbabilityThreshold, + // anomaly); + return true; + } + } + return false; + } + + /* + * Helpers + */ + public static double computeProbability(double mean, double sd, double value) { + double probability = 0.0; + + if (sd > 
0.0) { + double k = (Math.abs(value - mean) / sd); // One tailed variant of Chebyshev's inequality + probability = 1.0 / (1 + k * k); + } + + return probability; + } + + public static double computeHoeffdingBound(double range, double confidence, double n) { + return Math.sqrt(((range * range) * Math.log(1.0 / confidence)) / (2.0 * n)); + } + + private double normalize(double value) { + double meanY = this.nodeStatistics.getValue(1) / this.nodeStatistics.getValue(0); + double sdY = computeSD(this.nodeStatistics.getValue(2), this.nodeStatistics.getValue(1), + (long) this.nodeStatistics.getValue(0)); + double normalizedY = 0.0; + if (sdY > 0.0000001) { + normalizedY = (value - meanY) / (sdY); + } + return normalizedY; + } + + public double computeSD(double squaredVal, double val, long size) { + if (size > 1) { + return Math.sqrt((squaredVal - ((val * val) / size)) / (size - 1.0)); + } + return 0.0; + } + + /** + * Gets the index of the attribute in the instance, given the index of the attribute in the learner. + * + * @param index + * the index of the attribute in the learner + * @param inst + * the instance + * @return the index in the instance + */ + protected static int modelAttIndexToInstanceAttIndex(int index, Instance inst) { + return index <= inst.classIndex() ? 
index : index + 1; + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RuleSplitNode.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RuleSplitNode.java b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RuleSplitNode.java new file mode 100644 index 0000000..afb5b4e --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/learners/classifiers/rules/common/RuleSplitNode.java @@ -0,0 +1,68 @@ +package org.apache.samoa.learners.classifiers.rules.common; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.instances.Instance; +import org.apache.samoa.learners.classifiers.trees.SplitNode; +import org.apache.samoa.moa.classifiers.core.conditionaltests.InstanceConditionalTest; +import org.apache.samoa.moa.classifiers.rules.core.Predicate; +import org.apache.samoa.moa.classifiers.rules.core.conditionaltests.NumericAttributeBinaryRulePredicate; + +/** + * Represent a feature of rules (an element of ruleÅ nodeList). 
+ * + * @author Anh Thu Vu + * + */ +public class RuleSplitNode extends SplitNode { + + protected double lastTargetMean; + protected int operatorObserver; + + private static final long serialVersionUID = 1L; + + public InstanceConditionalTest getSplitTest() { + return this.splitTest; + } + + /** + * Create a new RuleSplitNode + */ + public RuleSplitNode() { + this(null, new double[0]); + } + + public RuleSplitNode(InstanceConditionalTest splitTest, double[] classObservations) { + super(splitTest, classObservations); + } + + public RuleSplitNode getACopy() { + InstanceConditionalTest splitTest = new NumericAttributeBinaryRulePredicate( + (NumericAttributeBinaryRulePredicate) this.getSplitTest()); + return new RuleSplitNode(splitTest, this.getObservedClassDistribution()); + } + + public boolean evaluate(Instance instance) { + Predicate predicate = (Predicate) this.splitTest; + return predicate.evaluate(instance); + } + +}
