http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/streams/clustering/RandomRBFGeneratorEvents.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/streams/clustering/RandomRBFGeneratorEvents.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/streams/clustering/RandomRBFGeneratorEvents.java index a65d9a1..32a7f72 100644 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/streams/clustering/RandomRBFGeneratorEvents.java +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/streams/clustering/RandomRBFGeneratorEvents.java @@ -46,879 +46,875 @@ import com.yahoo.labs.samoa.moa.core.FastVector; import com.yahoo.labs.samoa.instances.Instance; import com.yahoo.labs.samoa.instances.Instances; - public class RandomRBFGeneratorEvents extends ClusteringStream { - private transient Vector listeners; - - private static final long serialVersionUID = 1L; - - public IntOption modelRandomSeedOption = new IntOption("modelRandomSeed", - 'm', "Seed for random generation of model.", 1); - - public IntOption instanceRandomSeedOption = new IntOption( - "instanceRandomSeed", 'i', - "Seed for random generation of instances.", 5); - - public IntOption numClusterOption = new IntOption("numCluster", 'K', - "The average number of centroids in the model.", 5, 1, Integer.MAX_VALUE); - - public IntOption numClusterRangeOption = new IntOption("numClusterRange", 'k', - "Deviation of the number of centroids in the model.", 3, 0, Integer.MAX_VALUE); - - public FloatOption kernelRadiiOption = new FloatOption("kernelRadius", 'R', - "The average radii of the centroids in the model.", 0.07, 0, 1); - - public FloatOption kernelRadiiRangeOption = new FloatOption("kernelRadiusRange", 'r', - "Deviation of average radii of the centroids in the model.", 0, 0, 1); - - public FloatOption densityRangeOption = new FloatOption("densityRange", 'd', - "Offset of the average weight a cluster has. Value of 0 means all cluster " + - "contain the same amount of points.", 0, 0, 1); - - public IntOption speedOption = new IntOption("speed", 'V', - "Kernels move a predefined distance of 0.01 every X points", 500, 1, Integer.MAX_VALUE); - - public IntOption speedRangeOption = new IntOption("speedRange", 'v', - "Speed/Velocity point offset", 0, 0, Integer.MAX_VALUE); - - public FloatOption noiseLevelOption = new FloatOption("noiseLevel", 'N', - "Noise level", 0.1, 0, 1); - - public FlagOption noiseInClusterOption = new FlagOption("noiseInCluster", 'n', - "Allow noise to be placed within a cluster"); - - public IntOption eventFrequencyOption = new IntOption("eventFrequency", 'E', - "Event frequency. Enable at least one of the events below and set numClusterRange!", 30000, 0, Integer.MAX_VALUE); - - public FlagOption eventMergeSplitOption = new FlagOption("eventMergeSplitOption", 'M', - "Enable merging and splitting of clusters. Set eventFrequency and numClusterRange!"); - - public FlagOption eventDeleteCreateOption = new FlagOption("eventDeleteCreate", 'C', - "Enable emering and disapperaing of clusters. Set eventFrequency and numClusterRange!"); - - - private double merge_threshold = 0.7; - private int kernelMovePointFrequency = 10; - private double maxDistanceMoveThresholdByStep = 0.01; - private int maxOverlapFitRuns = 50; - private double eventFrequencyRange = 0; - - private boolean debug = false; - - private AutoExpandVector<GeneratorCluster> kernels; - protected Random instanceRandom; - protected InstancesHeader streamHeader; - private int numGeneratedInstances; - private int numActiveKernels; - private int nextEventCounter; - private int nextEventChoice = -1; - private int clusterIdCounter; - private GeneratorCluster mergeClusterA; - private GeneratorCluster mergeClusterB; - private boolean mergeKernelsOverlapping = false; - - - - private class GeneratorCluster implements Serializable{ - //TODO: points is redundant to microclusterpoints, we need to come - //up with a good strategy that microclusters get updated and - //rebuild if needed. Idea: Sort microclusterpoints by timestamp and let - // microclusterdecay hold the timestamp for when the last point in a - //microcluster gets kicked, then we rebuild... or maybe not... could be - //same as searching for point to be kicked. more likely is we rebuild - //fewer times then insert. - - private static final long serialVersionUID = -6301649898961112942L; - - SphereCluster generator; - int kill = -1; - boolean merging = false; - double[] moveVector; - int totalMovementSteps; - int currentMovementSteps; - boolean isSplitting = false; - - LinkedList<DataPoint> points = new LinkedList<DataPoint>(); - ArrayList<SphereCluster> microClusters = new ArrayList<SphereCluster>(); - ArrayList<ArrayList<DataPoint>> microClustersPoints = new ArrayList(); - ArrayList<Integer> microClustersDecay = new ArrayList(); - - - public GeneratorCluster(int label) { - boolean outofbounds = true; - int tryCounter = 0; - while(outofbounds && tryCounter < maxOverlapFitRuns){ - tryCounter++; - outofbounds = false; - double[] center = new double [numAttsOption.getValue()]; - double radius = kernelRadiiOption.getValue()+(instanceRandom.nextBoolean()?-1:1)*kernelRadiiRangeOption.getValue()*instanceRandom.nextDouble(); - while(radius <= 0){ - radius = kernelRadiiOption.getValue()+(instanceRandom.nextBoolean()?-1:1)*kernelRadiiRangeOption.getValue()*instanceRandom.nextDouble(); - } - for (int j = 0; j < numAttsOption.getValue(); j++) { - center[j] = instanceRandom.nextDouble(); - if(center[j]- radius < 0 || center[j] + radius > 1){ - outofbounds = true; - break; - } - } - generator = new SphereCluster(center, radius); - } - if(tryCounter < maxOverlapFitRuns){ - generator.setId(label); - double avgWeight = 1.0/numClusterOption.getValue(); - double weight = avgWeight + (instanceRandom.nextBoolean()?-1:1)*avgWeight*densityRangeOption.getValue()*instanceRandom.nextDouble(); - generator.setWeight(weight); - setDesitnation(null); - } - else{ - generator = null; - kill = 0; - System.out.println("Tried "+maxOverlapFitRuns+" times to create kernel. Reduce average radii." ); - } + private transient Vector listeners; + + private static final long serialVersionUID = 1L; + + public IntOption modelRandomSeedOption = new IntOption("modelRandomSeed", + 'm', "Seed for random generation of model.", 1); + + public IntOption instanceRandomSeedOption = new IntOption( + "instanceRandomSeed", 'i', + "Seed for random generation of instances.", 5); + + public IntOption numClusterOption = new IntOption("numCluster", 'K', + "The average number of centroids in the model.", 5, 1, Integer.MAX_VALUE); + + public IntOption numClusterRangeOption = new IntOption("numClusterRange", 'k', + "Deviation of the number of centroids in the model.", 3, 0, Integer.MAX_VALUE); + + public FloatOption kernelRadiiOption = new FloatOption("kernelRadius", 'R', + "The average radii of the centroids in the model.", 0.07, 0, 1); + + public FloatOption kernelRadiiRangeOption = new FloatOption("kernelRadiusRange", 'r', + "Deviation of average radii of the centroids in the model.", 0, 0, 1); + + public FloatOption densityRangeOption = new FloatOption("densityRange", 'd', + "Offset of the average weight a cluster has. Value of 0 means all cluster " + + "contain the same amount of points.", 0, 0, 1); + + public IntOption speedOption = new IntOption("speed", 'V', + "Kernels move a predefined distance of 0.01 every X points", 500, 1, Integer.MAX_VALUE); + + public IntOption speedRangeOption = new IntOption("speedRange", 'v', + "Speed/Velocity point offset", 0, 0, Integer.MAX_VALUE); + + public FloatOption noiseLevelOption = new FloatOption("noiseLevel", 'N', + "Noise level", 0.1, 0, 1); + + public FlagOption noiseInClusterOption = new FlagOption("noiseInCluster", 'n', + "Allow noise to be placed within a cluster"); + + public IntOption eventFrequencyOption = new IntOption("eventFrequency", 'E', + "Event frequency. Enable at least one of the events below and set numClusterRange!", 30000, 0, Integer.MAX_VALUE); + + public FlagOption eventMergeSplitOption = new FlagOption("eventMergeSplitOption", 'M', + "Enable merging and splitting of clusters. Set eventFrequency and numClusterRange!"); + + public FlagOption eventDeleteCreateOption = new FlagOption("eventDeleteCreate", 'C', + "Enable emering and disapperaing of clusters. Set eventFrequency and numClusterRange!"); + + private double merge_threshold = 0.7; + private int kernelMovePointFrequency = 10; + private double maxDistanceMoveThresholdByStep = 0.01; + private int maxOverlapFitRuns = 50; + private double eventFrequencyRange = 0; + + private boolean debug = false; + + private AutoExpandVector<GeneratorCluster> kernels; + protected Random instanceRandom; + protected InstancesHeader streamHeader; + private int numGeneratedInstances; + private int numActiveKernels; + private int nextEventCounter; + private int nextEventChoice = -1; + private int clusterIdCounter; + private GeneratorCluster mergeClusterA; + private GeneratorCluster mergeClusterB; + private boolean mergeKernelsOverlapping = false; + + private class GeneratorCluster implements Serializable { + // TODO: points is redundant to microclusterpoints, we need to come + // up with a good strategy that microclusters get updated and + // rebuild if needed. Idea: Sort microclusterpoints by timestamp and let + // microclusterdecay hold the timestamp for when the last point in a + // microcluster gets kicked, then we rebuild... or maybe not... could be + // same as searching for point to be kicked. more likely is we rebuild + // fewer times then insert. + + private static final long serialVersionUID = -6301649898961112942L; + + SphereCluster generator; + int kill = -1; + boolean merging = false; + double[] moveVector; + int totalMovementSteps; + int currentMovementSteps; + boolean isSplitting = false; + + LinkedList<DataPoint> points = new LinkedList<DataPoint>(); + ArrayList<SphereCluster> microClusters = new ArrayList<SphereCluster>(); + ArrayList<ArrayList<DataPoint>> microClustersPoints = new ArrayList(); + ArrayList<Integer> microClustersDecay = new ArrayList(); + + public GeneratorCluster(int label) { + boolean outofbounds = true; + int tryCounter = 0; + while (outofbounds && tryCounter < maxOverlapFitRuns) { + tryCounter++; + outofbounds = false; + double[] center = new double[numAttsOption.getValue()]; + double radius = kernelRadiiOption.getValue() + (instanceRandom.nextBoolean() ? -1 : 1) + * kernelRadiiRangeOption.getValue() * instanceRandom.nextDouble(); + while (radius <= 0) { + radius = kernelRadiiOption.getValue() + (instanceRandom.nextBoolean() ? -1 : 1) + * kernelRadiiRangeOption.getValue() * instanceRandom.nextDouble(); + } + for (int j = 0; j < numAttsOption.getValue(); j++) { + center[j] = instanceRandom.nextDouble(); + if (center[j] - radius < 0 || center[j] + radius > 1) { + outofbounds = true; + break; + } } + generator = new SphereCluster(center, radius); + } + if (tryCounter < maxOverlapFitRuns) { + generator.setId(label); + double avgWeight = 1.0 / numClusterOption.getValue(); + double weight = avgWeight + (instanceRandom.nextBoolean() ? -1 : 1) * avgWeight * densityRangeOption.getValue() + * instanceRandom.nextDouble(); + generator.setWeight(weight); + setDesitnation(null); + } + else { + generator = null; + kill = 0; + System.out.println("Tried " + maxOverlapFitRuns + " times to create kernel. Reduce average radii."); + } + } - public GeneratorCluster(int label, SphereCluster cluster) { - this.generator = cluster; - cluster.setId(label); - setDesitnation(null); - } + public GeneratorCluster(int label, SphereCluster cluster) { + this.generator = cluster; + cluster.setId(label); + setDesitnation(null); + } - public int getWorkID(){ - for(int c = 0; c < kernels.size(); c++){ - if(kernels.get(c).equals(this)) - return c; - } - return -1; + public int getWorkID() { + for (int c = 0; c < kernels.size(); c++) { + if (kernels.get(c).equals(this)) + return c; + } + return -1; + } + + private void updateKernel() { + if (kill == 0) { + kernels.remove(this); + } + if (kill > 0) { + kill--; + } + // we could be lot more precise if we would keep track of timestamps of + // points + // then we could remove all old points and rebuild the cluster on up to + // date point base + // BUT worse the effort??? so far we just want to avoid overlap with this, + // so its more + // konservative as needed. Only needs to change when we need a thighter + // representation + for (int m = 0; m < microClusters.size(); m++) { + if (numGeneratedInstances - microClustersDecay.get(m) > decayHorizonOption.getValue()) { + microClusters.remove(m); + microClustersPoints.remove(m); + microClustersDecay.remove(m); } + } - private void updateKernel(){ - if(kill == 0){ - kernels.remove(this); - } - if(kill > 0){ - kill--; - } - //we could be lot more precise if we would keep track of timestamps of points - //then we could remove all old points and rebuild the cluster on up to date point base - //BUT worse the effort??? so far we just want to avoid overlap with this, so its more - //konservative as needed. Only needs to change when we need a thighter representation - for (int m = 0; m < microClusters.size(); m++) { - if(numGeneratedInstances-microClustersDecay.get(m) > decayHorizonOption.getValue()){ - microClusters.remove(m); - microClustersPoints.remove(m); - microClustersDecay.remove(m); - } - } + if (!points.isEmpty() + && numGeneratedInstances - points.getFirst().getTimestamp() >= decayHorizonOption.getValue()) { + // if(debug) + // System.out.println("Cleaning up macro cluster "+generator.getId()); + points.removeFirst(); + } - if(!points.isEmpty() && numGeneratedInstances-points.getFirst().getTimestamp() >= decayHorizonOption.getValue()){ -// if(debug) -// System.out.println("Cleaning up macro cluster "+generator.getId()); - points.removeFirst(); - } + } + private void addInstance(Instance instance) { + DataPoint point = new DataPoint(instance, numGeneratedInstances); + points.add(point); + + int minMicroIndex = -1; + double minHullDist = Double.MAX_VALUE; + boolean inserted = false; + // we favour more recently build clusters so we can remove earlier cluster + // sooner + for (int m = microClusters.size() - 1; m >= 0; m--) { + SphereCluster micro = microClusters.get(m); + double hulldist = micro.getCenterDistance(point) - micro.getRadius(); + // point fits into existing cluster + if (hulldist <= 0) { + microClustersPoints.get(m).add(point); + microClustersDecay.set(m, numGeneratedInstances); + inserted = true; + break; + } + // if not, check if its at least the closest cluster + else { + if (hulldist < minHullDist) { + minMicroIndex = m; + minHullDist = hulldist; + } + } + } + // Reseting index choice for alternative cluster building + int alt = 1; + if (alt == 1) + minMicroIndex = -1; + if (!inserted) { + // add to closest cluster and expand cluster + if (minMicroIndex != -1) { + microClustersPoints.get(minMicroIndex).add(point); + // we should keep the miniball instances and just check in + // new points instead of rebuilding the whole thing + SphereCluster s = new SphereCluster(microClustersPoints.get(minMicroIndex), numAttsOption.getValue()); + // check if current microcluster is bigger then generating cluster + if (s.getRadius() > generator.getRadius()) { + // remove previously added point + microClustersPoints.get(minMicroIndex).remove(microClustersPoints.get(minMicroIndex).size() - 1); + minMicroIndex = -1; + } + else { + microClusters.set(minMicroIndex, s); + microClustersDecay.set(minMicroIndex, numGeneratedInstances); + } + } + // minMicroIndex might have been reset above + // create new micro cluster + if (minMicroIndex == -1) { + ArrayList<DataPoint> microPoints = new ArrayList<DataPoint>(); + microPoints.add(point); + SphereCluster s; + if (alt == 0) + s = new SphereCluster(microPoints, numAttsOption.getValue()); + else + s = new SphereCluster(generator.getCenter(), generator.getRadius(), 1); + + microClusters.add(s); + microClustersPoints.add(microPoints); + microClustersDecay.add(numGeneratedInstances); + int id = 0; + while (id < kernels.size()) { + if (kernels.get(id) == this) + break; + id++; + } + s.setGroundTruth(id); } + } - private void addInstance(Instance instance){ - DataPoint point = new DataPoint(instance, numGeneratedInstances); - points.add(point); - - int minMicroIndex = -1; - double minHullDist = Double.MAX_VALUE; - boolean inserted = false; - //we favour more recently build clusters so we can remove earlier cluster sooner - for (int m = microClusters.size()-1; m >=0 ; m--) { - SphereCluster micro = microClusters.get(m); - double hulldist = micro.getCenterDistance(point)-micro.getRadius(); - //point fits into existing cluster - if(hulldist <= 0){ - microClustersPoints.get(m).add(point); - microClustersDecay.set(m, numGeneratedInstances); - inserted = true; - break; - } - //if not, check if its at least the closest cluster - else{ - if(hulldist < minHullDist){ - minMicroIndex = m; - minHullDist = hulldist; - } - } - } - //Reseting index choice for alternative cluster building - int alt = 1; - if(alt == 1) - minMicroIndex = -1; - if(!inserted){ - //add to closest cluster and expand cluster - if(minMicroIndex!=-1){ - microClustersPoints.get(minMicroIndex).add(point); - //we should keep the miniball instances and just check in - //new points instead of rebuilding the whole thing - SphereCluster s = new SphereCluster(microClustersPoints.get(minMicroIndex),numAttsOption.getValue()); - //check if current microcluster is bigger then generating cluster - if(s.getRadius() > generator.getRadius()){ - //remove previously added point - microClustersPoints.get(minMicroIndex).remove(microClustersPoints.get(minMicroIndex).size()-1); - minMicroIndex = -1; - } - else{ - microClusters.set(minMicroIndex, s); - microClustersDecay.set(minMicroIndex, numGeneratedInstances); - } - } - //minMicroIndex might have been reset above - //create new micro cluster - if(minMicroIndex == -1){ - ArrayList<DataPoint> microPoints = new ArrayList<DataPoint>(); - microPoints.add(point); - SphereCluster s; - if(alt == 0) - s = new SphereCluster(microPoints,numAttsOption.getValue()); - else - s = new SphereCluster(generator.getCenter(),generator.getRadius(),1); - - microClusters.add(s); - microClustersPoints.add(microPoints); - microClustersDecay.add(numGeneratedInstances); - int id = 0; - while(id < kernels.size()){ - if(kernels.get(id) == this) - break; - id++; - } - s.setGroundTruth(id); - } - } + } + private void move() { + if (currentMovementSteps < totalMovementSteps) { + currentMovementSteps++; + if (moveVector == null) { + return; + } + else { + double[] center = generator.getCenter(); + boolean outofbounds = true; + while (outofbounds) { + double radius = generator.getRadius(); + outofbounds = false; + center = generator.getCenter(); + for (int d = 0; d < center.length; d++) { + center[d] += moveVector[d]; + if (center[d] - radius < 0 || center[d] + radius > 1) { + outofbounds = true; + setDesitnation(null); + break; + } + } + } + generator.setCenter(center); + } + } + else { + if (!merging) { + setDesitnation(null); + isSplitting = false; } + } + } + void setDesitnation(double[] destination) { - private void move(){ - if(currentMovementSteps < totalMovementSteps){ - currentMovementSteps++; - if( moveVector == null){ - return; - } - else{ - double[] center = generator.getCenter(); - boolean outofbounds = true; - while(outofbounds){ - double radius = generator.getRadius(); - outofbounds = false; - center = generator.getCenter(); - for ( int d = 0; d < center.length; d++ ) { - center[d]+= moveVector[d]; - if(center[d]- radius < 0 || center[d] + radius > 1){ - outofbounds = true; - setDesitnation(null); - break; - } - } - } - generator.setCenter(center); - } - } - else{ - if(!merging){ - setDesitnation(null); - isSplitting = false; - } - } + if (destination == null) { + destination = new double[numAttsOption.getValue()]; + for (int j = 0; j < numAttsOption.getValue(); j++) { + destination[j] = instanceRandom.nextDouble(); } + } + double[] center = generator.getCenter(); + int dim = center.length; - void setDesitnation(double[] destination){ + double[] v = new double[dim]; - if(destination == null){ - destination = new double [numAttsOption.getValue()]; - for (int j = 0; j < numAttsOption.getValue(); j++) { - destination[j] = instanceRandom.nextDouble(); - } - } - double[] center = generator.getCenter(); - int dim = center.length; + for (int d = 0; d < dim; d++) { + v[d] = destination[d] - center[d]; + } + setMoveVector(v); + } - double[] v = new double[dim]; + void setMoveVector(double[] vector) { + // we are ignoring the steps, otherwise we have to change + // speed of the kernels, do we want that? + moveVector = vector; + int speedInPoints = speedOption.getValue(); + if (speedRangeOption.getValue() > 0) + speedInPoints += (instanceRandom.nextBoolean() ? -1 : 1) * instanceRandom.nextInt(speedRangeOption.getValue()); + if (speedInPoints < 1) + speedInPoints = speedOption.getValue(); + + double length = 0; + for (int d = 0; d < moveVector.length; d++) { + length += Math.pow(vector[d], 2); + } + length = Math.sqrt(length); - for ( int d = 0; d < dim; d++ ) { - v[d]=destination[d]-center[d]; - } - setMoveVector(v); + totalMovementSteps = (int) (length / (maxDistanceMoveThresholdByStep * kernelMovePointFrequency) * speedInPoints); + for (int d = 0; d < moveVector.length; d++) { + moveVector[d] /= (double) totalMovementSteps; + } + + currentMovementSteps = 0; + // if(debug){ + // System.out.println("Setting new direction for C"+generator.getId()+": distance " + // +length+" in "+totalMovementSteps+" steps"); + // } + } + + private String tryMerging(GeneratorCluster merge) { + String message = ""; + double overlapDegree = generator.overlapRadiusDegree(merge.generator); + if (overlapDegree > merge_threshold) { + SphereCluster mcluster = merge.generator; + double radius = Math.max(generator.getRadius(), mcluster.getRadius()); + generator.combine(mcluster); + + // //adjust radius, get bigger and bigger with high dim data + generator.setRadius(radius); + // double[] center = generator.getCenter(); + // double[] mcenter = mcluster.getCenter(); + // double weight = generator.getWeight(); + // double mweight = generator.getWeight(); + // // for (int i = 0; i < center.length; i++) { + // // center[i] = (center[i] * weight + mcenter[i] * mweight) / (mweight + // + weight); + // // } + // generator.setWeight(weight + mweight); + message = "Clusters merging: " + mergeClusterB.generator.getId() + " into " + mergeClusterA.generator.getId(); + + // clean up and restet merging stuff + // mark kernel so it gets killed when it doesn't contain any more + // instances + merge.kill = decayHorizonOption.getValue(); + // set weight to 0 so no new instances will be created in the cluster + mcluster.setWeight(0.0); + normalizeWeights(); + numActiveKernels--; + mergeClusterB = mergeClusterA = null; + merging = false; + mergeKernelsOverlapping = false; + } + else { + if (overlapDegree > 0 && !mergeKernelsOverlapping) { + mergeKernelsOverlapping = true; + message = "Merge overlapping started"; } + } + return message; + } - void setMoveVector(double[] vector){ - //we are ignoring the steps, otherwise we have to change - //speed of the kernels, do we want that? - moveVector = vector; - int speedInPoints = speedOption.getValue(); - if(speedRangeOption.getValue() > 0) - speedInPoints +=(instanceRandom.nextBoolean()?-1:1)*instanceRandom.nextInt(speedRangeOption.getValue()); - if(speedInPoints < 1) speedInPoints = speedOption.getValue(); + private String splitKernel() { + isSplitting = true; + // todo radius range + double radius = kernelRadiiOption.getValue(); + double avgWeight = 1.0 / numClusterOption.getValue(); + double weight = avgWeight + avgWeight * densityRangeOption.getValue() * instanceRandom.nextDouble(); + SphereCluster spcluster = null; + + double[] center = generator.getCenter(); + spcluster = new SphereCluster(center, radius, weight); + + if (spcluster != null) { + GeneratorCluster gc = new GeneratorCluster(clusterIdCounter++, spcluster); + gc.isSplitting = true; + kernels.add(gc); + normalizeWeights(); + numActiveKernels++; + return "Split from Kernel " + generator.getId(); + } + else { + System.out.println("Tried to split new kernel from C" + generator.getId() + + ". Not enough room for new cluster, decrease average radii, number of clusters or enable overlap."); + return ""; + } + } + private String fadeOut() { + kill = decayHorizonOption.getValue(); + generator.setWeight(0.0); + numActiveKernels--; + normalizeWeights(); + return "Fading out C" + generator.getId(); + } - double length = 0; - for ( int d = 0; d < moveVector.length; d++ ) { - length+=Math.pow(vector[d],2); - } - length = Math.sqrt(length); + } - totalMovementSteps = (int)(length/(maxDistanceMoveThresholdByStep*kernelMovePointFrequency)*speedInPoints); - for ( int d = 0; d < moveVector.length; d++ ) { - moveVector[d]/=(double)totalMovementSteps; - } + public RandomRBFGeneratorEvents() { + noiseInClusterOption.set(); + // eventDeleteCreateOption.set(); + // eventMergeSplitOption.set(); + } + public InstancesHeader getHeader() { + return streamHeader; + } - currentMovementSteps = 0; -// if(debug){ -// System.out.println("Setting new direction for C"+generator.getId()+": distance " -// +length+" in "+totalMovementSteps+" steps"); -// } - } + public long estimatedRemainingInstances() { + return -1; + } - private String tryMerging(GeneratorCluster merge){ - String message = ""; - double overlapDegree = generator.overlapRadiusDegree(merge.generator); - if(overlapDegree > merge_threshold){ - SphereCluster mcluster = merge.generator; - double radius = Math.max(generator.getRadius(), mcluster.getRadius()); - generator.combine(mcluster); - -// //adjust radius, get bigger and bigger with high dim data - generator.setRadius(radius); -// double[] center = generator.getCenter(); -// double[] mcenter = mcluster.getCenter(); -// double weight = generator.getWeight(); -// double mweight = generator.getWeight(); -//// for (int i = 0; i < center.length; i++) { -//// center[i] = (center[i] * weight + mcenter[i] * mweight) / (mweight + weight); -//// } -// generator.setWeight(weight + mweight); - message = "Clusters merging: "+mergeClusterB.generator.getId()+" into "+mergeClusterA.generator.getId(); - - //clean up and restet merging stuff - //mark kernel so it gets killed when it doesn't contain any more instances - merge.kill = decayHorizonOption.getValue(); - //set weight to 0 so no new instances will be created in the cluster - mcluster.setWeight(0.0); - normalizeWeights(); - numActiveKernels--; - mergeClusterB = mergeClusterA = null; - merging = false; - mergeKernelsOverlapping = false; - } - else{ - if(overlapDegree > 0 && !mergeKernelsOverlapping){ - mergeKernelsOverlapping = true; - message = "Merge overlapping started"; - } - } - return message; - } + public boolean hasMoreInstances() { + return true; + } - private String splitKernel(){ - isSplitting = true; - //todo radius range - double radius = kernelRadiiOption.getValue(); - double avgWeight = 1.0/numClusterOption.getValue(); - double weight = avgWeight + avgWeight*densityRangeOption.getValue()*instanceRandom.nextDouble(); - SphereCluster spcluster = null; - - double[] center = generator.getCenter(); - spcluster = new SphereCluster(center, radius, weight); - - if(spcluster !=null){ - GeneratorCluster gc = new GeneratorCluster(clusterIdCounter++, spcluster); - gc.isSplitting = true; - kernels.add(gc); - normalizeWeights(); - numActiveKernels++; - return "Split from Kernel "+generator.getId(); - } - else{ - System.out.println("Tried to split new kernel from C"+generator.getId()+ - ". Not enough room for new cluster, decrease average radii, number of clusters or enable overlap."); - return ""; - } - } - - private String fadeOut(){ - kill = decayHorizonOption.getValue(); - generator.setWeight(0.0); - numActiveKernels--; - normalizeWeights(); - return "Fading out C"+generator.getId(); - } - - - } + public boolean isRestartable() { + return true; + } - public RandomRBFGeneratorEvents() { - noiseInClusterOption.set(); -// eventDeleteCreateOption.set(); -// eventMergeSplitOption.set(); - } + @Override + public void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { + monitor.setCurrentActivity("Preparing random RBF...", -1.0); + generateHeader(); + restart(); + } + + public void restart() { + instanceRandom = new Random(instanceRandomSeedOption.getValue()); + nextEventCounter = eventFrequencyOption.getValue(); + nextEventChoice = getNextEvent(); + numActiveKernels = 0; + numGeneratedInstances = 0; + clusterIdCounter = 0; + mergeClusterA = mergeClusterB = null; + kernels = new AutoExpandVector<GeneratorCluster>(); + + initKernels(); + } - public InstancesHeader getHeader() { - return streamHeader; + protected void generateHeader() { // 2013/06/02: Noise label + ArrayList<Attribute> attributes = new ArrayList<Attribute>(); + for (int i = 0; i < this.numAttsOption.getValue(); i++) { + attributes.add(new Attribute("att" + (i + 1))); } - public long estimatedRemainingInstances() { - return -1; + ArrayList<String> classLabels = new ArrayList<String>(); + for (int i = 0; i < this.numClusterOption.getValue(); i++) { + classLabels.add("class" + (i + 1)); } + if (noiseLevelOption.getValue() > 0) + classLabels.add("noise"); // The last label = "noise" + + attributes.add(new Attribute("class", classLabels)); + streamHeader = new InstancesHeader(new Instances(getCLICreationString(InstanceStream.class), attributes, 0)); + streamHeader.setClassIndex(streamHeader.numAttributes() - 1); + } - public boolean hasMoreInstances() { - return true; + protected void initKernels() { + for (int i = 0; i < numClusterOption.getValue(); i++) { + kernels.add(new GeneratorCluster(clusterIdCounter)); + numActiveKernels++; + clusterIdCounter++; } + normalizeWeights(); + } + + public InstanceExample nextInstance() { + numGeneratedInstances++; + eventScheduler(); + + // make room for the classlabel + double[] values_new = new double[numAttsOption.getValue()]; // +1 + double[] values = null; + int clusterChoice = -1; - public boolean isRestartable() { - return true; + if (instanceRandom.nextDouble() > noiseLevelOption.getValue()) { + clusterChoice = chooseWeightedElement(); + values = kernels.get(clusterChoice).generator.sample(instanceRandom).toDoubleArray(); + } + else { + // get ranodm noise point + values = getNoisePoint(); } - @Override - public void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { - monitor.setCurrentActivity("Preparing random RBF...", -1.0); - generateHeader(); - restart(); + if (Double.isNaN(values[0])) { + System.out.println("Instance corrupted:" + numGeneratedInstances); + } + System.arraycopy(values, 0, values_new, 0, values.length); + + Instance inst = new DenseInstance(1.0, values_new); + inst.setDataset(getHeader()); + if (clusterChoice == -1) { + // 2013/06/02 (Yunsu Kim) + // Noise instance has the last class value instead of "-1" + // Preventing ArrayIndexOutOfBoundsException in WriteStreamToARFFFile + inst.setClassValue(numClusterOption.getValue()); + } + else { + inst.setClassValue(kernels.get(clusterChoice).generator.getId()); + // Do we need micro cluster representation if have overlapping clusters? + // if(!overlappingOption.isSet()) + kernels.get(clusterChoice).addInstance(inst); } + // System.out.println(numGeneratedInstances+": Overlap is"+updateOverlaps()); - public void restart() { - instanceRandom = new Random(instanceRandomSeedOption.getValue()); - nextEventCounter = eventFrequencyOption.getValue(); - nextEventChoice = getNextEvent(); - numActiveKernels = 0; - numGeneratedInstances = 0; - clusterIdCounter = 0; - mergeClusterA = mergeClusterB = null; - kernels = new AutoExpandVector<GeneratorCluster>(); + return new InstanceExample(inst); + } - initKernels(); + public Clustering getGeneratingClusters() { + Clustering clustering = new Clustering(); + for (int c = 0; c < kernels.size(); c++) { + clustering.add(kernels.get(c).generator); } - - protected void generateHeader() { // 2013/06/02: Noise label - ArrayList<Attribute> attributes = new ArrayList<Attribute>(); - for (int i = 0; i < this.numAttsOption.getValue(); i++) { - attributes.add(new Attribute("att" + (i + 1))); - } - - ArrayList<String> classLabels = new ArrayList<String>(); - for (int i = 0; i < this.numClusterOption.getValue(); i++) { - classLabels.add("class" + (i + 1)); - } - if (noiseLevelOption.getValue() > 0) classLabels.add("noise"); // The last label = "noise" - - attributes.add(new Attribute("class", classLabels)); - streamHeader = new InstancesHeader(new Instances(getCLICreationString(InstanceStream.class), attributes, 0)); - streamHeader.setClassIndex(streamHeader.numAttributes() - 1); - } - - - protected void initKernels() { - for (int i = 0; i < numClusterOption.getValue(); i++) { - kernels.add(new GeneratorCluster(clusterIdCounter)); - numActiveKernels++; - clusterIdCounter++; - } - normalizeWeights(); + return clustering; + } + + public Clustering getMicroClustering() { + Clustering clustering = new Clustering(); + int id = 0; + + for (int c = 0; c < kernels.size(); c++) { + for (int m = 0; m < kernels.get(c).microClusters.size(); m++) { + kernels.get(c).microClusters.get(m).setId(id); + kernels.get(c).microClusters.get(m).setGroundTruth(kernels.get(c).generator.getId()); + clustering.add(kernels.get(c).microClusters.get(m)); + id++; + } } - public InstanceExample nextInstance() { - numGeneratedInstances++; - eventScheduler(); + // System.out.println("numMicroKernels "+clustering.size()); + return clustering; + } - //make room for the classlabel - double[] values_new = new double [numAttsOption.getValue()]; //+1 - double[] values = null; - int clusterChoice = -1; + /**************************** EVENTS ******************************************/ + private void eventScheduler() { - if(instanceRandom.nextDouble() > noiseLevelOption.getValue()){ - clusterChoice = chooseWeightedElement(); - values = kernels.get(clusterChoice).generator.sample(instanceRandom).toDoubleArray(); - } - else{ - //get ranodm noise point - values = getNoisePoint(); - } + for (int i = 0; i < kernels.size(); i++) { + kernels.get(i).updateKernel(); + } - if(Double.isNaN(values[0])){ - System.out.println("Instance corrupted:"+numGeneratedInstances); - } - System.arraycopy(values, 0, values_new, 0, values.length); - - Instance inst = new DenseInstance(1.0, values_new); - inst.setDataset(getHeader()); - if(clusterChoice == -1){ - // 2013/06/02 (Yunsu Kim) - // Noise instance has the last class value instead of "-1" - // Preventing ArrayIndexOutOfBoundsException in WriteStreamToARFFFile - inst.setClassValue(numClusterOption.getValue()); - } - else{ - inst.setClassValue(kernels.get(clusterChoice).generator.getId()); - //Do we need micro cluster representation if have overlapping clusters? - //if(!overlappingOption.isSet()) - kernels.get(clusterChoice).addInstance(inst); - } -// System.out.println(numGeneratedInstances+": Overlap is"+updateOverlaps()); - - return new InstanceExample(inst); + nextEventCounter--; + // only move kernels every 10 points, performance reasons???? + // should this be randomized as well??? + if (nextEventCounter % kernelMovePointFrequency == 0) { + // move kernels + for (int i = 0; i < kernels.size(); i++) { + kernels.get(i).move(); + // overlapControl(); + } } + if (eventFrequencyOption.getValue() == 0) { + return; + } - public Clustering getGeneratingClusters(){ - Clustering clustering = new Clustering(); - for (int c = 0; c < kernels.size(); c++) { - clustering.add(kernels.get(c).generator); - } - return clustering; + String type = ""; + String message = ""; + boolean eventFinished = false; + switch (nextEventChoice) { + case 0: + if (numActiveKernels > 1 && numActiveKernels > numClusterOption.getValue() - numClusterRangeOption.getValue()) { + message = mergeKernels(nextEventCounter); + type = "Merge"; + } + if (mergeClusterA == null && mergeClusterB == null && message.startsWith("Clusters merging")) { + eventFinished = true; + } + break; + case 1: + if (nextEventCounter <= 0) { + if (numActiveKernels < numClusterOption.getValue() + numClusterRangeOption.getValue()) { + type = "Split"; + message = splitKernel(); + } + eventFinished = true; + } + break; + case 2: + if (nextEventCounter <= 0) { + if (numActiveKernels > 1 && numActiveKernels > numClusterOption.getValue() - numClusterRangeOption.getValue()) { + message = fadeOut(); + type = "Delete"; + } + eventFinished = true; + } + break; + case 3: + if (nextEventCounter <= 0) { + if (numActiveKernels < numClusterOption.getValue() + numClusterRangeOption.getValue()) { + message = fadeIn(); + type = "Create"; + } + eventFinished = true; + } + break; + + } + if (eventFinished) { + nextEventCounter = (int) (eventFrequencyOption.getValue() + (instanceRandom.nextBoolean() ? -1 : 1) + * eventFrequencyOption.getValue() * eventFrequencyRange * instanceRandom.nextDouble()); + nextEventChoice = getNextEvent(); + // System.out.println("Next event choice: "+nextEventChoice); + } + if (!message.isEmpty()) { + message += " (numKernels = " + numActiveKernels + " at " + numGeneratedInstances + ")"; + if (!type.equals("Merge") || message.startsWith("Clusters merging")) + fireClusterChange(numGeneratedInstances, type, message); } + } - public Clustering getMicroClustering(){ - Clustering clustering = new Clustering(); - int id = 0; + private int getNextEvent() { + int choice = -1; + boolean lowerLimit = numActiveKernels <= numClusterOption.getValue() - numClusterRangeOption.getValue(); + boolean upperLimit = numActiveKernels >= numClusterOption.getValue() + numClusterRangeOption.getValue(); - for (int c = 0; c < kernels.size(); c++) { - for (int m = 0; m < kernels.get(c).microClusters.size(); m++) { - kernels.get(c).microClusters.get(m).setId(id); - kernels.get(c).microClusters.get(m).setGroundTruth(kernels.get(c).generator.getId()); - clustering.add(kernels.get(c).microClusters.get(m)); - id++; - } - } + if (!lowerLimit || !upperLimit) { + int mode = -1; + if (eventDeleteCreateOption.isSet() && eventMergeSplitOption.isSet()) { + mode = instanceRandom.nextInt(2); + } - //System.out.println("numMicroKernels "+clustering.size()); - return clustering; - } + if (mode == 0 || (mode == -1 && eventMergeSplitOption.isSet())) { + // have we reached a limit? if not free choice + if (!lowerLimit && !upperLimit) + choice = instanceRandom.nextInt(2); + else + // we have a limit. if lower limit, choose split + if (lowerLimit) + choice = 1; + // otherwise we reached upper level, choose merge + else + choice = 0; + } -/**************************** EVENTS ******************************************/ - private void eventScheduler(){ + if (mode == 1 || (mode == -1 && eventDeleteCreateOption.isSet())) { + // have we reached a limit? if not free choice + if (!lowerLimit && !upperLimit) + choice = instanceRandom.nextInt(2) + 2; + else + // we have a limit. if lower limit, choose create + if (lowerLimit) + choice = 3; + // otherwise we reached upper level, choose delete + else + choice = 2; + } + } - for ( int i = 0; i < kernels.size(); i++ ) { - kernels.get(i).updateKernel(); - } - - nextEventCounter--; - //only move kernels every 10 points, performance reasons???? - //should this be randomized as well??? - if(nextEventCounter%kernelMovePointFrequency == 0){ - //move kernels - for ( int i = 0; i < kernels.size(); i++ ) { - kernels.get(i).move(); - //overlapControl(); - } - } + return choice; + } + private String fadeOut() { + int id = instanceRandom.nextInt(kernels.size()); + while (kernels.get(id).kill != -1) + id = instanceRandom.nextInt(kernels.size()); - if(eventFrequencyOption.getValue() == 0){ - return; - } + String message = kernels.get(id).fadeOut(); + return message; + } - String type =""; - String message =""; - boolean eventFinished = false; - switch(nextEventChoice){ - case 0: - if(numActiveKernels > 1 && numActiveKernels > numClusterOption.getValue() - numClusterRangeOption.getValue()){ - message = mergeKernels(nextEventCounter); - type = "Merge"; - } - if(mergeClusterA==null && mergeClusterB==null && message.startsWith("Clusters merging")){ - eventFinished = true; - } - break; - case 1: - if(nextEventCounter<=0){ - if(numActiveKernels < numClusterOption.getValue() + numClusterRangeOption.getValue()){ - type = "Split"; - message = splitKernel(); - } - eventFinished = true; - } - break; - case 2: - if(nextEventCounter<=0){ - if(numActiveKernels > 1 && numActiveKernels > numClusterOption.getValue() - numClusterRangeOption.getValue()){ - message = fadeOut(); - type = "Delete"; - } - eventFinished = true; - } - break; - case 3: - if(nextEventCounter<=0){ - if(numActiveKernels < numClusterOption.getValue() + numClusterRangeOption.getValue()){ - message = fadeIn(); - type = "Create"; - } - eventFinished = true; - } - break; + private String fadeIn() { + GeneratorCluster gc = new GeneratorCluster(clusterIdCounter++); + kernels.add(gc); + numActiveKernels++; + normalizeWeights(); + return "Creating new cluster"; + } - } - if (eventFinished){ - nextEventCounter = (int)(eventFrequencyOption.getValue()+(instanceRandom.nextBoolean()?-1:1)*eventFrequencyOption.getValue()*eventFrequencyRange*instanceRandom.nextDouble()); - nextEventChoice = getNextEvent(); - //System.out.println("Next event choice: "+nextEventChoice); - } - if(!message.isEmpty()){ - message+=" (numKernels = "+numActiveKernels+" at "+numGeneratedInstances+")"; - if(!type.equals("Merge") || message.startsWith("Clusters merging")) - fireClusterChange(numGeneratedInstances, type, message); - } - } - - private int getNextEvent() { - int choice = -1; - boolean lowerLimit = numActiveKernels <= numClusterOption.getValue() - numClusterRangeOption.getValue(); - boolean upperLimit = numActiveKernels >= numClusterOption.getValue() + numClusterRangeOption.getValue(); - - if(!lowerLimit || !upperLimit){ - int mode = -1; - if(eventDeleteCreateOption.isSet() && eventMergeSplitOption.isSet()){ - mode = instanceRandom.nextInt(2); - } - - if(mode==0 || (mode==-1 && eventMergeSplitOption.isSet())){ - //have we reached a limit? if not free choice - if(!lowerLimit && !upperLimit) - choice = instanceRandom.nextInt(2); - else - //we have a limit. if lower limit, choose split - if(lowerLimit) - choice = 1; - //otherwise we reached upper level, choose merge - else - choice = 0; - } - - if(mode==1 || (mode==-1 && eventDeleteCreateOption.isSet())){ - //have we reached a limit? if not free choice - if(!lowerLimit && !upperLimit) - choice = instanceRandom.nextInt(2)+2; - else - //we have a limit. if lower limit, choose create - if(lowerLimit) - choice = 3; - //otherwise we reached upper level, choose delete - else - choice = 2; - } - } - - - return choice; - } - - private String fadeOut(){ - int id = instanceRandom.nextInt(kernels.size()); - while(kernels.get(id).kill!=-1) - id = instanceRandom.nextInt(kernels.size()); - - String message = kernels.get(id).fadeOut(); - return message; - } - - private String fadeIn(){ - GeneratorCluster gc = new GeneratorCluster(clusterIdCounter++); - kernels.add(gc); - numActiveKernels++; - normalizeWeights(); - return "Creating new cluster"; - } - - - private String changeWeight(boolean increase){ - double changeRate = 0.1; - int id = instanceRandom.nextInt(kernels.size()); - while(kernels.get(id).kill!=-1) - id = instanceRandom.nextInt(kernels.size()); - - int sign = 1; - if(!increase) - sign = -1; - double weight_old = kernels.get(id).generator.getWeight(); - double delta = sign*numActiveKernels*instanceRandom.nextDouble()*changeRate; - kernels.get(id).generator.setWeight(weight_old+delta); + private String changeWeight(boolean increase) { + double changeRate = 0.1; + int id = instanceRandom.nextInt(kernels.size()); + while (kernels.get(id).kill != -1) + id = instanceRandom.nextInt(kernels.size()); + + int sign = 1; + if (!increase) + sign = -1; + double weight_old = kernels.get(id).generator.getWeight(); + double delta = sign * numActiveKernels * instanceRandom.nextDouble() * changeRate; + kernels.get(id).generator.setWeight(weight_old + delta); + + normalizeWeights(); + + String message; + if (increase) + message = "Increase "; + else + message = "Decrease "; + message += " weight on Cluster " + id + " from " + weight_old + " to " + (weight_old + delta); + return message; - normalizeWeights(); + } - String message; - if(increase) - message = "Increase "; - else - message = "Decrease "; - message+=" weight on Cluster "+id+" from "+weight_old+" to "+(weight_old+delta); - return message; + private String changeRadius(boolean increase) { + double maxChangeRate = 0.1; + int id = instanceRandom.nextInt(kernels.size()); + while (kernels.get(id).kill != -1) + id = instanceRandom.nextInt(kernels.size()); + + int sign = 1; + if (!increase) + sign = -1; + + double r_old = kernels.get(id).generator.getRadius(); + double r_new = r_old + sign * r_old * instanceRandom.nextDouble() * maxChangeRate; + if (r_new >= 0.5) + return "Radius to big"; + kernels.get(id).generator.setRadius(r_new); + + String message; + if (increase) + message = "Increase "; + else + message = "Decrease "; + message += " radius on Cluster " + id + " from " + r_old + " to " + r_new; + return message; + } + private String splitKernel() { + int id = instanceRandom.nextInt(kernels.size()); + while (kernels.get(id).kill != -1) + id = instanceRandom.nextInt(kernels.size()); - } + String message = kernels.get(id).splitKernel(); - private String changeRadius(boolean increase){ - double maxChangeRate = 0.1; - int id = instanceRandom.nextInt(kernels.size()); - while(kernels.get(id).kill!=-1) - id = instanceRandom.nextInt(kernels.size()); + return message; + } - int sign = 1; - if(!increase) - sign = -1; + private String mergeKernels(int steps) { + if (numActiveKernels > 1 && ((mergeClusterA == null && mergeClusterB == null))) { + + // choose clusters to merge + double diseredDist = steps / speedOption.getValue() * maxDistanceMoveThresholdByStep; + double minDist = Double.MAX_VALUE; + // System.out.println("DisredDist:"+(2*diseredDist)); + for (int i = 0; i < kernels.size(); i++) { + for (int j = 0; j < i; j++) { + if (kernels.get(i).kill != -1 || kernels.get(j).kill != -1) { + continue; + } + else { + double kernelDist = kernels.get(i).generator.getCenterDistance(kernels.get(j).generator); + double d = kernelDist - 2 * diseredDist; + // System.out.println("Dist:"+i+" / "+j+" "+d); + if (Math.abs(d) < minDist && + (minDist != Double.MAX_VALUE || d > 0 || Math.abs(d) < 0.001)) { + minDist = Math.abs(d); + mergeClusterA = kernels.get(i); + mergeClusterB = kernels.get(j); + } + } + } + } - double r_old = kernels.get(id).generator.getRadius(); - double r_new =r_old+sign*r_old*instanceRandom.nextDouble()*maxChangeRate; - if(r_new >= 0.5) return "Radius to big"; - kernels.get(id).generator.setRadius(r_new); - - String message; - if(increase) - message = "Increase "; - else - message = "Decrease "; - message+=" radius on Cluster "+id+" from "+r_old+" to "+r_new; - return message; - } - - private String splitKernel(){ - int id = instanceRandom.nextInt(kernels.size()); - while(kernels.get(id).kill!=-1) - id = instanceRandom.nextInt(kernels.size()); - - String message = kernels.get(id).splitKernel(); - - return message; - } - - private String mergeKernels(int steps){ - if(numActiveKernels >1 && ((mergeClusterA == null && mergeClusterB == null))){ - - //choose clusters to merge - double diseredDist = steps / speedOption.getValue() * maxDistanceMoveThresholdByStep; - double minDist = Double.MAX_VALUE; -// System.out.println("DisredDist:"+(2*diseredDist)); - for(int i = 0; i < kernels.size(); i++){ - for(int j = 0; j < i; j++){ - if(kernels.get(i).kill!=-1 || kernels.get(j).kill!=-1){ - continue; - } - else{ - double kernelDist = kernels.get(i).generator.getCenterDistance(kernels.get(j).generator); - double d = kernelDist-2*diseredDist; -// System.out.println("Dist:"+i+" / "+j+" "+d); - if(Math.abs(d) < minDist && - (minDist != Double.MAX_VALUE || d>0 || Math.abs(d) < 0.001)){ - minDist = Math.abs(d); - mergeClusterA = kernels.get(i); - mergeClusterB = kernels.get(j); - } - } - } - } - - if(mergeClusterA!=null && mergeClusterB!=null){ - double[] merge_point = mergeClusterA.generator.getCenter(); - double[] v = mergeClusterA.generator.getDistanceVector(mergeClusterB.generator); - for (int i = 0; i < v.length; i++) { - merge_point[i]= merge_point[i]+v[i]*0.5; - } - - mergeClusterA.merging = true; - mergeClusterB.merging = true; - mergeClusterA.setDesitnation(merge_point); - mergeClusterB.setDesitnation(merge_point); - - if(debug){ - System.out.println("Center1"+Arrays.toString(mergeClusterA.generator.getCenter())); - System.out.println("Center2"+Arrays.toString(mergeClusterB.generator.getCenter())); - System.out.println("Vector"+Arrays.toString(v)); - - System.out.println("Try to merge cluster "+mergeClusterA.generator.getId()+ - " into "+mergeClusterB.generator.getId()+ - " at "+Arrays.toString(merge_point)+ - " time "+numGeneratedInstances); - } - return "Init merge"; - } + if (mergeClusterA != null && mergeClusterB != null) { + double[] merge_point = mergeClusterA.generator.getCenter(); + double[] v = mergeClusterA.generator.getDistanceVector(mergeClusterB.generator); + for (int i = 0; i < v.length; i++) { + merge_point[i] = merge_point[i] + v[i] * 0.5; } - if(mergeClusterA != null && mergeClusterB != null){ + mergeClusterA.merging = true; + mergeClusterB.merging = true; + mergeClusterA.setDesitnation(merge_point); + mergeClusterB.setDesitnation(merge_point); - //movekernels will move the kernels close to each other, - //we just need to check and merge here if they are close enough - return mergeClusterA.tryMerging(mergeClusterB); - } + if (debug) { + System.out.println("Center1" + Arrays.toString(mergeClusterA.generator.getCenter())); + System.out.println("Center2" + Arrays.toString(mergeClusterB.generator.getCenter())); + System.out.println("Vector" + Arrays.toString(v)); - return ""; + System.out.println("Try to merge cluster " + mergeClusterA.generator.getId() + + " into " + mergeClusterB.generator.getId() + + " at " + Arrays.toString(merge_point) + + " time " + numGeneratedInstances); + } + return "Init merge"; + } } + if (mergeClusterA != null && mergeClusterB != null) { + // movekernels will move the kernels close to each other, + // we just need to check and merge here if they are close enough + return mergeClusterA.tryMerging(mergeClusterB); + } + return ""; + } -/************************* TOOLS **************************************/ + /************************* TOOLS **************************************/ - public void getDescription(StringBuilder sb, int indent) { + public void getDescription(StringBuilder sb, int indent) { - } + } - private double[] getNoisePoint(){ - double [] sample = new double [numAttsOption.getValue()]; - boolean incluster = true; - int counter = 20; - while(incluster){ - for (int j = 0; j < numAttsOption.getValue(); j++) { - sample[j] = instanceRandom.nextDouble(); - } - incluster = false; - if(!noiseInClusterOption.isSet() && counter > 0){ - counter--; - for(int c = 0; c < kernels.size(); c++){ - for(int m = 0; m < kernels.get(c).microClusters.size(); m++){ - Instance inst = new DenseInstance(1, sample); - if(kernels.get(c).microClusters.get(m).getInclusionProbability(inst) > 0){ - incluster = true; - break; - } - } - if(incluster) - break; - } + private double[] getNoisePoint() { + double[] sample = new double[numAttsOption.getValue()]; + boolean incluster = true; + int counter = 20; + while (incluster) { + for (int j = 0; j < numAttsOption.getValue(); j++) { + sample[j] = instanceRandom.nextDouble(); + } + incluster = false; + if (!noiseInClusterOption.isSet() && counter > 0) { + counter--; + for (int c = 0; c < kernels.size(); c++) { + for (int m = 0; m < kernels.get(c).microClusters.size(); m++) { + Instance inst = new DenseInstance(1, sample); + if (kernels.get(c).microClusters.get(m).getInclusionProbability(inst) > 0) { + incluster = true; + break; } + } + if (incluster) + break; } - -// double [] sample = new double [numAttsOption.getValue()]; -// for (int j = 0; j < numAttsOption.getValue(); j++) { -// sample[j] = instanceRandom.nextDouble(); -// } - - return sample; + } } - private int chooseWeightedElement() { - double r = instanceRandom.nextDouble(); + // double [] sample = new double [numAttsOption.getValue()]; + // for (int j = 0; j < numAttsOption.getValue(); j++) { + // sample[j] = instanceRandom.nextDouble(); + // } - // Determine index of choosen element - int i = 0; - while (r > 0.0) { - r -= kernels.get(i).generator.getWeight(); - i++; - } - --i; // Overcounted once - //System.out.println(i); - return i; - } + return sample; + } - private void normalizeWeights(){ - double sumWeights = 0.0; - for (int i = 0; i < kernels.size(); i++) { - sumWeights+=kernels.get(i).generator.getWeight(); - } - for (int i = 0; i < kernels.size(); i++) { - kernels.get(i).generator.setWeight(kernels.get(i).generator.getWeight()/sumWeights); - } + private int chooseWeightedElement() { + double r = instanceRandom.nextDouble(); + + // Determine index of choosen element + int i = 0; + while (r > 0.0) { + r -= kernels.get(i).generator.getWeight(); + i++; } + --i; // Overcounted once + // System.out.println(i); + return i; + } + private void normalizeWeights() { + double sumWeights = 0.0; + for (int i = 0; i < kernels.size(); i++) { + sumWeights += kernels.get(i).generator.getWeight(); + } + for (int i = 0; i < kernels.size(); i++) { + kernels.get(i).generator.setWeight(kernels.get(i).generator.getWeight() / sumWeights); + } + } + /*************** EVENT Listener *********************/ + // should go into the superclass of the generator, create new one for cluster + // streams? - /*************** EVENT Listener *********************/ - // should go into the superclass of the generator, create new one for cluster streams? - /** Add a listener */ synchronized public void addClusterChangeListener(ClusterEventListener l) { if (listeners == null) @@ -939,17 +935,17 @@ public class RandomRBFGeneratorEvents extends ClusteringStream { if (listeners != null && !listeners.isEmpty()) { // create the event object to send ClusterEvent event = - new ClusterEvent(this, timestamp, type , message); + new ClusterEvent(this, timestamp, type, message); // make a copy of the listener list in case - // anyone adds/removes listeners + // anyone adds/removes listeners Vector targets; synchronized (this) { targets = (Vector) listeners.clone(); } // walk through the listener list and - // call the sunMoved method in each + // call the sunMoved method in each Enumeration e = targets.elements(); while (e.hasMoreElements()) { ClusterEventListener l = (ClusterEventListener) e.nextElement(); @@ -959,17 +955,13 @@ public class RandomRBFGeneratorEvents extends ClusteringStream { } } - @Override - public String getPurposeString() { - return "Generates a random radial basis function stream."; - } - - - public String getParameterString(){ - return ""; - } - - + @Override + public String getPurposeString() { + return "Generates a random radial basis function stream."; + } + public String getParameterString() { + return ""; + } }
http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/23a35dbe/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/streams/generators/HyperplaneGenerator.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/streams/generators/HyperplaneGenerator.java b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/streams/generators/HyperplaneGenerator.java index d0ab735..873ab90 100644 --- a/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/streams/generators/HyperplaneGenerator.java +++ b/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/streams/generators/HyperplaneGenerator.java @@ -45,134 +45,141 @@ import com.yahoo.labs.samoa.moa.tasks.TaskMonitor; */ public class HyperplaneGenerator extends AbstractOptionHandler implements InstanceStream { - @Override - public String getPurposeString() { - return "Generates a problem of predicting class of a rotating hyperplane."; - } + @Override + public String getPurposeString() { + return "Generates a problem of predicting class of a rotating hyperplane."; + } - private static final long serialVersionUID = 1L; + private static final long serialVersionUID = 1L; - public IntOption instanceRandomSeedOption = new IntOption("instanceRandomSeed", 'i', "Seed for random generation of instances.", 1); + public IntOption instanceRandomSeedOption = new IntOption("instanceRandomSeed", 'i', + "Seed for random generation of instances.", 1); - public IntOption numClassesOption = new IntOption("numClasses", 'c', "The number of classes to generate.", 2, 2, Integer.MAX_VALUE); + public IntOption numClassesOption = new IntOption("numClasses", 'c', "The number of classes to generate.", 2, 2, + Integer.MAX_VALUE); - public IntOption numAttsOption = new IntOption("numAtts", 'a', "The number of attributes to generate.", 10, 0, Integer.MAX_VALUE); + public IntOption numAttsOption = new IntOption("numAtts", 'a', "The number of attributes to generate.", 10, 0, + Integer.MAX_VALUE); - public IntOption numDriftAttsOption = new IntOption("numDriftAtts", 'k', "The number of attributes with drift.", 2, 0, Integer.MAX_VALUE); + public IntOption numDriftAttsOption = new IntOption("numDriftAtts", 'k', "The number of attributes with drift.", 2, + 0, Integer.MAX_VALUE); - public FloatOption magChangeOption = new FloatOption("magChange", 't', "Magnitude of the change for every example", 0.0, 0.0, 1.0); + public FloatOption magChangeOption = new FloatOption("magChange", 't', "Magnitude of the change for every example", + 0.0, 0.0, 1.0); - public IntOption noisePercentageOption = new IntOption("noisePercentage", 'n', "Percentage of noise to add to the data.", 5, 0, 100); + public IntOption noisePercentageOption = new IntOption("noisePercentage", 'n', + "Percentage of noise to add to the data.", 5, 0, 100); - public IntOption sigmaPercentageOption = new IntOption("sigmaPercentage", 's', "Percentage of probability that the direction of change is reversed.", 10, - 0, 100); + public IntOption sigmaPercentageOption = new IntOption("sigmaPercentage", 's', + "Percentage of probability that the direction of change is reversed.", 10, + 0, 100); - protected InstancesHeader streamHeader; + protected InstancesHeader streamHeader; - protected Random instanceRandom; + protected Random instanceRandom; - protected double[] weights; + protected double[] weights; - protected int[] sigma; + protected int[] sigma; - public int numberInstance; + public int numberInstance; - @Override - protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { - monitor.setCurrentActivity("Preparing hyperplane...", -1.0); - generateHeader(); - restart(); - } + @Override + protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { + monitor.setCurrentActivity("Preparing hyperplane...", -1.0); + generateHeader(); + restart(); + } - @SuppressWarnings({ "rawtypes", "unchecked" }) - protected void generateHeader() { - FastVector attributes = new FastVector(); - for (int i = 0; i < this.numAttsOption.getValue(); i++) { - attributes.addElement(new Attribute("att" + (i + 1))); - } - - FastVector classLabels = new FastVector(); - for (int i = 0; i < this.numClassesOption.getValue(); i++) { - classLabels.addElement("class" + (i + 1)); - } - attributes.addElement(new Attribute("class", classLabels)); - this.streamHeader = new InstancesHeader(new Instances(getCLICreationString(InstanceStream.class), attributes, 0)); - this.streamHeader.setClassIndex(this.streamHeader.numAttributes() - 1); + @SuppressWarnings({ "rawtypes", "unchecked" }) + protected void generateHeader() { + FastVector attributes = new FastVector(); + for (int i = 0; i < this.numAttsOption.getValue(); i++) { + attributes.addElement(new Attribute("att" + (i + 1))); } - @Override - public long estimatedRemainingInstances() { - return -1; + FastVector classLabels = new FastVector(); + for (int i = 0; i < this.numClassesOption.getValue(); i++) { + classLabels.addElement("class" + (i + 1)); } - - @Override - public InstancesHeader getHeader() { - return this.streamHeader; + attributes.addElement(new Attribute("class", classLabels)); + this.streamHeader = new InstancesHeader(new Instances(getCLICreationString(InstanceStream.class), attributes, 0)); + this.streamHeader.setClassIndex(this.streamHeader.numAttributes() - 1); + } + + @Override + public long estimatedRemainingInstances() { + return -1; + } + + @Override + public InstancesHeader getHeader() { + return this.streamHeader; + } + + @Override + public boolean hasMoreInstances() { + return true; + } + + @Override + public boolean isRestartable() { + return true; + } + + @Override + public Example<Instance> nextInstance() { + + int numAtts = this.numAttsOption.getValue(); + double[] attVals = new double[numAtts + 1]; + double sum = 0.0; + double sumWeights = 0.0; + for (int i = 0; i < numAtts; i++) { + attVals[i] = this.instanceRandom.nextDouble(); + sum += this.weights[i] * attVals[i]; + sumWeights += this.weights[i]; } - - @Override - public boolean hasMoreInstances() { - return true; + int classLabel; + if (sum >= sumWeights * 0.5) { + classLabel = 1; + } else { + classLabel = 0; } - - @Override - public boolean isRestartable() { - return true; + // Add Noise + if ((1 + (this.instanceRandom.nextInt(100))) <= this.noisePercentageOption.getValue()) { + classLabel = (classLabel == 0 ? 1 : 0); } - @Override - public Example<Instance> nextInstance() { - - int numAtts = this.numAttsOption.getValue(); - double[] attVals = new double[numAtts + 1]; - double sum = 0.0; - double sumWeights = 0.0; - for (int i = 0; i < numAtts; i++) { - attVals[i] = this.instanceRandom.nextDouble(); - sum += this.weights[i] * attVals[i]; - sumWeights += this.weights[i]; - } - int classLabel; - if (sum >= sumWeights * 0.5) { - classLabel = 1; - } else { - classLabel = 0; - } - // Add Noise - if ((1 + (this.instanceRandom.nextInt(100))) <= this.noisePercentageOption.getValue()) { - classLabel = (classLabel == 0 ? 1 : 0); - } - - Instance inst = new DenseInstance(1.0, attVals); - inst.setDataset(getHeader()); - inst.setClassValue(classLabel); - addDrift(); - return new InstanceExample(inst); + Instance inst = new DenseInstance(1.0, attVals); + inst.setDataset(getHeader()); + inst.setClassValue(classLabel); + addDrift(); + return new InstanceExample(inst); + } + + private void addDrift() { + for (int i = 0; i < this.numDriftAttsOption.getValue(); i++) { + this.weights[i] += (double) ((double) sigma[i]) * ((double) this.magChangeOption.getValue()); + if (// this.weights[i] >= 1.0 || this.weights[i] <= 0.0 || + (1 + (this.instanceRandom.nextInt(100))) <= this.sigmaPercentageOption.getValue()) { + this.sigma[i] *= -1; + } } - - private void addDrift() { - for (int i = 0; i < this.numDriftAttsOption.getValue(); i++) { - this.weights[i] += (double) ((double) sigma[i]) * ((double) this.magChangeOption.getValue()); - if (// this.weights[i] >= 1.0 || this.weights[i] <= 0.0 || - (1 + (this.instanceRandom.nextInt(100))) <= this.sigmaPercentageOption.getValue()) { - this.sigma[i] *= -1; - } - } - } - - @Override - public void restart() { - this.instanceRandom = new Random(this.instanceRandomSeedOption.getValue()); - this.weights = new double[this.numAttsOption.getValue()]; - this.sigma = new int[this.numAttsOption.getValue()]; - for (int i = 0; i < this.numAttsOption.getValue(); i++) { - this.weights[i] = this.instanceRandom.nextDouble(); - this.sigma[i] = (i < this.numDriftAttsOption.getValue() ? 1 : 0); - } + } + + @Override + public void restart() { + this.instanceRandom = new Random(this.instanceRandomSeedOption.getValue()); + this.weights = new double[this.numAttsOption.getValue()]; + this.sigma = new int[this.numAttsOption.getValue()]; + for (int i = 0; i < this.numAttsOption.getValue(); i++) { + this.weights[i] = this.instanceRandom.nextDouble(); + this.sigma[i] = (i < this.numDriftAttsOption.getValue() ? 1 : 0); } + } - @Override - public void getDescription(StringBuilder sb, int indent) { - // TODO Auto-generated method stub - } + @Override + public void getDescription(StringBuilder sb, int indent) { + // TODO Auto-generated method stub + } }
