[
https://issues.apache.org/jira/browse/MAHOUT-1991?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16147995#comment-16147995
]
ASF GitHub Bot commented on MAHOUT-1991:
----------------------------------------
Github user andrewpalumbo commented on a diff in the pull request:
https://github.com/apache/mahout/pull/334#discussion_r136179872
--- Diff:
math-scala/src/main/scala/org/apache/mahout/math/algorithms/clustering/DBSCAN.scala
---
@@ -0,0 +1,217 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.mahout.math.algorithms.clustering
+
+import org.apache.mahout.math.scalabindings._
+import org.apache.mahout.math.scalabindings.RLikeOps._
+import org.apache.mahout.math._
+import org.apache.mahout.math.algorithms.common.distance.{DistanceMetric,
DistanceMetricSelector}
+import org.apache.mahout.math.drm._
+import org.apache.mahout.math.drm.RLikeDrmOps._
+
+import scala.collection.mutable
+import scala.io.Source
+
+class DistributedDBSCAN extends ClusteringFitter {
+
+ var epsilon: Double = _
+ var minPts: Int = _
+ var distanceMeasure: Symbol = _
+
+ def setStandardHyperparameters(hyperparameters: Map[Symbol, Any] =
Map('foo -> None)): Unit = {
+ epsilon = hyperparameters.asInstanceOf[Map[Symbol,
Double]].getOrElse('epsilon, 0.5)
+ minPts = hyperparameters.asInstanceOf[Map[Symbol,
Int]].getOrElse('minPts, 1)
+ distanceMeasure = hyperparameters.asInstanceOf[Map[Symbol,
Symbol]].getOrElse('distanceMeasure, 'Euclidean)
+ }
+
+ def fit[K](input: DrmLike[K],
+ hyperparameters: (Symbol, Any)*): DBSCANModel = {
+
+ setStandardHyperparameters(hyperparameters.toMap)
+ implicit val ctx = input.context
+ implicit val ktag = input.keyClassTag
+
+ val dmNumber =
DistanceMetricSelector.namedMetricLookup(distanceMeasure)
+
+ val configBC = drmBroadcast(dvec(epsilon, minPts, dmNumber))
+
+ val clusters = input.allreduceBlock(
+ {
+ // Assign All Points to Clusters
+ case (keys, block: Matrix) => {
+ val epsilon_local = configBC.value.get(0)
+ val minPts_local = configBC.value.get(1)
+
+ val distanceMetric =
DistanceMetricSelector.select(configBC.value.get(3))
+ val icDBSCAN = new InCoreDBSCAN(block, epsilon_local,
minPts_local.toInt, distanceMetric)
+ // do stuff on icDBSCAN
+ icDBSCAN.DBSCAN()
+ }
+ }, {
+ // Optionally Merge Clusters that are close enough
+ case (metadata1: Matrix, metadata2: Matrix) => {
+ // this does nothing- just returns the left matrix
+ metadata1
+ }
+ })
+
+ val model = new DBSCANModel(1)
+ model.summary = s"""foo the bar"""
--- End diff --
@AdityaAS do you have a plan for the model summary?
> Implement naive DBSCAN Algorithm - O(n^2) complexity
> ----------------------------------------------------
>
> Key: MAHOUT-1991
> URL: https://issues.apache.org/jira/browse/MAHOUT-1991
> Project: Mahout
> Issue Type: New Feature
> Components: Algorithms
> Reporter: Aditya AS
> Assignee: Aditya AS
>
> Implement the naive DBSCAN algorithm in Mahout Samsara, as part of the
> Algorithms Framework.
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)