Github user mengxr commented on a diff in the pull request:
https://github.com/apache/spark/pull/1975#discussion_r16322132
--- Diff:
mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala ---
@@ -556,135 +568,98 @@ object DecisionTree extends Serializable with
Logging {
// shift when more than one group is used at deep tree level
val groupShift = numNodes * groupIndex
- /** Find the filters used before reaching the current code. */
- def findParentFilters(nodeIndex: Int): List[Filter] = {
- if (level == 0) {
- List[Filter]()
- } else {
- val nodeFilterIndex = math.pow(2, level).toInt - 1 + nodeIndex +
groupShift
- filters(nodeFilterIndex)
- }
- }
-
/**
- * Find whether the sample is valid input for the current node, i.e.,
whether it passes through
- * all the filters for the current node.
+ * Get the node index corresponding to this data point.
+ * This is used during training, mimicking prediction.
+ * @return Leaf index if the data point reaches a leaf.
+ * Otherwise, last node reachable in tree matching this
example.
*/
- def isSampleValid(parentFilters: List[Filter], treePoint: TreePoint):
Boolean = {
- // leaf
- if ((level > 0) && (parentFilters.length == 0)) {
- return false
- }
-
- // Apply each filter and check sample validity. Return false when
invalid condition found.
- parentFilters.foreach { filter =>
- val featureIndex = filter.split.feature
- val comparison = filter.comparison
- val isFeatureContinuous = filter.split.featureType == Continuous
- if (isFeatureContinuous) {
- val binId = treePoint.binnedFeatures(featureIndex)
- val bin = bins(featureIndex)(binId)
- val featureValue = bin.highSplit.threshold
- val threshold = filter.split.threshold
- comparison match {
- case -1 => if (featureValue > threshold) return false
- case 1 => if (featureValue <= threshold) return false
+ def predictNodeIndex(node: Node, features: Array[Int]): Int = {
--- End diff --
Rename `features` to `binnedFeatures`?
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it to be, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]