srowen commented on a change in pull request #25383: [SPARK-13677][ML] Implement Tree-Based Feature Transformation for ML
URL: https://github.com/apache/spark/pull/25383#discussion_r315709454
##########
File path: mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala
##########
@@ -78,13 +76,34 @@ private[spark] trait DecisionTreeModel {
   /** Convert to spark.mllib DecisionTreeModel (losing some information) */
   private[spark] def toOld: OldDecisionTreeModel
+
+  /**
+   * @return an iterator that traverses (DFS, left to right) the leaves
+   *         in the subtree of this node.
+   */
+  private def leafIterator(node: Node): Iterator[LeafNode] = {
+    node match {
+      case l: LeafNode => Iterator.single(l)
+      case n: InternalNode =>
+        leafIterator(n.leftChild) ++ leafIterator(n.rightChild)
+    }
+  }
+
+  @transient private lazy val leafIndices: Map[LeafNode, Int] = {
+    leafIterator(rootNode).zipWithIndex.toMap
+  }
+
+  /**
+   * @return the index of leaf given a input vector.
+   *         The leave are indexed from zero by pre-order.

Review comment:
Nit: leave -> leaves. I might just write: `@return the index of the leaf corresponding to the feature vector. Leaves are indexed in pre-order from 0.` (Same for other occurrences)

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org