Github user jkbradley commented on a diff in the pull request:
https://github.com/apache/spark/pull/2595#discussion_r18297137
--- Diff:
mllib/src/main/scala/org/apache/spark/mllib/tree/impl/DTStatsAggregator.scala
---
@@ -68,90 +93,72 @@ private[tree] abstract class DTStatsAggregator(
* (node, feature, left/right child) offset
from
* [[getLeftRightNodeFeatureOffsets]].
*/
- def getImpurityCalculator(nodeFeatureOffset: Int, binIndex: Int):
ImpurityCalculator = {
- impurityAggregator.getCalculator(allStats, nodeFeatureOffset +
binIndex * statsSize)
+ def getImpurityCalculator(featureOffset: Int, binIndex: Int):
ImpurityCalculator = {
+ impurityAggregator.getCalculator(allStats, featureOffset + binIndex *
statsSize)
}
/**
- * Update the stats for a given (node, feature, bin) for ordered
features, using the given label.
+ * Update the stats for a given (feature, bin) for ordered features,
using the given label.
*/
- def update(
- nodeIndex: Int,
- featureIndex: Int,
- binIndex: Int,
- label: Double,
- instanceWeight: Double): Unit = {
- val i = getNodeFeatureOffset(nodeIndex, featureIndex) + binIndex *
statsSize
+ def update(featureIndex: Int, binIndex: Int, label: Double,
instanceWeight: Double): Unit = {
+ val i = featureOffsets(featureIndex) + binIndex * statsSize
impurityAggregator.update(allStats, i, label, instanceWeight)
}
/**
- * Pre-compute node offset for use with [[nodeUpdate]].
- */
- def getNodeOffset(nodeIndex: Int): Int
-
- /**
* Faster version of [[update]].
- * Update the stats for a given (node, feature, bin) for ordered
features, using the given label.
- * @param nodeOffset Pre-computed node offset from [[getNodeOffset]].
+ * Update the stats for a given (feature, bin), using the given label.
+ * @param nodeFeatureOffset For ordered features, this is a
pre-computed feature offset
+ * from [[getNodeFeatureOffset]].
+ * For unordered features, this is a
pre-computed
+ * (feature, left/right child) offset from
+ * [[getLeftRightNodeFeatureOffsets]].
*/
- def nodeUpdate(
- nodeOffset: Int,
- nodeIndex: Int,
- featureIndex: Int,
+ def nodeFeatureUpdate(
+ nodeFeatureOffset: Int,
binIndex: Int,
label: Double,
- instanceWeight: Double): Unit
+ instanceWeight: Double): Unit = {
+ impurityAggregator.update(allStats, nodeFeatureOffset + binIndex *
statsSize,
+ label, instanceWeight)
+ }
/**
- * Pre-compute (node, feature) offset for use with [[nodeFeatureUpdate]].
+ * Pre-compute feature offset for use with [[nodeFeatureUpdate]].
* For ordered features only.
*/
- def getNodeFeatureOffset(nodeIndex: Int, featureIndex: Int): Int
+ def getFeatureOffset(featureIndex: Int): Int = {
+ require(!isUnordered(featureIndex),
+ s"DTStatsAggregator.getNodeFeatureOffset is for ordered features
only, but was called" +
+ s" for unordered feature $featureIndex.")
+ featureOffsets(featureIndex)
+ }
/**
- * Pre-compute (node, feature) offset for use with [[nodeFeatureUpdate]].
+ * Pre-compute feature offset for use with [[nodeFeatureUpdate]].
* For unordered features only.
*/
- def getLeftRightNodeFeatureOffsets(nodeIndex: Int, featureIndex: Int):
(Int, Int) = {
+ def getLeftRightFeatureOffsets(featureIndex: Int): (Int, Int) = {
require(isUnordered(featureIndex),
s"DTStatsAggregator.getLeftRightNodeFeatureOffsets is for unordered
features only," +
s" but was called for ordered feature $featureIndex.")
- val baseOffset = getNodeFeatureOffset(nodeIndex, featureIndex)
- (baseOffset, baseOffset + (metadata.numBins(featureIndex) >> 1) *
statsSize)
+ val baseOffset = featureOffsets(featureIndex)
+ (baseOffset, baseOffset + (numBins(featureIndex) >> 1) * statsSize)
}
/**
- * Faster version of [[update]].
- * Update the stats for a given (node, feature, bin), using the given
label.
- * @param nodeFeatureOffset For ordered features, this is a
pre-computed (node, feature) offset
+ * For a given feature, merge the stats for two bins.
+ * @param nodeFeatureOffset For ordered features, this is a
pre-computed feature offset
--- End diff --
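The doc comments in this diff still use the old names: "nodeFeatureOffset",
"getNodeFeatureOffset", and "getLeftRightNodeFeatureOffsets". They should be
updated to match the renamed methods (e.g. "getFeatureOffset" and
"getLeftRightFeatureOffsets").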
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like to enable it, or if the feature is enabled but not
working, please contact infrastructure at [email protected] or file a JIRA
ticket with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]