Repository: mahout Updated Branches: refs/heads/master 1f3566d35 -> 8f4ee88fb
MAHOUT-1866: Add matrix-to-tsv string function, this closes apache/mahout#237 Project: http://git-wip-us.apache.org/repos/asf/mahout/repo Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/8f4ee88f Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/8f4ee88f Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/8f4ee88f Branch: refs/heads/master Commit: 8f4ee88fb40710d983ea3fb6ad008317f6c00936 Parents: 1f3566d Author: smarthi <[email protected]> Authored: Sun May 29 13:06:59 2016 -0400 Committer: smarthi <[email protected]> Committed: Sun May 29 13:06:59 2016 -0400 ---------------------------------------------------------------------- .../org/apache/mahout/math/drm/package.scala | 36 ++++++++++++++++++++ 1 file changed, 36 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/mahout/blob/8f4ee88f/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala ---------------------------------------------------------------------- diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala b/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala index 291c538..86c7054 100644 --- a/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala +++ b/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala @@ -148,6 +148,42 @@ package object drm { def drmSampleKRows[K](drmX: DrmLike[K], numSamples: Int, replacement: Boolean = false): Matrix = drmX.context.engine.drmSampleKRows(drmX, numSamples, replacement) + /** + * Convert a DRM sample into a Tab Separated Values (TSV) to be loaded into an R-DataFrame + * for plotting and sketching + * @param drmX - DRM + * @param samplePercent - Percentage of Sample elements from the DRM to be fished out for plotting + * @tparam K + * @return TSV String + */ + def drmSampleToTSV[K](drmX: DrmLike[K], samplePercent: Double = 1): String = { + + val drmSize = drmX.checkpoint().numRows() + val 
sampleRatio: Double = 1.0 * samplePercent / 100 + val numSamples: Int = (drmSize * sampleRatio).toInt + + val plotMatrix = drmSampleKRows(drmX, numSamples, replacement = false) + + // Plot Matrix rows + val matrixRows = plotMatrix.numRows() + val matrixCols = plotMatrix.numCols() + + // Convert the Plot Matrix Rows to TSV + var str = "" + + for (i <- 0 until matrixRows) { + for (j <- 0 until matrixCols) { + str += plotMatrix(i, j) + if (j <= matrixCols - 2) { + str += '\t' + } + } + str += '\n' + } + + str + } + /////////////////////////////////////////////////////////// // Elementwise unary functions on distributed operands. def dexp[K](drmA: DrmLike[K]): DrmLike[K] = new OpAewUnaryFunc[K](drmA, math.exp, true)
