[
https://issues.apache.org/jira/browse/MAHOUT-1693?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14505937#comment-14505937
]
Andrew Palumbo edited comment on MAHOUT-1693 at 4/22/15 1:40 AM:
-----------------------------------------------------------------
Random 5000x5000 functional matrix view:
{code}
mahout> val inCoreB = Matrices.symmetricUniformView(5000, 5000, 1234)
inCoreB: org.apache.mahout.math.Matrix =
{
0 =>
{0:0.4586377101191827,1:0.07261898163580698,2:-0.4120814898385057,3:-0.34642075708405595,4:0.13562878996026145,5:0.45784520525982153,6:-0.4706505546910752,7:-0.23016615311845348,8:0.21877862831669628,9:0.479636479803259,10:0.19463854560108987,11:-0.22143915316681387,12:0.2097891013284503,13:-0.19744761560808702,14:-0.4407562546938582,15:0.3862335162012072,16:0.14407519172762487,17:0.04194022497803126,18:0.10862031391251574,19:-0.32684877921184075
...
1 =>
{0:0.48977896201757654,1:0.2695201068510176,2:0.2035624121801051,3:0.24936953161203088,4:-0.47270888285344,5:0.06258489700284349,6:0.49054005184097776,7:-0.18983013078048971,8:0.2565323032908135,9:-0.09123189392334696,10:0.3342825154746871,11:-0.015098754906761836,12:0.279540866689215...
{code}
Add 1 to that view and return a 5000x5000 DenseMatrix
{code}
mahout> val inCoreC = inCoreB +1
inCoreC: org.apache.mahout.math.Matrix =
{
0 =>
{0:1.4586377101191828,1:1.072618981635807,2:0.5879185101614943,3:0.653579242915944,4:1.1356287899602615,5:1.4578452052598214,6:0.5293494453089248,7:0.7698338468815465,8:1.2187786283166964,9:1.479636479803259,10:1.19463854560109,11:0.7785608468331862,12:1.2097891013284503,13:0.802552384391913,14:0.5592437453061418,15:1.3862335162012072,16:1.1440751917276248,17:1.0419402249780312,18:1.1086203139125157,19:0.6731512207881593
...
1 =>
{0:1.4897789620175765,1:1.2695201068510176,2:1.2035624121801052,3:1.2493695316120308,4:0.52729111714656,5:1.0625848970028435,6:1.4905400518409777,7:0.8101698692195103,8:1.2565323032908136,9:0.908768106076653,10:1.3342825154746871,11:0.9849012450932382,12:1.2795408666892159,13:0.5187942833296166,14:0.6908001073...
{code}
Random SparseMatrix of 1000x1000, ~27% of entries set to 1.0 (i.e. ~73% sparse, since P(nextGaussian() > 0.6) is about 0.27):
{code}
mahout> val spc = new SparseMatrix(1000,1000)
spc: org.apache.mahout.math.SparseMatrix =
{
...
mahout> val r= new Random()
r: java.util.Random = java.util.Random@27f62689
mahout> for( i <- 0 to 1000;j <-0 to 1000){ if (r.nextGaussian()> 0.6) spc(i,j)
= 1.0}
mahout> spc
res2: org.apache.mahout.math.SparseMatrix =
{
406 => {5:1.0,8:1.0,10:1.0,16:1.0 ...
610 => {0:1.0,2:1.0,6:1.0,8:1.0,9:1.0,10:1.0,15:1.0 ...
636 => {5:1.0,10:1.0,15:1.0 ...
612 => {2:1.0,6:1.0,7:1.0,10:1.0,13:1.0,14:1.0,15:1.0,19:1.0 ...
727 => {4:1.0,5:1.0,10:1.0,11:1.0 ...
900 => {1:1.0,6:1.0,11:1.0,15:1.0,16:1.0,19:1.0 ...
492 => {3:1.0,5:1.0,6:1.0,8:1.0,14:1.0,16:1.0,17:1.0,19:1.0 ...
875 => {2:1.0,7:1.0,9:1.0,10:1.0,13:1.0,14:1.0,17:1.0 ...
175 => {3:1.0,6:1.0,9:1.0,11:1.0,12:1.0,13:1.0,15:1.0,17:1.0 ...
723 => {0:1.0,11:1.0,15:1.0 ...
...
mahout>
{code}
DenseMatrix of 1000x1000 filled the same way, ~27% of entries set to 1.0 (i.e. ~73% zeros):
{code}
mahout> val dns = new DenseMatrix(1000,1000)
dns: org.apache.mahout.math.DenseMatrix =
{
0 => { ...
1 => { ...
2 => { ...
3 => { ...
4 => { ...
5 => { ...
6 => { ...
7 => { ...
8 => { ...
9 => { ...
...
mahout> for( i <- 0 until 1000;j <-0 until 1000){ if (r.nextGaussian()> 0.6)
dns(i,j) = 1.0}
mahout> dns
res5: org.apache.mahout.math.DenseMatrix =
{
0 => {0:1.0,1:1.0,4:1.0,6:1.0,8:1.0,9:1.0,10:1.0,12:1.0,15:1.0,17:1.0,18:1.0
...
1 =>
{0:1.0,2:1.0,4:1.0,5:1.0,6:1.0,9:1.0,10:1.0,11:1.0,13:1.0,15:1.0,16:1.0,17:1.0
...
2 => {0:1.0,1:1.0,2:1.0,5:1.0,6:1.0,7:1.0,8:1.0,9:1.0,14:1.0,16:1.0 ...
3 => {4:1.0,5:1.0,6:1.0,9:1.0,11:1.0,18:1.0 ...
4 => {1:1.0,9:1.0,10:1.0,17:1.0,19:1.0 ...
5 => {1:1.0,2:1.0,5:1.0,6:1.0,8:1.0,12:1.0,17:1.0 ...
6 => {0:1.0,2:1.0,3:1.0,5:1.0,8:1.0,9:1.0,12:1.0,18:1.0 ...
7 => {3:1.0,9:1.0 ...
8 => {2:1.0,5:1.0,7:1.0,10:1.0,11:1.0,13:1.0,16:1.0,17:1.0 ...
9 => {0:1.0,4:1.0,6:1.0,8:1.0,9:1.0,12:1.0,14:1.0,15:1.0 ...
...
mahout>
{code}
SparseColumnMatrix 1000x1000:
{code}
mahout> for( i <- 0 until 1000;j <-0 until 1000){ if (r.nextGaussian()> 0.6)
scm(i,j) = 1.0}
mahout> scm
res3: org.apache.mahout.math.SparseColumnMatrix =
{
0 => {8:1.0,12:1.0,17:1.0 ...
1 => {8:1.0,11:1.0 ...
2 => {0:1.0,11:1.0,14:1.0,17:1.0,18:1.0 ...
3 => {1:1.0,3:1.0,8:1.0,17:1.0,19:1.0 ...
4 => {0:1.0,1:1.0,2:1.0,3:1.0,7:1.0,9:1.0,11:1.0,12:1.0,16:1.0 ...
5 => {7:1.0,17:1.0 ...
6 => {0:1.0,3:1.0,4:1.0,11:1.0,19:1.0 ...
7 => {0:1.0,1:1.0,8:1.0,13:1.0 ...
8 => {3:1.0,4:1.0,8:1.0,14:1.0 ...
9 => {1:1.0,2:1.0,3:1.0,4:1.0,5:1.0,8:1.0,10:1.0 ...
...
{code}
500000 x 500000 random FunctionalMatrixView (*view only*, not materialized):
{code}
val incCoreA = Matrices.symmetricUniformView(500000, 500000, 1234)
incCoreA: org.apache.mahout.math.Matrix =
{
0 =>
{0:0.4586377101191827,1:0.07261898163580698,2:-0.4120814898385057,3:-0.34642075708405595,4:0.13562878996026145,5:0.45784520525982153,6:-0.4706505546910752,7:-0.23016615311845348,8:0.21877862831669628,9:0.479636479803259,10:0.19463854560108987,11:-0.22143915316681387,12:0.2097891013284503,13:-0.19744761560808702,14:-0.4407562546938582,15:0.3862335162012072,16:0.14407519172762487,17:0.04194022497803126,18:0.10862031391251574,19:-0.32684877921184075
...
1 =>
{0:0.48977896201757654,1:0.2695201068510176,2:0.2035624121801051,3:0.24936953161203088,4:-0.47270888285344,5:0.06258489700284349,6:0.49054005184097776,7:-0.18983013078048971,8:0.2565323032908135,9:-0.09123189392334696,10:0.3342825154746871,11:-0.015098754906761836,12:0.27954086668921...
{code}
All tested with default {{MAHOUT_HEAPSIZE}}
was (Author: andrew_palumbo):
Random 5000x5000 functional matrix view:
{code}
mahout> val inCoreB = Matrices.symmetricUniformView(5000, 5000, 1234)
inCoreB: org.apache.mahout.math.Matrix =
{
0 =>
{0:0.4586377101191827,1:0.07261898163580698,2:-0.4120814898385057,3:-0.34642075708405595,4:0.13562878996026145,5:0.45784520525982153,6:-0.4706505546910752,7:-0.23016615311845348,8:0.21877862831669628,9:0.479636479803259,10:0.19463854560108987,11:-0.22143915316681387,12:0.2097891013284503,13:-0.19744761560808702,14:-0.4407562546938582,15:0.3862335162012072,16:0.14407519172762487,17:0.04194022497803126,18:0.10862031391251574,19:-0.32684877921184075
...
1 =>
{0:0.48977896201757654,1:0.2695201068510176,2:0.2035624121801051,3:0.24936953161203088,4:-0.47270888285344,5:0.06258489700284349,6:0.49054005184097776,7:-0.18983013078048971,8:0.2565323032908135,9:-0.09123189392334696,10:0.3342825154746871,11:-0.015098754906761836,12:0.279540866689215...
{code}
Add 1 to that view and return a 5000x5000 DenseMatrix
{code}
mahout> val inCoreC = inCoreB +1
inCoreC: org.apache.mahout.math.Matrix =
{
0 =>
{0:1.4586377101191828,1:1.072618981635807,2:0.5879185101614943,3:0.653579242915944,4:1.1356287899602615,5:1.4578452052598214,6:0.5293494453089248,7:0.7698338468815465,8:1.2187786283166964,9:1.479636479803259,10:1.19463854560109,11:0.7785608468331862,12:1.2097891013284503,13:0.802552384391913,14:0.5592437453061418,15:1.3862335162012072,16:1.1440751917276248,17:1.0419402249780312,18:1.1086203139125157,19:0.6731512207881593
...
1 =>
{0:1.4897789620175765,1:1.2695201068510176,2:1.2035624121801052,3:1.2493695316120308,4:0.52729111714656,5:1.0625848970028435,6:1.4905400518409777,7:0.8101698692195103,8:1.2565323032908136,9:0.908768106076653,10:1.3342825154746871,11:0.9849012450932382,12:1.2795408666892159,13:0.5187942833296166,14:0.6908001073...
{code}
Random ~60% sparse Matrix of 1000x1000
{code}
mahout> val spc = new SparseMatrix(1000,1000)
spc: org.apache.mahout.math.SparseMatrix =
{
...
mahout> val r= new Random()
r: java.util.Random = java.util.Random@27f62689
mahout> for( i <- 0 to 1000;j <-0 to 1000){ if (r.nextGaussian()> 0.6) spc(i,j)
= 1.0}
mahout> spc
res2: org.apache.mahout.math.SparseMatrix =
{
406 => {5:1.0,8:1.0,10:1.0,16:1.0 ...
610 => {0:1.0,2:1.0,6:1.0,8:1.0,9:1.0,10:1.0,15:1.0 ...
636 => {5:1.0,10:1.0,15:1.0 ...
612 => {2:1.0,6:1.0,7:1.0,10:1.0,13:1.0,14:1.0,15:1.0,19:1.0 ...
727 => {4:1.0,5:1.0,10:1.0,11:1.0 ...
900 => {1:1.0,6:1.0,11:1.0,15:1.0,16:1.0,19:1.0 ...
492 => {3:1.0,5:1.0,6:1.0,8:1.0,14:1.0,16:1.0,17:1.0,19:1.0 ...
875 => {2:1.0,7:1.0,9:1.0,10:1.0,13:1.0,14:1.0,17:1.0 ...
175 => {3:1.0,6:1.0,9:1.0,11:1.0,12:1.0,13:1.0,15:1.0,17:1.0 ...
723 => {0:1.0,11:1.0,15:1.0 ...
...
mahout>
{code}
Random ~60% sparse DenseMatrix 1000x1000
{code}
mahout> val dns = new DenseMatrix(1000,1000)
dns: org.apache.mahout.math.DenseMatrix =
{
0 => { ...
1 => { ...
2 => { ...
3 => { ...
4 => { ...
5 => { ...
6 => { ...
7 => { ...
8 => { ...
9 => { ...
...
mahout> for( i <- 0 until 1000;j <-0 until 1000){ if (r.nextGaussian()> 0.6)
dns(i,j) = 1.0}
mahout> dns
res5: org.apache.mahout.math.DenseMatrix =
{
0 => {0:1.0,1:1.0,4:1.0,6:1.0,8:1.0,9:1.0,10:1.0,12:1.0,15:1.0,17:1.0,18:1.0
...
1 =>
{0:1.0,2:1.0,4:1.0,5:1.0,6:1.0,9:1.0,10:1.0,11:1.0,13:1.0,15:1.0,16:1.0,17:1.0
...
2 => {0:1.0,1:1.0,2:1.0,5:1.0,6:1.0,7:1.0,8:1.0,9:1.0,14:1.0,16:1.0 ...
3 => {4:1.0,5:1.0,6:1.0,9:1.0,11:1.0,18:1.0 ...
4 => {1:1.0,9:1.0,10:1.0,17:1.0,19:1.0 ...
5 => {1:1.0,2:1.0,5:1.0,6:1.0,8:1.0,12:1.0,17:1.0 ...
6 => {0:1.0,2:1.0,3:1.0,5:1.0,8:1.0,9:1.0,12:1.0,18:1.0 ...
7 => {3:1.0,9:1.0 ...
8 => {2:1.0,5:1.0,7:1.0,10:1.0,11:1.0,13:1.0,16:1.0,17:1.0 ...
9 => {0:1.0,4:1.0,6:1.0,8:1.0,9:1.0,12:1.0,14:1.0,15:1.0 ...
...
mahout>
{code}
> FunctionalMatrixView materializes row vectors in scala shell
> ------------------------------------------------------------
>
> Key: MAHOUT-1693
> URL: https://issues.apache.org/jira/browse/MAHOUT-1693
> Project: Mahout
> Issue Type: Bug
> Components: Mahout spark shell, Math
> Affects Versions: 0.10.0
> Reporter: Suneel Marthi
> Assignee: Andrew Palumbo
> Priority: Blocker
> Fix For: 0.10.1
>
>
> FunctionalMatrixView materializes row vectors in scala shell.
> Problem first reported by a user, Michael Alton (Intel):
> "When I first tried to make a large matrix, I got an out of Java heap space
> error. I increased the memory incrementally until I got it to work. “export
> MAHOUT_HEAPSIZE=8000” didn’t work, but “export MAHOUT_HEAPSIZE=64000” did.
> The question is why do we need so much memory? A 5000x5000 matrix of doubles
> should only take up ~200MB of space?"
> The problem has been narrowed down to FunctionalMatrixView not overriding
> the toString() method, which causes all of its row vectors to be
> materialized when it is displayed in the Mahout Spark Shell.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)