[ 
https://issues.apache.org/jira/browse/MAHOUT-1693?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14505937#comment-14505937
 ] 

Andrew Palumbo edited comment on MAHOUT-1693 at 4/22/15 1:40 AM:
-----------------------------------------------------------------

Random 5000x5000 functional matrix view:
{code}
mahout> val inCoreB = Matrices.symmetricUniformView(5000, 5000, 1234)
inCoreB: org.apache.mahout.math.Matrix = 
{
 0 =>   
{0:0.4586377101191827,1:0.07261898163580698,2:-0.4120814898385057,3:-0.34642075708405595,4:0.13562878996026145,5:0.45784520525982153,6:-0.4706505546910752,7:-0.23016615311845348,8:0.21877862831669628,9:0.479636479803259,10:0.19463854560108987,11:-0.22143915316681387,12:0.2097891013284503,13:-0.19744761560808702,14:-0.4407562546938582,15:0.3862335162012072,16:0.14407519172762487,17:0.04194022497803126,18:0.10862031391251574,19:-0.32684877921184075
 ... 
 1 =>   
{0:0.48977896201757654,1:0.2695201068510176,2:0.2035624121801051,3:0.24936953161203088,4:-0.47270888285344,5:0.06258489700284349,6:0.49054005184097776,7:-0.18983013078048971,8:0.2565323032908135,9:-0.09123189392334696,10:0.3342825154746871,11:-0.015098754906761836,12:0.279540866689215...
{code}

Add 1 to that view and return a 5000x5000 DenseMatrix 
{code}
mahout> val inCoreC = inCoreB +1
inCoreC: org.apache.mahout.math.Matrix = 
{
 0 =>   
{0:1.4586377101191828,1:1.072618981635807,2:0.5879185101614943,3:0.653579242915944,4:1.1356287899602615,5:1.4578452052598214,6:0.5293494453089248,7:0.7698338468815465,8:1.2187786283166964,9:1.479636479803259,10:1.19463854560109,11:0.7785608468331862,12:1.2097891013284503,13:0.802552384391913,14:0.5592437453061418,15:1.3862335162012072,16:1.1440751917276248,17:1.0419402249780312,18:1.1086203139125157,19:0.6731512207881593
 ... 
 1 =>   
{0:1.4897789620175765,1:1.2695201068510176,2:1.2035624121801052,3:1.2493695316120308,4:0.52729111714656,5:1.0625848970028435,6:1.4905400518409777,7:0.8101698692195103,8:1.2565323032908136,9:0.908768106076653,10:1.3342825154746871,11:0.9849012450932382,12:1.2795408666892159,13:0.5187942833296166,14:0.6908001073...
 
{code}

Random ~60% sparse Matrix of 1000x1000
{code}
mahout> val spc = new SparseMatrix(1000,1000)
spc: org.apache.mahout.math.SparseMatrix = 
{
...
mahout> val r= new Random()
r: java.util.Random = java.util.Random@27f62689
mahout> for( i <- 0 to 1000;j <-0 to 1000){ if (r.nextGaussian()> 0.6) spc(i,j) 
= 1.0}
mahout> spc
res2: org.apache.mahout.math.SparseMatrix = 
{
 406 => {5:1.0,8:1.0,10:1.0,16:1.0 ... 
 610 => {0:1.0,2:1.0,6:1.0,8:1.0,9:1.0,10:1.0,15:1.0 ... 
 636 => {5:1.0,10:1.0,15:1.0 ... 
 612 => {2:1.0,6:1.0,7:1.0,10:1.0,13:1.0,14:1.0,15:1.0,19:1.0 ... 
 727 => {4:1.0,5:1.0,10:1.0,11:1.0 ... 
 900 => {1:1.0,6:1.0,11:1.0,15:1.0,16:1.0,19:1.0 ... 
 492 => {3:1.0,5:1.0,6:1.0,8:1.0,14:1.0,16:1.0,17:1.0,19:1.0 ... 
 875 => {2:1.0,7:1.0,9:1.0,10:1.0,13:1.0,14:1.0,17:1.0 ... 
 175 => {3:1.0,6:1.0,9:1.0,11:1.0,12:1.0,13:1.0,15:1.0,17:1.0 ... 
 723 => {0:1.0,11:1.0,15:1.0 ... 
...
mahout>
{code}

Random ~60% sparse DenseMatrix 1000x1000
{code}
mahout> val dns = new DenseMatrix(1000,1000)
dns: org.apache.mahout.math.DenseMatrix = 
{
 0 =>   { ... 
 1 =>   { ... 
 2 =>   { ... 
 3 =>   { ... 
 4 =>   { ... 
 5 =>   { ... 
 6 =>   { ... 
 7 =>   { ... 
 8 =>   { ... 
 9 =>   { ... 
...
mahout> for( i <- 0 until 1000;j <-0 until 1000){ if (r.nextGaussian()> 0.6) 
dns(i,j) = 1.0}
mahout> dns
res5: org.apache.mahout.math.DenseMatrix = 
{
 0 =>   {0:1.0,1:1.0,4:1.0,6:1.0,8:1.0,9:1.0,10:1.0,12:1.0,15:1.0,17:1.0,18:1.0 
... 
 1 =>   
{0:1.0,2:1.0,4:1.0,5:1.0,6:1.0,9:1.0,10:1.0,11:1.0,13:1.0,15:1.0,16:1.0,17:1.0 
... 
 2 =>   {0:1.0,1:1.0,2:1.0,5:1.0,6:1.0,7:1.0,8:1.0,9:1.0,14:1.0,16:1.0 ... 
 3 =>   {4:1.0,5:1.0,6:1.0,9:1.0,11:1.0,18:1.0 ... 
 4 =>   {1:1.0,9:1.0,10:1.0,17:1.0,19:1.0 ... 
 5 =>   {1:1.0,2:1.0,5:1.0,6:1.0,8:1.0,12:1.0,17:1.0 ... 
 6 =>   {0:1.0,2:1.0,3:1.0,5:1.0,8:1.0,9:1.0,12:1.0,18:1.0 ... 
 7 =>   {3:1.0,9:1.0 ... 
 8 =>   {2:1.0,5:1.0,7:1.0,10:1.0,11:1.0,13:1.0,16:1.0,17:1.0 ... 
 9 =>   {0:1.0,4:1.0,6:1.0,8:1.0,9:1.0,12:1.0,14:1.0,15:1.0 ... 
...
mahout> 
{code}

SparseColumnMatrix 1000x1000:
{code}
mahout> for( i <- 0 until 1000;j <-0 until 1000){ if (r.nextGaussian()> 0.6) 
scm(i,j) = 1.0}
mahout> scm
res3: org.apache.mahout.math.SparseColumnMatrix = 
{
  0  => {8:1.0,12:1.0,17:1.0 ... 
  1  => {8:1.0,11:1.0 ... 
  2  => {0:1.0,11:1.0,14:1.0,17:1.0,18:1.0 ... 
  3  => {1:1.0,3:1.0,8:1.0,17:1.0,19:1.0 ... 
  4  => {0:1.0,1:1.0,2:1.0,3:1.0,7:1.0,9:1.0,11:1.0,12:1.0,16:1.0 ... 
  5  => {7:1.0,17:1.0 ... 
  6  => {0:1.0,3:1.0,4:1.0,11:1.0,19:1.0 ... 
  7  => {0:1.0,1:1.0,8:1.0,13:1.0 ... 
  8  => {3:1.0,4:1.0,8:1.0,14:1.0 ... 
  9  => {1:1.0,2:1.0,3:1.0,4:1.0,5:1.0,8:1.0,10:1.0 ... 
...
{code}

500000 x 500000 random FunctionalMatrixView *View ONLY*
{code}
val incCoreA = Matrices.symmetricUniformView(500000, 500000, 1234)
incCoreA: org.apache.mahout.math.Matrix = 
{
 0 =>   
{0:0.4586377101191827,1:0.07261898163580698,2:-0.4120814898385057,3:-0.34642075708405595,4:0.13562878996026145,5:0.45784520525982153,6:-0.4706505546910752,7:-0.23016615311845348,8:0.21877862831669628,9:0.479636479803259,10:0.19463854560108987,11:-0.22143915316681387,12:0.2097891013284503,13:-0.19744761560808702,14:-0.4407562546938582,15:0.3862335162012072,16:0.14407519172762487,17:0.04194022497803126,18:0.10862031391251574,19:-0.32684877921184075
 ... 
 1 =>   
{0:0.48977896201757654,1:0.2695201068510176,2:0.2035624121801051,3:0.24936953161203088,4:-0.47270888285344,5:0.06258489700284349,6:0.49054005184097776,7:-0.18983013078048971,8:0.2565323032908135,9:-0.09123189392334696,10:0.3342825154746871,11:-0.015098754906761836,12:0.27954086668921...
{code}

All of the above were tested with the default {{MAHOUT_HEAPSIZE}}.



was (Author: andrew_palumbo):
Random 5000x5000 functional matrix view:
{code}
mahout> val inCoreB = Matrices.symmetricUniformView(5000, 5000, 1234)
inCoreB: org.apache.mahout.math.Matrix = 
{
 0 =>   
{0:0.4586377101191827,1:0.07261898163580698,2:-0.4120814898385057,3:-0.34642075708405595,4:0.13562878996026145,5:0.45784520525982153,6:-0.4706505546910752,7:-0.23016615311845348,8:0.21877862831669628,9:0.479636479803259,10:0.19463854560108987,11:-0.22143915316681387,12:0.2097891013284503,13:-0.19744761560808702,14:-0.4407562546938582,15:0.3862335162012072,16:0.14407519172762487,17:0.04194022497803126,18:0.10862031391251574,19:-0.32684877921184075
 ... 
 1 =>   
{0:0.48977896201757654,1:0.2695201068510176,2:0.2035624121801051,3:0.24936953161203088,4:-0.47270888285344,5:0.06258489700284349,6:0.49054005184097776,7:-0.18983013078048971,8:0.2565323032908135,9:-0.09123189392334696,10:0.3342825154746871,11:-0.015098754906761836,12:0.279540866689215...
{code}

Add 1 to that view and return a 5000x5000 DenseMatrix 
{code}
mahout> val inCoreC = inCoreB +1
inCoreC: org.apache.mahout.math.Matrix = 
{
 0 =>   
{0:1.4586377101191828,1:1.072618981635807,2:0.5879185101614943,3:0.653579242915944,4:1.1356287899602615,5:1.4578452052598214,6:0.5293494453089248,7:0.7698338468815465,8:1.2187786283166964,9:1.479636479803259,10:1.19463854560109,11:0.7785608468331862,12:1.2097891013284503,13:0.802552384391913,14:0.5592437453061418,15:1.3862335162012072,16:1.1440751917276248,17:1.0419402249780312,18:1.1086203139125157,19:0.6731512207881593
 ... 
 1 =>   
{0:1.4897789620175765,1:1.2695201068510176,2:1.2035624121801052,3:1.2493695316120308,4:0.52729111714656,5:1.0625848970028435,6:1.4905400518409777,7:0.8101698692195103,8:1.2565323032908136,9:0.908768106076653,10:1.3342825154746871,11:0.9849012450932382,12:1.2795408666892159,13:0.5187942833296166,14:0.6908001073...
 
{code}

Random ~60% sparse Matrix of 1000x1000
{code}
mahout> val spc = new SparseMatrix(1000,1000)
spc: org.apache.mahout.math.SparseMatrix = 
{
...
mahout> val r= new Random()
r: java.util.Random = java.util.Random@27f62689
mahout> for( i <- 0 to 1000;j <-0 to 1000){ if (r.nextGaussian()> 0.6) spc(i,j) 
= 1.0}
mahout> spc
res2: org.apache.mahout.math.SparseMatrix = 
{
 406 => {5:1.0,8:1.0,10:1.0,16:1.0 ... 
 610 => {0:1.0,2:1.0,6:1.0,8:1.0,9:1.0,10:1.0,15:1.0 ... 
 636 => {5:1.0,10:1.0,15:1.0 ... 
 612 => {2:1.0,6:1.0,7:1.0,10:1.0,13:1.0,14:1.0,15:1.0,19:1.0 ... 
 727 => {4:1.0,5:1.0,10:1.0,11:1.0 ... 
 900 => {1:1.0,6:1.0,11:1.0,15:1.0,16:1.0,19:1.0 ... 
 492 => {3:1.0,5:1.0,6:1.0,8:1.0,14:1.0,16:1.0,17:1.0,19:1.0 ... 
 875 => {2:1.0,7:1.0,9:1.0,10:1.0,13:1.0,14:1.0,17:1.0 ... 
 175 => {3:1.0,6:1.0,9:1.0,11:1.0,12:1.0,13:1.0,15:1.0,17:1.0 ... 
 723 => {0:1.0,11:1.0,15:1.0 ... 
...
mahout>
{code}

Random ~60% sparse DenseMatrix 1000x1000
{code}
mahout> val dns = new DenseMatrix(1000,1000)
dns: org.apache.mahout.math.DenseMatrix = 
{
 0 =>   { ... 
 1 =>   { ... 
 2 =>   { ... 
 3 =>   { ... 
 4 =>   { ... 
 5 =>   { ... 
 6 =>   { ... 
 7 =>   { ... 
 8 =>   { ... 
 9 =>   { ... 
...
mahout> for( i <- 0 until 1000;j <-0 until 1000){ if (r.nextGaussian()> 0.6) 
dns(i,j) = 1.0}
mahout> dns
res5: org.apache.mahout.math.DenseMatrix = 
{
 0 =>   {0:1.0,1:1.0,4:1.0,6:1.0,8:1.0,9:1.0,10:1.0,12:1.0,15:1.0,17:1.0,18:1.0 
... 
 1 =>   
{0:1.0,2:1.0,4:1.0,5:1.0,6:1.0,9:1.0,10:1.0,11:1.0,13:1.0,15:1.0,16:1.0,17:1.0 
... 
 2 =>   {0:1.0,1:1.0,2:1.0,5:1.0,6:1.0,7:1.0,8:1.0,9:1.0,14:1.0,16:1.0 ... 
 3 =>   {4:1.0,5:1.0,6:1.0,9:1.0,11:1.0,18:1.0 ... 
 4 =>   {1:1.0,9:1.0,10:1.0,17:1.0,19:1.0 ... 
 5 =>   {1:1.0,2:1.0,5:1.0,6:1.0,8:1.0,12:1.0,17:1.0 ... 
 6 =>   {0:1.0,2:1.0,3:1.0,5:1.0,8:1.0,9:1.0,12:1.0,18:1.0 ... 
 7 =>   {3:1.0,9:1.0 ... 
 8 =>   {2:1.0,5:1.0,7:1.0,10:1.0,11:1.0,13:1.0,16:1.0,17:1.0 ... 
 9 =>   {0:1.0,4:1.0,6:1.0,8:1.0,9:1.0,12:1.0,14:1.0,15:1.0 ... 
...
mahout> 
{code}

> FunctionalMatrixView materializes row vectors in scala shell
> ------------------------------------------------------------
>
>                 Key: MAHOUT-1693
>                 URL: https://issues.apache.org/jira/browse/MAHOUT-1693
>             Project: Mahout
>          Issue Type: Bug
>          Components: Mahout spark shell, Math
>    Affects Versions: 0.10.0
>            Reporter: Suneel Marthi
>            Assignee: Andrew Palumbo
>            Priority: Blocker
>             Fix For: 0.10.1
>
>
> FunctionalMatrixView materializes row vectors in scala shell.
> Problem first reported by a user, Michael Alton of Intel:
> "When I first tried to make a large matrix, I got an out of Java heap space 
> error. I increased the memory incrementally until I got it to work. “export 
> MAHOUT_HEAPSIZE=8000” didn’t work, but “export MAHOUT_HEAPSIZE=64000” did. 
> The question is why do we need so much memory? A 5000x5000 matrix of doubles 
> should only take up ~200MB of space?"
> The problem has been narrowed down to FunctionalMatrixView not overriding 
> the toString() method, which causes it to materialize all of the row 
> vectors when run in the Mahout Spark Shell.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to