Dear nouiz,
Thank you for your reply. I managed to run the function and the result is
below. Do you have any suspicions? It does look as if it's allocating resources
for intermediate values and preallocation might help, but I am unsure how
to do this in Theano.
HostFromGpu [id A] '' 153
|GpuElemwise{Composite{(((i0 * (maximum(i1, i2) / Composite{((i0 + i1) +
i2)}(maximum(i1, i2), maximum(i3, i2), maximum(i4, i2)))) + (i5 *
(maximum(i3, i2) / Composite{((i0 + i1) + i2)}(maximum(i1, i2), maximum(i3,
i2), maximum(i4, i2))))) + (i6 * (maximum(i4, i2) / Composite{((i0 + i1) +
i2)}(maximum(i1, i2), maximum(i3, i2), maximum(i4, i2)))))},no_inplace} [id
B] '' 149
|GpuElemwise{Composite{((i0 + i1) + i2)}}[(0, 1)] [id C] '' 145
| |GpuElemwise{Mul}[(0, 1)] [id D] '' 122
| | |CudaNdarrayConstant{[[ 120.]]} [id E]
| | |GpuDimShuffle{0,x} [id F] '' 112
| | |GpuCAReduce{add}{0,1} [id G] '' 100
| | |GpuElemwise{Composite{(i0 * (i1 * i2))}}[(0, 2)] [id H] '' 90
| | |<CudaNdarrayType(float32, matrix)> [id I]
| | |<CudaNdarrayType(float32, matrix)> [id J]
| | |GpuReshape{2} [id K] '' 82
| | |GpuAlloc [id L] '' 56
| | | |GpuElemwise{TrueDiv}[(0, 0)] [id M] '' 47
| | | | |GpuElemwise{maximum,no_inplace} [id N] '' 34
| | | | | |GpuDimShuffle{1,0} [id O] '' 22
| | | | | | |<CudaNdarrayType(float32, matrix)> [id P]
| | | | | |CudaNdarrayConstant{[[ 0.]]} [id Q]
| | | | |GpuElemwise{Composite{((i0 + i1) + i2)},no_inplace} [id
R] '' 39
| | | | |GpuElemwise{maximum,no_inplace} [id N] '' 34
| | | | |GpuElemwise{maximum,no_inplace} [id S] '' 32
| | | | | |GpuDimShuffle{1,0} [id T] '' 18
| | | | | | |<CudaNdarrayType(float32, matrix)> [id U]
| | | | | |CudaNdarrayConstant{[[ 0.]]} [id Q]
| | | | |GpuElemwise{maximum,no_inplace} [id V] '' 33
| | | | |GpuDimShuffle{1,0} [id W] '' 19
| | | | | |<CudaNdarrayType(float32, matrix)> [id X]
| | | | |CudaNdarrayConstant{[[ 0.]]} [id Q]
| | | |Shape_i{0} [id Y] '' 3
| | | | |<CudaNdarrayType(float32, matrix)> [id J]
| | | |TensorConstant{1} [id Z]
| | | |Shape_i{1} [id BA] '' 24
| | | | |<CudaNdarrayType(float32, matrix)> [id P]
| | | |Shape_i{0} [id BB] '' 23
| | | |<CudaNdarrayType(float32, matrix)> [id P]
| | |MakeVector{dtype='int64'} [id BC] '' 74
| | |Elemwise{Mul}[(0, 0)] [id BD] '' 64
| | | |Shape_i{1} [id BA] '' 24
| | | |Shape_i{0} [id Y] '' 3
| | |Shape_i{0} [id BB] '' 23
| |GpuElemwise{Composite{(((i0 * i1 * maximum(i2, i3)) / (maximum(i2,
i3) + maximum(i4, i3))) + ((i5 * i6 * maximum(i4, i3)) / (maximum(i2, i3) +
maximum(i4, i3))))},no_inplace} [id BE] '' 130
| | |CudaNdarrayConstant{[[ 600.]]} [id BF]
| | |GpuDimShuffle{0,x} [id BG] '' 124
| | | |GpuCAReduce{add}{0,1} [id BH] '' 114
| | | |GpuElemwise{Mul}[(0, 1)] [id BI] '' 102
| | | |<CudaNdarrayType(float32, matrix)> [id I]
| | | |GpuElemwise{Mul}[(0, 1)] [id BJ] '' 89
| | | |<CudaNdarrayType(float32, matrix)> [id J]
| | | |GpuReshape{2} [id BK] '' 81
| | | |GpuAlloc [id BL] '' 53
| | | | |GpuElemwise{TrueDiv}[(0, 0)] [id BM] '' 45
| | | | | |GpuElemwise{maximum,no_inplace} [id S] '' 32
| | | | | |GpuElemwise{Composite{((i0 + i1) + i2)},no_inplace}
[id R] '' 39
| | | | |Shape_i{0} [id Y] '' 3
| | | | |TensorConstant{1} [id Z]
| | | | |Shape_i{1} [id BN] '' 17
| | | | | |<CudaNdarrayType(float32, matrix)> [id U]
| | | | |Shape_i{0} [id BO] '' 16
| | | | |<CudaNdarrayType(float32, matrix)> [id U]
| | | |MakeVector{dtype='int64'} [id BP] '' 72
| | | |Elemwise{Mul}[(0, 0)] [id BQ] '' 62
| | | | |Shape_i{1} [id BN] '' 17
| | | | |Shape_i{0} [id Y] '' 3
| | | |Shape_i{0} [id BO] '' 16
| | |<CudaNdarrayType(float32, matrix)> [id BR]
| | |CudaNdarrayConstant{[[ 0.]]} [id Q]
| | |<CudaNdarrayType(float32, matrix)> [id BS]
| | |CudaNdarrayConstant{[[-120.]]} [id BT]
| | |GpuDimShuffle{0,x} [id BU] '' 111
| | |GpuCAReduce{add}{0,1} [id BV] '' 99
| | |GpuElemwise{Mul}[(0, 1)] [id BJ] '' 89
| |GpuElemwise{Composite{((((i0 * i1 * maximum(i2, i3)) / Composite{((i0
+ i1) + i2)}(maximum(i2, i3), maximum(i4, i3), maximum(i5, i3))) + (i6 *
(maximum(i4, i3) / Composite{((i0 + i1) + i2)}(maximum(i2, i3), maximum(i4,
i3), maximum(i5, i3))))) + ((i7 * i8 * maximum(i5, i3)) / Composite{((i0 +
i1) + i2)}(maximum(i2, i3), maximum(i4, i3), maximum(i5,
i3))))},no_inplace} [id BW] '' 141
| |CudaNdarrayConstant{[[ 360.]]} [id BX]
| |GpuDimShuffle{0,x} [id BY] '' 136
| | |GpuCAReduce{add}{0,1} [id BZ] '' 128
| | |GpuElemwise{Mul}[(0, 1)] [id CA] '' 121
| | |<CudaNdarrayType(float32, matrix)> [id I]
| | |GpuElemwise{Mul}[(0, 1)] [id CB] '' 103
| | |<CudaNdarrayType(float32, matrix)> [id J]
| | |GpuReshape{2} [id CC] '' 91
| | |GpuAlloc [id CD] '' 73
| | | |GpuElemwise{TrueDiv}[(0, 0)] [id CE] '' 63
| | | | |GpuElemwise{maximum,no_inplace} [id V] '' 33
| | | | |GpuElemwise{Composite{((i0 + i1) + i2)},no_inplace}
[id R] '' 39
| | | |Shape_i{0} [id Y] '' 3
| | | |TensorConstant{1} [id Z]
| | | |Shape_i{1} [id CF] '' 21
| | | | |<CudaNdarrayType(float32, matrix)> [id X]
| | | |Shape_i{0} [id CG] '' 20
| | | |<CudaNdarrayType(float32, matrix)> [id X]
| | |MakeVector{dtype='int64'} [id CH] '' 83
| | |Elemwise{Mul}[(0, 0)] [id CI] '' 75
| | | |Shape_i{1} [id CF] '' 21
| | | |Shape_i{0} [id Y] '' 3
| | |Shape_i{0} [id CG] '' 20
| |<CudaNdarrayType(float32, matrix)> [id CJ]
| |CudaNdarrayConstant{[[ 0.]]} [id Q]
| |<CudaNdarrayType(float32, matrix)> [id CK]
| |<CudaNdarrayType(float32, matrix)> [id CL]
| |GpuElemwise{Composite{((i0 * i1) + (i2 * i3))}}[(0, 1)] [id CM] ''
131
| | |CudaNdarrayConstant{[[ 600.]]} [id BF]
| | |GpuDimShuffle{0,x} [id CN] '' 126
| | | |GpuCAReduce{add}{0,1} [id CO] '' 116
| | | |GpuElemwise{Composite{(i0 * (i1 * i2))}}[(0, 2)] [id CP] ''
104
| | | |<CudaNdarrayType(float32, matrix)> [id I]
| | | |<CudaNdarrayType(float32, matrix)> [id J]
| | | |GpuReshape{2} [id CQ] '' 92
| | | |GpuAlloc [id CR] '' 65
| | | | |GpuElemwise{Composite{((i0 * i1) / i2)}}[(0, 1)] [id
CS] '' 54
| | | | | |GpuElemwise{maximum,no_inplace} [id V] '' 33
| | | | | |GpuElemwise{maximum,no_inplace} [id CT] '' 36
| | | | | | |GpuDimShuffle{1,0} [id CU] '' 26
| | | | | | | |<CudaNdarrayType(float32, matrix)> [id CV]
| | | | | | |CudaNdarrayConstant{[[ 0.]]} [id Q]
| | | | | |GpuElemwise{Composite{(i0 * (i1 + i2))},no_inplace}
[id CW] '' 46
| | | | | |GpuElemwise{Composite{((i0 + i1) + i2)},no_inplace}
[id R] '' 39
| | | | | |GpuElemwise{maximum,no_inplace} [id CT] '' 36
| | | | | |GpuElemwise{maximum,no_inplace} [id CX] '' 35
| | | | | |GpuDimShuffle{1,0} [id CY] '' 25
| | | | | | |<CudaNdarrayType(float32, matrix)> [id CZ]
| | | | | |CudaNdarrayConstant{[[ 0.]]} [id Q]
| | | | |Shape_i{0} [id Y] '' 3
| | | | |TensorConstant{1} [id Z]
| | | | |Shape_i{1} [id CF] '' 21
| | | | |Shape_i{0} [id CG] '' 20
| | | |MakeVector{dtype='int64'} [id CH] '' 83
| | |CudaNdarrayConstant{[[ 360.]]} [id BX]
| | |GpuDimShuffle{0,x} [id DA] '' 127
| | |GpuCAReduce{add}{0,1} [id DB] '' 117
| | |GpuElemwise{Mul}[(0, 1)] [id DC] '' 105
| | |<CudaNdarrayType(float32, matrix)> [id J]
| | |GpuReshape{2} [id DD] '' 93
| | |GpuAlloc [id DE] '' 66
| | | |GpuElemwise{Composite{((i0 * i1) / i2)}}[(0, 1)] [id
DF] '' 55
| | | | |GpuElemwise{maximum,no_inplace} [id V] '' 33
| | | | |GpuElemwise{maximum,no_inplace} [id CX] '' 35
| | | | |GpuElemwise{Composite{(i0 * (i1 + i2))},no_inplace}
[id CW] '' 46
| | | |Shape_i{0} [id Y] '' 3
| | | |TensorConstant{1} [id Z]
| | | |Shape_i{1} [id CF] '' 21
| | | |Shape_i{0} [id CG] '' 20
| | |MakeVector{dtype='int64'} [id CH] '' 83
| |CudaNdarrayConstant{[[-120.]]} [id BT]
| |GpuDimShuffle{0,x} [id DG] '' 125
| |GpuCAReduce{add}{0,1} [id DH] '' 115
| |GpuElemwise{Mul}[(0, 1)] [id CB] '' 103
|<CudaNdarrayType(float32, matrix)> [id DI]
|CudaNdarrayConstant{[[ 0.]]} [id Q]
|<CudaNdarrayType(float32, matrix)> [id DJ]
|<CudaNdarrayType(float32, matrix)> [id DK]
|GpuElemwise{Composite{((i0 * i1) + (i2 * i3))}}[(0, 1)] [id DL] '' 120
| |CudaNdarrayConstant{[[ 600.]]} [id BF]
| |GpuDimShuffle{0,x} [id DM] '' 109
| | |GpuCAReduce{add}{0,1} [id DN] '' 97
| | |GpuElemwise{Composite{(i0 * (i1 * i2))}}[(0, 2)] [id DO] '' 87
| | |<CudaNdarrayType(float32, matrix)> [id I]
| | |<CudaNdarrayType(float32, matrix)> [id J]
| | |GpuReshape{2} [id DP] '' 79
| | |GpuAlloc [id DQ] '' 51
| | | |GpuElemwise{TrueDiv}[(0, 0)] [id DR] '' 43
| | | | |GpuElemwise{maximum,no_inplace} [id DS] '' 30
| | | | | |GpuDimShuffle{1,0} [id DT] '' 12
| | | | | | |<CudaNdarrayType(float32, matrix)> [id DU]
| | | | | |CudaNdarrayConstant{[[ 0.]]} [id Q]
| | | | |GpuElemwise{add,no_inplace} [id DV] '' 38
| | | | |GpuElemwise{maximum,no_inplace} [id DS] '' 30
| | | | |GpuElemwise{maximum,no_inplace} [id DW] '' 31
| | | | |GpuDimShuffle{1,0} [id DX] '' 13
| | | | | |<CudaNdarrayType(float32, matrix)> [id DY]
| | | | |CudaNdarrayConstant{[[ 0.]]} [id Q]
| | | |Shape_i{0} [id Y] '' 3
| | | |TensorConstant{1} [id Z]
| | | |Shape_i{1} [id DZ] '' 11
| | | | |<CudaNdarrayType(float32, matrix)> [id DU]
| | | |Shape_i{0} [id EA] '' 10
| | | |<CudaNdarrayType(float32, matrix)> [id DU]
| | |MakeVector{dtype='int64'} [id EB] '' 70
| | |Elemwise{Mul}[(0, 0)] [id EC] '' 60
| | | |Shape_i{1} [id DZ] '' 11
| | | |Shape_i{0} [id Y] '' 3
| | |Shape_i{0} [id EA] '' 10
| |CudaNdarrayConstant{[[ 120.]]} [id E]
| |GpuDimShuffle{0,x} [id ED] '' 110
| |GpuCAReduce{add}{0,1} [id EE] '' 98
| |GpuElemwise{Mul}[(0, 1)] [id EF] '' 88
| |<CudaNdarrayType(float32, matrix)> [id J]
| |GpuReshape{2} [id EG] '' 80
| |GpuAlloc [id EH] '' 52
| | |GpuElemwise{TrueDiv}[(0, 0)] [id EI] '' 44
| | | |GpuElemwise{maximum,no_inplace} [id DW] '' 31
| | | |GpuElemwise{add,no_inplace} [id DV] '' 38
| | |Shape_i{0} [id Y] '' 3
| | |TensorConstant{1} [id Z]
| | |Shape_i{1} [id EJ] '' 15
| | | |<CudaNdarrayType(float32, matrix)> [id DY]
| | |Shape_i{0} [id EK] '' 14
| | |<CudaNdarrayType(float32, matrix)> [id DY]
| |MakeVector{dtype='int64'} [id EL] '' 71
| |Elemwise{Mul}[(0, 0)] [id EM] '' 61
| | |Shape_i{1} [id EJ] '' 15
| | |Shape_i{0} [id Y] '' 3
| |Shape_i{0} [id EK] '' 14
|GpuElemwise{Composite{((i0 + i1) + i2)}}[(0, 1)] [id EN] '' 142
|GpuElemwise{Mul}[(0, 1)] [id EO] '' 119
| |CudaNdarrayConstant{[[ 360.]]} [id BX]
| |GpuDimShuffle{0,x} [id EP] '' 108
| |GpuCAReduce{add}{0,1} [id EQ] '' 96
| |GpuElemwise{Composite{(i0 * (i1 * i2))}}[(0, 2)] [id ER] '' 86
| |<CudaNdarrayType(float32, matrix)> [id I]
| |<CudaNdarrayType(float32, matrix)> [id J]
| |GpuReshape{2} [id ES] '' 78
| |GpuAlloc [id ET] '' 50
| | |GpuElemwise{TrueDiv}[(0, 0)] [id EU] '' 42
| | | |GpuElemwise{maximum,no_inplace} [id EV] '' 29
| | | | |GpuDimShuffle{1,0} [id EW] '' 7
| | | | | |<CudaNdarrayType(float32, matrix)> [id EX]
| | | | |CudaNdarrayConstant{[[ 0.]]} [id Q]
| | | |GpuElemwise{Composite{((i0 + i1) + i2)},no_inplace} [id
EY] '' 37
| | | |GpuElemwise{maximum,no_inplace} [id EV] '' 29
| | | |GpuElemwise{maximum,no_inplace} [id EZ] '' 27
| | | | |GpuDimShuffle{1,0} [id FA] '' 2
| | | | | |<CudaNdarrayType(float32, matrix)> [id FB]
| | | | |CudaNdarrayConstant{[[ 0.]]} [id Q]
| | | |GpuElemwise{maximum,no_inplace} [id FC] '' 28
| | | |GpuDimShuffle{1,0} [id FD] '' 4
| | | | |<CudaNdarrayType(float32, matrix)> [id FE]
| | | |CudaNdarrayConstant{[[ 0.]]} [id Q]
| | |Shape_i{0} [id Y] '' 3
| | |TensorConstant{1} [id Z]
| | |Shape_i{1} [id FF] '' 9
| | | |<CudaNdarrayType(float32, matrix)> [id EX]
| | |Shape_i{0} [id FG] '' 8
| | |<CudaNdarrayType(float32, matrix)> [id EX]
| |MakeVector{dtype='int64'} [id FH] '' 69
| |Elemwise{Mul}[(0, 0)] [id FI] '' 59
| | |Shape_i{1} [id FF] '' 9
| | |Shape_i{0} [id Y] '' 3
| |Shape_i{0} [id FG] '' 8
|GpuElemwise{Composite{(((i0 * i1 * maximum(i2, i3)) / (maximum(i2,
i3) + maximum(i4, i3))) + ((i5 * i6 * maximum(i4, i3)) / (maximum(i2, i3) +
maximum(i4, i3))))},no_inplace} [id FJ] '' 129
| |CudaNdarrayConstant{[[ 600.]]} [id BF]
| |GpuDimShuffle{0,x} [id FK] '' 123
| | |GpuCAReduce{add}{0,1} [id FL] '' 113
| | |GpuElemwise{Mul}[(0, 1)] [id FM] '' 101
| | |<CudaNdarrayType(float32, matrix)> [id I]
| | |GpuElemwise{Mul}[(0, 1)] [id FN] '' 84
| | |<CudaNdarrayType(float32, matrix)> [id J]
| | |GpuReshape{2} [id FO] '' 76
| | |GpuAlloc [id FP] '' 48
| | | |GpuElemwise{TrueDiv}[(0, 0)] [id FQ] '' 40
| | | | |GpuElemwise{maximum,no_inplace} [id EZ] '' 27
| | | | |GpuElemwise{Composite{((i0 + i1) + i2)},no_inplace}
[id EY] '' 37
| | | |Shape_i{0} [id Y] '' 3
| | | |TensorConstant{1} [id Z]
| | | |Shape_i{1} [id FR] '' 1
| | | | |<CudaNdarrayType(float32, matrix)> [id FB]
| | | |Shape_i{0} [id FS] '' 0
| | | |<CudaNdarrayType(float32, matrix)> [id FB]
| | |MakeVector{dtype='int64'} [id FT] '' 67
| | |Elemwise{Mul}[(0, 0)] [id FU] '' 57
| | | |Shape_i{1} [id FR] '' 1
| | | |Shape_i{0} [id Y] '' 3
| | |Shape_i{0} [id FS] '' 0
| |<CudaNdarrayType(float32, matrix)> [id FV]
| |CudaNdarrayConstant{[[ 0.]]} [id Q]
| |<CudaNdarrayType(float32, matrix)> [id FW]
| |CudaNdarrayConstant{[[-360.]]} [id FX]
| |GpuDimShuffle{0,x} [id FY] '' 106
| |GpuCAReduce{add}{0,1} [id FZ] '' 94
| |GpuElemwise{Mul}[(0, 1)] [id FN] '' 84
|GpuElemwise{Mul}[(0, 1)] [id GA] '' 118
|CudaNdarrayConstant{[[ 120.]]} [id E]
|GpuDimShuffle{0,x} [id GB] '' 107
|GpuCAReduce{add}{0,1} [id GC] '' 95
|GpuElemwise{Mul}[(0, 1)] [id GD] '' 85
|<CudaNdarrayType(float32, matrix)> [id J]
|GpuReshape{2} [id GE] '' 77
|GpuAlloc [id GF] '' 49
| |GpuElemwise{TrueDiv}[(0, 0)] [id GG] '' 41
| | |GpuElemwise{maximum,no_inplace} [id FC] '' 28
| | |GpuElemwise{Composite{((i0 + i1) + i2)},no_inplace} [id
EY] '' 37
| |Shape_i{0} [id Y] '' 3
| |TensorConstant{1} [id Z]
| |Shape_i{1} [id GH] '' 6
| | |<CudaNdarrayType(float32, matrix)> [id FE]
| |Shape_i{0} [id GI] '' 5
| |<CudaNdarrayType(float32, matrix)> [id FE]
|MakeVector{dtype='int64'} [id GJ] '' 68
|Elemwise{Mul}[(0, 0)] [id GK] '' 58
| |Shape_i{1} [id GH] '' 6
| |Shape_i{0} [id Y] '' 3
|Shape_i{0} [id GI] '' 5
GpuElemwise{Composite{maximum(((i0 + i1) - i2), i3)}}[(0, 0)] [id GL] ''
139
|<CudaNdarrayType(float32, matrix)> [id BR]
|GpuElemwise{Mul}[(0, 1)] [id GM] '' 134
| |CudaNdarrayConstant{[[ 600.]]} [id BF]
| |GpuDimShuffle{0,x} [id BG] '' 124
|GpuElemwise{Composite{(((i0 * i1 * maximum(i2, i3)) / (maximum(i2, i3) +
maximum(i4, i3))) + ((i5 * i6 * maximum(i4, i3)) / (maximum(i2, i3) +
maximum(i4, i3))))},no_inplace} [id BE] '' 130
|CudaNdarrayConstant{[[ 0.]]} [id Q]
GpuElemwise{Composite{maximum(((i0 + i1) - i2), i3)}}[(0, 0)] [id GN] ''
140
|<CudaNdarrayType(float32, matrix)> [id BS]
|GpuElemwise{Mul}[(0, 1)] [id GO] '' 135
| |CudaNdarrayConstant{[[-120.]]} [id BT]
| |GpuDimShuffle{0,x} [id BU] '' 111
|GpuElemwise{Composite{(((i0 * i1 * maximum(i2, i3)) / (maximum(i2, i3) +
maximum(i4, i3))) + ((i5 * i6 * maximum(i4, i3)) / (maximum(i2, i3) +
maximum(i4, i3))))},no_inplace} [id BE] '' 130
|CudaNdarrayConstant{[[ 0.]]} [id Q]
GpuElemwise{Composite{maximum(((i0 + i1) - i2), i3)}}[(0, 0)] [id GP] ''
148
|<CudaNdarrayType(float32, matrix)> [id CJ]
|GpuElemwise{Mul}[(0, 1)] [id GQ] '' 144
| |CudaNdarrayConstant{[[ 360.]]} [id BX]
| |GpuDimShuffle{0,x} [id BY] '' 136
|GpuElemwise{Composite{((((i0 * i1 * maximum(i2, i3)) / Composite{((i0 +
i1) + i2)}(maximum(i2, i3), maximum(i4, i3), maximum(i5, i3))) + (i6 *
(maximum(i4, i3) / Composite{((i0 + i1) + i2)}(maximum(i2, i3), maximum(i4,
i3), maximum(i5, i3))))) + ((i7 * i8 * maximum(i5, i3)) / Composite{((i0 +
i1) + i2)}(maximum(i2, i3), maximum(i4, i3), maximum(i5,
i3))))},no_inplace} [id BW] '' 141
|CudaNdarrayConstant{[[ 0.]]} [id Q]
GpuElemwise{Composite{maximum(((i0 + i1) - i2), i3)}}[(0, 0)] [id GR] ''
146
|<CudaNdarrayType(float32, matrix)> [id CK]
|GpuElemwise{Composite{((i0 * i1) + (i2 * i3))}}[(0, 1)] [id CM] '' 131
|GpuElemwise{Composite{((((i0 * i1 * maximum(i2, i3)) / Composite{((i0 +
i1) + i2)}(maximum(i2, i3), maximum(i4, i3), maximum(i5, i3))) + (i6 *
(maximum(i4, i3) / Composite{((i0 + i1) + i2)}(maximum(i2, i3), maximum(i4,
i3), maximum(i5, i3))))) + ((i7 * i8 * maximum(i5, i3)) / Composite{((i0 +
i1) + i2)}(maximum(i2, i3), maximum(i4, i3), maximum(i5,
i3))))},no_inplace} [id BW] '' 141
|CudaNdarrayConstant{[[ 0.]]} [id Q]
GpuElemwise{Composite{maximum(((i0 + i1) - i2), i3)}}[(0, 0)] [id GS] ''
147
|<CudaNdarrayType(float32, matrix)> [id CL]
|GpuElemwise{Mul}[(0, 1)] [id GT] '' 143
| |CudaNdarrayConstant{[[-120.]]} [id BT]
| |GpuDimShuffle{0,x} [id DG] '' 125
|GpuElemwise{Composite{((((i0 * i1 * maximum(i2, i3)) / Composite{((i0 +
i1) + i2)}(maximum(i2, i3), maximum(i4, i3), maximum(i5, i3))) + (i6 *
(maximum(i4, i3) / Composite{((i0 + i1) + i2)}(maximum(i2, i3), maximum(i4,
i3), maximum(i5, i3))))) + ((i7 * i8 * maximum(i5, i3)) / Composite{((i0 +
i1) + i2)}(maximum(i2, i3), maximum(i4, i3), maximum(i5,
i3))))},no_inplace} [id BW] '' 141
|CudaNdarrayConstant{[[ 0.]]} [id Q]
GpuElemwise{Composite{maximum(((i0 + i1) - i2), i3)}}[(0, 0)] [id GU] ''
152
|<CudaNdarrayType(float32, matrix)> [id DI]
|GpuElemwise{Composite{((i0 + i1) + i2)}}[(0, 1)] [id C] '' 145
|GpuElemwise{Composite{(((i0 * (maximum(i1, i2) / Composite{((i0 + i1) +
i2)}(maximum(i1, i2), maximum(i3, i2), maximum(i4, i2)))) + (i5 *
(maximum(i3, i2) / Composite{((i0 + i1) + i2)}(maximum(i1, i2), maximum(i3,
i2), maximum(i4, i2))))) + (i6 * (maximum(i4, i2) / Composite{((i0 + i1) +
i2)}(maximum(i1, i2), maximum(i3, i2), maximum(i4, i2)))))},no_inplace} [id
B] '' 149
|CudaNdarrayConstant{[[ 0.]]} [id Q]
GpuElemwise{Composite{maximum(((i0 + i1) - i2), i3)}}[(0, 0)] [id GV] ''
151
|<CudaNdarrayType(float32, matrix)> [id DJ]
|GpuElemwise{Composite{((i0 * i1) + (i2 * i3))}}[(0, 1)] [id DL] '' 120
|GpuElemwise{Composite{(((i0 * (maximum(i1, i2) / Composite{((i0 + i1) +
i2)}(maximum(i1, i2), maximum(i3, i2), maximum(i4, i2)))) + (i5 *
(maximum(i3, i2) / Composite{((i0 + i1) + i2)}(maximum(i1, i2), maximum(i3,
i2), maximum(i4, i2))))) + (i6 * (maximum(i4, i2) / Composite{((i0 + i1) +
i2)}(maximum(i1, i2), maximum(i3, i2), maximum(i4, i2)))))},no_inplace} [id
B] '' 149
|CudaNdarrayConstant{[[ 0.]]} [id Q]
GpuElemwise{Composite{maximum(((i0 + i1) - i2), i3)}}[(0, 0)] [id GW] ''
150
|<CudaNdarrayType(float32, matrix)> [id DK]Function profiling
|GpuElemwise{Composite{((i0 + i1) + i2)}}[(0, 1)] [id EN] '' 142
|GpuElemwise{Composite{(((i0 * (maximum(i1, i2) / Composite{((i0 + i1) +
i2)}(maximum(i1, i2), maximum(i3, i2), maximum(i4, i2)))) + (i5 *
(maximum(i3, i2) / Composite{((i0 + i1) + i2)}(maximum(i1, i2), maximum(i3,
i2), maximum(i4, i2))))) + (i6 * (maximum(i4, i2) / Composite{((i0 + i1) +
i2)}(maximum(i1, i2), maximum(i3, i2), maximum(i4, i2)))))},no_inplace} [id
B] '' 149
|CudaNdarrayConstant{[[ 0.]]} [id Q]
GpuElemwise{Composite{maximum(((i0 + i1) - i2), i3)}}[(0, 0)] [id GX] ''
138
|<CudaNdarrayType(float32, matrix)> [id FV]
|GpuElemwise{Mul}[(0, 1)] [id GY] '' 133
| |CudaNdarrayConstant{[[ 600.]]} [id BF]
| |GpuDimShuffle{0,x} [id FK] '' 123
|GpuElemwise{Composite{(((i0 * i1 * maximum(i2, i3)) / (maximum(i2, i3) +
maximum(i4, i3))) + ((i5 * i6 * maximum(i4, i3)) / (maximum(i2, i3) +
maximum(i4, i3))))},no_inplace} [id FJ] '' 129
|CudaNdarrayConstant{[[ 0.]]} [id Q]
GpuElemwise{Composite{maximum(((i0 + i1) - i2), i3)}}[(0, 0)] [id GZ] ''
137
|<CudaNdarrayType(float32, matrix)> [id FW]
|GpuElemwise{Mul}[(0, 1)] [id HA] '' 132
| |CudaNdarrayConstant{[[-360.]]} [id FX]
| |GpuDimShuffle{0,x} [id FY] '' 106
|GpuElemwise{Composite{(((i0 * i1 * maximum(i2, i3)) / (maximum(i2, i3) +
maximum(i4, i3))) + ((i5 * i6 * maximum(i4, i3)) / (maximum(i2, i3) +
maximum(i4, i3))))},no_inplace} [id FJ] '' 129
|CudaNdarrayConstant{[[ 0.]]} [id Q]
None
On Tuesday, 7 February 2017 23:52:17 UTC+2, nouiz wrote:
>
> There is a high quantity of GpuAlloc. What you have shown doesn't tell us
> what needs it in Theano. Can you run the Theano function with profiling, and
> before the script ends call theano.debugprint(your_theano_function) and send
> this output? It will tell us what needs it in the graph.
>
> On Fri, Feb 3, 2017 at 4:22 AM Šarūnas S. <[email protected] <javascript:>>
> wrote:
>
>> I wrote a script in Theano and started profiling it. What I noticed is that
>> the GPU spends most of the time in GpuAlloc.
>>
>> Could somebody explain to me why this is happening and how I could reduce it?
>> In C or C++ I would preallocate it, but I am not sure how to do this in
>> Theano.
>>
>> I am running on Windows 8.1 with Nvidia GTX 1070 with Theano
>> @ 0.9.0dev4.dev-3c0be3d94102ac6864b2e5ab52ae96d07c6375c6
>>
>>
>> I am attaching extensive profile result below:
>>
>> Function profiling
>> ==================
>> Message: Sum of all(2) printed profiles at exit excluding Scan op
>> profile.
>> Time in 200 calls to Function.__call__: 3.463001e+00s
>> Time in Function.fn.__call__: 3.451001e+00s (99.653%)
>> Time in thunks: 3.425293e+00s (98.911%)
>> Total compile time: 1.413800e+01s
>> Number of Apply nodes: 590
>> Theano Optimizer time: 1.158200e+01s
>> Theano validate time: 9.390018e-01s
>> Theano Linker time (includes C, CUDA code generation/compiling):
>> 2.107000e+00s
>> Import time 3.500128e-02s
>> Node make_thunk time 2.042000e+00s
>> Node GpuCAReduce{add}{0,1}(GpuElemwise{Composite{(i0 * (i1 *
>> i2))}}[(0, 2)].0) time 9.000063e-03s
>> Node GpuCAReduce{add}{0,1}(GpuElemwise{Mul}[(0, 1)].0) time
>> 7.999897e-03s
>> Node GpuDimShuffle{0,x}(GpuCAReduce{add}{0,1}.0) time
>> 6.999969e-03s
>> Node Shape_i{1}(<CudaNdarrayType(float32, matrix)>) time
>> 4.999876e-03s
>> Node GpuElemwise{Mul}[(0, 1)](CudaNdarrayConstant{[[ 240.]]},
>> GpuDimShuffle{0,x}.0) time 4.999876e-03s
>>
>>
>> Time in all call to theano.grad() 0.000000e+00s
>> Time since theano import 41.580s
>> Class
>> ---
>> <% time> <sum %> <apply time> <time per call> <type> <#call> <#apply>
>> <Class name>
>> 90.5% 90.5% 3.100s 3.37e-04s C 9200 92
>> theano.sandbox.cuda.basic_ops.GpuAlloc
>> 7.4% 97.9% 0.254s 4.19e-06s C 60600 606
>> theano.sandbox.cuda.basic_ops.GpuElemwise
>> 1.0% 98.9% 0.034s 2.77e-06s C 12200 122
>> theano.sandbox.cuda.basic_ops.GpuCAReduce
>> 0.5% 99.4% 0.017s 1.84e-06s C 9200 92
>> theano.sandbox.cuda.basic_ops.GpuReshape
>> 0.5% 99.9% 0.016s 7.45e-07s C 21400 214
>> theano.sandbox.cuda.basic_ops.GpuDimShuffle
>> 0.1% 99.9% 0.003s 1.57e-06s C 1900 19
>> theano.tensor.elemwise.Elemwise
>> 0.1% 100.0% 0.002s 5.24e-07s C 3800 38
>> theano.compile.ops.Shape_i
>> 0.0% 100.0% 0.000s 0.00e+00s C 1900 19
>> theano.tensor.opt.MakeVector
>> ... (remaining 0 Classes account for 0.00%(0.00s) of the runtime)
>>
>>
>> Ops
>> ---
>> <% time> <sum %> <apply time> <time per call> <type> <#call> <#apply>
>> <Op name>
>> 90.5% 90.5% 3.100s 3.37e-04s C 9200 92
>> GpuAlloc
>> 1.7% 92.2% 0.058s 4.41e-06s C 13100 131
>> GpuElemwise{Mul}[(0, 1)]
>> 1.0% 93.2% 0.034s 3.21e-06s C 10600 106
>> GpuElemwise{maximum,no_inplace}
>> 1.0% 94.2% 0.034s 2.77e-06s C 12200 122
>> GpuCAReduce{add}{0,1}
>> 0.7% 94.8% 0.023s 3.54e-06s C 6500 65
>> GpuElemwise{Composite{maximum(((i0 + i1) - i2), i3)}}[(0, 0)]
>> 0.5% 95.4% 0.018s 3.27e-06s C 5500 55
>> GpuElemwise{mul,no_inplace}
>> 0.5% 95.9% 0.018s 4.61e-06s C 3900 39
>> GpuElemwise{Composite{((i0 * i1) / i2)}}[(0, 1)]
>> 0.5% 96.4% 0.017s 1.84e-06s C 9200 92
>> GpuReshape{2}
>> 0.4% 96.8% 0.014s 4.33e-06s C 3200 32
>> GpuElemwise{Composite{(i0 * (i1 * i2))}}[(0, 2)]
>> 0.2% 97.0% 0.008s 8.69e-07s C 9200 92
>> GpuDimShuffle{1,0}
>> 0.2% 97.3% 0.008s 5.33e-06s C 1500 15
>> GpuElemwise{Composite{((i0 * i1) / i2)},no_inplace}
>> 0.2% 97.5% 0.008s 6.52e-07s C 12200 122
>> GpuDimShuffle{0,x}
>> 0.2% 97.7% 0.007s 4.38e-06s C 1600 16
>> GpuElemwise{Composite{(((i0 * i1 * maximum(i2, i3)) / (maximum(i2, i3) +
>> maximum(i4, i3))) + ((i5 * i6 * maximum(i4, i3)) / (maximum(i2, i3) +
>> maximum(i4, i3))))},no_inplace}
>> 0.2% 97.9% 0.007s 2.92e-06s C 2400 24
>> GpuElemwise{Composite{maximum(((i0 + i1) - i2), i3)},no_inplace}
>> 0.2% 98.1% 0.007s 8.75e-06s C 800 8
>> GpuElemwise{Composite{((i0 * i1 * i2) / i3)}}[(0, 2)]
>> 0.2% 98.3% 0.007s 8.73e-06s C 800 8
>> GpuElemwise{Composite{((i0 * i1) / i2)}}[(0, 0)]
>> 0.2% 98.5% 0.006s 3.54e-06s C 1700 17
>> GpuElemwise{true_div,no_inplace}
>> 0.1% 98.6% 0.005s 5.02e-06s C 1000 10
>> GpuElemwise{Composite{(i0 * (i1 + i2))},no_inplace}
>> 0.1% 98.8% 0.005s 9.99e-06s C 500 5
>> GpuElemwise{Composite{(((i0 + i1) + i2) + i3)},no_inplace}
>> 0.1% 98.9% 0.004s 6.65e-06s C 600 6
>> GpuElemwise{Composite{(((i0 * (maximum(i1, i2) / Composite{((i0 + i1) +
>> i2)}(maximum(i1, i2), maximum(i3, i2), maximum(i4, i2)))) + ((i5 * i6 *
>> maximum(i3, i2)) / Composite{((i0 + i1) + i2)}(maximum(i1, i2), maximum(
>> i3, i2), maximum(i4, i2)))) + ((i7 * i8 * maximum(i4, i2)) / Composite{((i0
>> + i1) + i2)}(maximum(i1, i2), maximum(i3, i2), maximum(i4, i2))))},
>> no_inplace}
>> ... (remaining 33 Ops account for 1.11%(0.04s) of the runtime)
>>
>>
>> Apply
>> ------
>> <% time> <sum %> <apply time> <time per call> <#call> <id> <Apply name>
>> 1.6% 1.6% 0.055s 5.50e-04s 100 188 GpuAlloc(
>> GpuElemwise{Composite{((i0 * i1) / i2)},no_inplace}.0, TensorConstant{
>> 1326}, TensorConstant{1}, Shape_i{1}.0, Shape_i{0}.0)
>> 1.6% 3.2% 0.055s 5.50e-04s 100 217 GpuAlloc(
>> GpuElemwise{Composite{((i0 * i1) / i2)}}[(0, 1)].0, TensorConstant{1326},
>> TensorConstant{1}, Shape_i{1}.0, Shape_i{0}.0)
>> 1.6% 4.8% 0.055s 5.50e-04s 100 224 GpuAlloc(
>> GpuElemwise{Composite{((i0 * i1 * i2) / i3)}}[(0, 2)].0, TensorConstant{
>> 1326}, TensorConstant{1}, Shape_i{1}.0, Shape_i{0}.0)
>> 1.6% 6.4% 0.055s 5.50e-04s 100 183 GpuAlloc(
>> GpuElemwise{Composite{((i0 * i1) / i2)}}[(0, 1)].0, TensorConstant{1326},
>> TensorConstant{1}, Shape_i{1}.0, Shape_i{0}.0)
>> 1.6% 8.0% 0.054s 5.39e-04s 100 186 GpuAlloc(
>> GpuElemwise{Composite{((i0 * i1) / i2)},no_inplace}.0, TensorConstant{
>> 1326}, TensorConstant{1}, Shape_i{1}.0, Shape_i{0}.0)
>> 1.5% 9.5% 0.053s 5.30e-04s 100 154 GpuAlloc(
>> GpuElemwise{true_div
>>
>
--
---
You received this message because you are subscribed to the Google Groups
"theano-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.