I have some big computation that fails after about 5 hours claiming that
there is not enough stack space. That FriCAS was compiled with
"--dynamic-space-size 23906".

I tried to figure out where all that space was used and hit recip in sttaylor.spad. In particular, the function iDiv looked suspicious to me.

    iDiv : (ST A, ST A, A) -> ST A
    iDiv(x, rst_y, ry0) == delay
      empty? x => empty()
      c0 := frst x * ry0
      concat(c0, iDiv(rst x - c0*rst_y, rst_y, ry0))

In my understanding that means that for every new computed coefficient of the result of recip(x), there are two new streams created, namely
"c0*rst_y" and "rst x - c0*rst_y".

I might be wrong, but that looked to me like wasting memory, because the initial segments of these streams are not needed anymore, once the respective coefficient is computed, but I suspect(ed) that the memory is not freed.

Anyway, I suggest another implementation, that uses the inversion formula directly, and only keeps a list of the computed coefficients in reverse order for further computation.

With that implementation, my lengthy computation finished successfully.

The implementation can be found in my branch
"wip/improve-stream-division".

https://github.com/hemmecke/fricas/tree/wip/improve-stream-division
(patch attached)

I took care that the timings are basically the same.
See attached testfile.

Is there a chance that this could make it into FriCAS?

Suggestions for further improvements are welcome.

Ralf

--
You received this message because you are subscribed to the Google Groups "FriCAS - 
computer algebra system" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/fricas-devel/432d9d28-a1d5-4658-8677-32b5c2e64033%40gmail.com.
)clear complete
ZZ ==> Integer
QQ ==> Fraction ZZ
S ==> Stream QQ
STT A ==> StreamTaylorSeriesOperations A
INV x ==> ((recip x)$STT(QQ)) :: S

CHK(x,y) ==> (_
  for i in 1..3000 repeat if x.i ~= y.i then (print(i);break);_
  print("OK"))


)set stream calc 20

s1: S := construct [3,0,0,0,2,0,0,0,3,0,4,0,0,5]
s2: S := [i for i in 1..100]
s3: S := [i for i in 1..]
s4: S := [i+1 for i in 1..]
s5: S := [99-i for i in 1..100]
s6: S := [1+i^2 for i in 1..]

)time on

u1 := INV s1; u1.50000;  -- 13.5 s
u2 := INV s2; u2.100000; --  7.2 s
u3 := INV s3; u3.100000; --  0.3 s
u4 := INV s4; u4.5000;   -- 25.6 s
u5 := INV s5; u5.3000;   -- 20.0 s
u6 := INV s6; u6.4000;   -- 20.6 s

)co sttaylor.spad

v1 := INV s1; v1.50000;  -- 13.6 s
v2 := INV s2; v2.100000; --  6.3 s
v3 := INV s3; v3.100000; --  0.1
v4 := INV s4; v4.5000;   -- 25.9 s
v5 := INV s5; v5.3000;   -- 20.4 s
v6 := INV s6; v6.4000;   -- 18.4 s

CHK(u1,v1)
CHK(u2,v2)
CHK(u3,v3)
CHK(u4,v4)
CHK(u5,v5)
CHK(u6,v6)
From a1c9d3ba2c09f9943cef59b24578b19860a8ad83 Mon Sep 17 00:00:00 2001
From: Ralf Hemmecke <[email protected]>
Date: Sat, 27 Jul 2024 00:06:28 +0200
Subject: division of streams with less memory

---
 src/algebra/sttaylor.spad | 144 ++++++++++++++++++++++++++------------
 1 file changed, 100 insertions(+), 44 deletions(-)

diff --git a/src/algebra/sttaylor.spad b/src/algebra/sttaylor.spad
index b805ed07..7729a1d6 100644
--- a/src/algebra/sttaylor.spad
+++ b/src/algebra/sttaylor.spad
@@ -194,51 +194,107 @@ StreamTaylorSeriesOperations(A) : Exports == Implementation where
       zero? s => zro()
       map(z +-> z*s, x)
 
-    iDiv : (ST A, ST A, A) -> ST A
-    iDiv(x, rst_y, ry0) == delay
-      empty? x => empty()
-      c0 := frst x * ry0
-      concat(c0, iDiv(rst x - c0*rst_y, rst_y, ry0))
-
-    i_div1 : (ST A, ST A, A, A) -> ST A
-    i_div1(rst_x, rst_y, ry0, c0) == delay
-        empty?(rst_y) => map(z +-> z*ry0, rst_x)
-        iDiv(rst_x - c0*rst_y, rst_y, ry0)
-
-    x exquo y ==
-      y0 : A
-      for n in 1.. repeat
-        n > 1000 => return "failed"
-        empty? y => return "failed"
-        empty? x => return empty()
-        (y0 := frst y) = 0 =>
-          frst x = 0 => (x := rst x; y := rst y)
-          return "failed"
-        -- first entry in y is non-zero
-        break
-      (ry0u := recip y0) case "failed" => "failed"
-      ry0 := ry0u@A
-      x0 := frst x
-      c0 := x0*ry0
-      concat(c0, i_div1(rst x, rst y, ry0, c0))
-
-    (x : ST A) / (y : ST A) == delay
-      empty? y => error "/: division by zero"
-      empty? x => empty()
-      (ry0u := recip frst y) case "failed" =>
-        error "/: second argument is not invertible"
-      ry0 := ry0u@A
-      x0 := frst x
-      c0 := x0*ry0
-      concat(c0, i_div1(rst x, rst y, ry0, c0))
-
-    recip x ==
-      empty? x => "failed"
-      rh1 := recip frst x
-      rh1 case "failed" => "failed"
-      rh := rh1@A
+-------------------------------------------------------------------
+
+    -- We use
+    -- https://en.wikipedia.org/wiki/Formal_power_series#Multiplicative_inverse.
+    -- recip computes the zeroth coefficient and computes arguments
+    -- for restRecip to compute the remaining coefficients.
+    -- restRecip avoids zero multiplication if the tail of the yet computed
+    -- partial stream ends in zeros.
+    -- Assume that x = (a0,a1,a2,...)
+    -- Arguments for restRecip:
+    -- a = (a1, a2, a3,...)
+    -- u = - 1/a0
+    -- n = in this invocation we compute (cn, c(n+1), ...)
+    -- k = the resulting coefficients ck, c(k+1), ..., c(n-1) are equal to 0
+    -- ra = (ak,a(k+1),a(k+2),...) substream of x starting at k
+    -- revc = [c(k-1),c(k-2),...,c(0)] the first k coefficients of the result
+    --        in reverse order, c(k-1)~=0.
+
+    -- local
+    revSum(ra: ST A, rvc: List A): A ==
+        cc: A := 0
+        for c in rvc while not empty? ra repeat
+            cc := cc + frst(ra) * c
+            ra := rst ra
+        cc
+
+    -- local
+    restRecip(a: ST A, u: A, n: I, k: I, ra: ST A, revc: List A): ST A == delay
+        --assert(k=#revc)
+        --assert(for i in k .. n-1 while not empty? a repeat a := rst a; a=ra)
+        --assert(#revc>0)
+        empty? ra => empty()
+        rvc: List A := revc -- create a local variable inside 'delay'
+        cn: A := u * revSum(ra, rvc) -- u = -1/a(0)
+        zero? cn => concat(cn, restRecip(a, u, n+1, k, rst ra, rvc))
+        for i in k..n-1 repeat rvc := cons(0, rvc) -- fill rvc with zeros
+        concat(cn, restRecip(a, u, n+1, n+1, a, cons(cn, rvc)))
+
+    recip(x: ST A): UN ==
+        empty? x => "failed"
+        ua: Union(A, "failed") := recip frst x
+        ua case "failed" => "failed"
+        c0: A := ua @ A
+        concat(c0, restRecip(rst x, -c0, 1, 1, rst x, [c0])) :: UN
+
+-------------------------------------------------------------------
+
+    -- We use
+    -- https://en.wikipedia.org/wiki/Formal_power_series#Division.
+    -- / computes the zeroth coefficient and computes arguments
+    -- for restDiv to compute the remaining coefficients.
+    -- restDiv avoids zero multiplication if the tail of the yet computed
+    -- partial stream ends in zeros.
+    -- Assume that x = (b0,b1,b2,...), y = (a0,a1,a2,...).
+    -- Arguments for restDiv:
+    -- a = (a1, a2, a3,...)
+    -- u = - 1/a0
+    -- n = in this invocation we compute (cn, c(n+1), ...)
+    -- k = the resulting coefficients ck, c(k+1), ..., c(n-1) are equal to 0
+    -- ra = (ak,a(k+1),a(k+2),...) substream of x starting at k
+    -- b = (bn, b(n+1), b(n+2),...)
+    -- revc = [c(k-1),c(k-2),...,c(0)] the first k coefficients of the result
+    --        in reverse order, c(k-1)~=0.
+
+    -- local
+    restDiv(a: ST A, u: A, n: I, k: I, ra: ST A, b: ST A, revc: List A): ST A ==
       delay
-        concat(rh, iDiv(- rh * rst x, rst x, rh))
+        --assert(k=#revc)
+        --assert(for i in k .. n-1 while not empty? a repeat a := rst a; a=ra)
+        --assert(#revc>0)
+        empty? b => restRecip(a, u, n, k, ra, revc)
+        rvc: List A := revc -- create a local variable inside 'delay'
+        cn: A := u * (revSum(ra, rvc) - frst b) -- u = -1/a(0)
+        zero? cn => concat(cn, restDiv(a, u, n+1, k, rst ra, rst b, rvc))
+        for i in k..n-1 repeat rvc := cons(0, rvc) -- fill rvc with zeros
+        concat(cn, restDiv(a, u, n+1, n+1, a, rst b, cons(cn, rvc)))
+
+    ((x: ST A) exquo (y: ST A)): UN ==
+        for n in 1..1000 repeat
+            empty? y => return "failed"
+            empty? x => return empty()
+            not zero? frst y => break
+            not zero? frst x => return "failed"
+            x := rst x
+            y := rst y
+        ua: Union(A, "failed") := recip frst y
+        ua case "failed" => "failed"
+        u: A := ua @ A
+        c0 := u * frst x
+        concat(c0, restDiv(rst y, -u, 1, 1, rst y, rst x, [c0]))
+
+    ((x : ST A) / (y : ST A)): ST A == delay
+        empty? y => error "/: division by zero"
+        empty? x => empty()
+        ua: Union(A, "failed") := recip frst y
+        ua case "failed" => error "/: second argument is not invertible"
+        u: A := ua @ A
+        c0 := u * frst x
+        concat(c0, restDiv(rst y, -u, 1, 1, rst y, rst x, [c0]))
+
+-------------------------------------------------------------------
 
 --% coefficients
 
-- 
2.34.1

Reply via email to