I'm only observing from afar (so to speak) since I don't have a deep 
understanding of these systems, but in discussing with AI your 
observations, the following analysis was suggested which you might find 
useful to compare to the proofs that you have generated for yourself:

```python
Awesome — here’s a tiny, self-contained SymPy helper that scans your 
Bellman-style system, builds the **worst-case matrix** $\bar A$ (entrywise 
max of all action matrices for each row), and prints:

* max row sum $\| \bar A \|_{\infty}$ (a contraction certificate if < 1),
* max column sum $\| \bar A \|_{1}$ (a second bound),
* a **right Perron vector** $w \succ 0$ of $\bar A$ (via power iteration),
* the **weighted∞ operator norm** $\| \bar A \|_{\infty,w}=\max_i 
\frac{(\bar A w)_i}{w_i}$ (equals the spectral radius when $w$ is Perron),
* a one-line **certificate** that uniqueness & policy-iteration termination 
hold when any of those bounds is < 1.

You can paste this straight into your notebook.

```python
import sympy as sp
from sympy import Eq, Max, Matrix, Tuple

# ---------- parsing helpers ----------

def _ordered_vars(eqs):
    # keep only "v*" unknowns; tweak if you use different names
    vars_all = list(sp.ordered(Tuple(*eqs).atoms(sp.Symbol)))
    return [v for v in vars_all if v.name.startswith('v')]

def _affine_coeffs(expr, vars_list):
    """Return (coeffs list, const) for an affine expr in vars_list."""
    lin = sp.expand(expr)
    coeffs = [sp.nsimplify(lin.diff(v)) for v in vars_list]
    const  = sp.nsimplify(lin - sum(c*v for c, v in zip(coeffs, vars_list)))
    return coeffs, const

def build_barA(eqs, vars_list=None):
    """
    Build worst-case matrix \bar A: for each equation v_i = Max_k{ a_{ik}^T 
v + b_{ik} }
    (or plain affine), take entrywise maximum over actions k for row i.
    Returns (\bar A, vars_list).
    """
    if vars_list is None:
        vars_list = _ordered_vars(eqs)
    n = len(vars_list)
    idx = {v:i for i, v in enumerate(vars_list)}

    # collect per-row action coefficient lists
    row_actions = [[] for _ in range(n)]

    for E in eqs:
        # normalize to v_i = RHS (ignore equations whose LHS isn't one of 
our vars)
        if E.lhs in idx:
            i = idx[E.lhs]
            rhs = E.rhs
        elif E.rhs in idx:
            i = idx[E.rhs]
            rhs = E.lhs
        else:
            continue

        # treat Max arms as separate actions; plain affine is a single 
action
        arms = rhs.args if isinstance(rhs, Max) else (rhs,)
        for a in arms:
            coeffs, _ = _affine_coeffs(a, vars_list)
            row_actions[i].append(coeffs)

    # build \bar A as entrywise max across actions for each row
    Abar = sp.zeros(n, n)
    for i in range(n):
        if not row_actions[i]:
            continue
        for j in range(n):
            # coeffs are rationals here, so Python max is exact
            Abar[i, j] = max(sp.nsimplify(cj[j]) for cj in row_actions[i])
    return Abar, vars_list

# ---------- diagnostics & certificate ----------

def analyze_bellman(eqs):
    Abar, vars_list = build_barA(eqs)
    n = Abar.shape[0]

    # sanity: nonnegativity (warn if any negative coefficient sneaks in)
    neg_entries = [(i, j, Abar[i, j]) for i in range(n) for j in range(n) 
if Abar[i, j] < 0]
    if neg_entries:
        print(f"WARNING: {len(neg_entries)} negative entries found in \\bar 
A; uniqueness test below may not apply.")

    # row/column sum bounds (exact rationals)
    row_sums = [sp.nsimplify(sum(Abar[i, j] for j in range(n))) for i in 
range(n)]
    col_sums = [sp.nsimplify(sum(Abar[i, j] for i in range(n))) for j in 
range(n)]
    rmax = max(row_sums) if row_sums else sp.Integer(0)
    cmax = max(col_sums) if col_sums else sp.Integer(0)

    # right Perron vector via power method (float, but only for a nicer 
bound/vector)
    prec = 80
    Af = sp.N(Abar, prec)
    v  = Matrix([1] * n)
    for _ in range(100):
        v1 = Af * v
        s  = max(abs(z) for z in v1) if n else 1
        if s == 0:
            break
        v1 = v1 / s
        if (v1 - v).norm() < 1e-30:
            v = v1
            break
        v = v1

    # weighted infinity norm with weight vector v (equals rho if v is 
Perron)
    if n and all(vi != 0 for vi in v):
        Av = Af * v
        beta = max(float(Av[i] / v[i]) for i in range(n))
        w = (v / sum(v)).applyfunc(lambda z: sp.N(z, 30))  # normalize to 
sum 1 for display
    else:
        beta = float(sp.N(rmax))
        w = Matrix([sp.Rational(1, n) for _ in range(n)]) if n else 
Matrix([])

    # report
    print("=== Bellman-MDP structural scan ===")
    print(f"vars: {n},   ||\\bar A||_∞ (max row sum): {rmax}")
    print(f"          ||\\bar A||_1 (max col sum): {cmax}")
    print(f"weighted-∞ operator norm with Perron-like w: ~ {beta:.12f}")
    if n <= 12:
        print("w (weights, sum=1):", list(w))

    # certificate
    cert = None
    if rmax.is_Rational and rmax < 1:
        cert = f"Contraction in ∞-norm: ||\\bar A||_∞ = {rmax} < 1 ⇒ unique 
fixed point & policy iteration terminates."
    elif cmax.is_Rational and cmax < 1:
        cert = f"Contraction in 1-norm: ||\\bar A||_1 = {cmax} < 1 ⇒ unique 
fixed point & policy iteration terminates."
    elif beta < 1 - 1e-12:
        cert = f"Spectral certificate: ρ(\\bar A) ≤ {beta:.12f} < 1 via 
weighted ∞-norm with w ⇒ unique fixed point & PI termination."
    else:
        cert = "No contraction certificate found (<1) — system may still be 
fine, but uniqueness/termination not certified by these bounds."
    print("Certificate:", cert)

    return {
        "Abar": Abar,
        "row_sums": row_sums,
        "col_sums": col_sums,
        "rmax": rmax,
        "cmax": cmax,
        "beta_weighted_inf": beta,
        "w": w,
    }
```

### How to use

```python
# eqs = [...]  # your original list of Eq(...) with Max on the RHS (or LHS)
report = analyze_bellman(eqs)

# Quick check: if report["rmax"] < 1 (or beta_weighted_inf < 1)
# you’ve got a clean uniqueness + termination certificate.
```

### What this is certifying

* $\|\bar A\|_\infty = \max_i \sum_j \bar A_{ij} < 1$ ⇒ 
$T(v)=\max_a\{A^{(a)}v+b^{(a)}\}$ is a contraction in the $\infty$-norm, 
hence a **unique** fixed point and **policy iteration** cannot cycle.
* Even if the raw max row sum isn’t <1, the **weighted**∞ norm with a 
positive weight vector $w$ can drop the operator norm below 1. Choosing $w$ 
as (an approximation to) the **right Perron vector** of $\bar A$ makes

  $$
  \| \bar A \|_{\infty,w}=\max_i \frac{(\bar A w)_i}{w_i}\approx \rho(\bar 
A),
  $$

  so if that value is <1 you again have a clean certificate.

/c
On Saturday, September 6, 2025 at 2:17:54 PM UTC-5 hungabo...@gmail.com 
wrote:

>
> Dear Oscar, Chris,
>
> Thank you very much for all your help and contribution to this.
> Finally, I found reasonably good introductory course slides on policy 
> iteration here:
> https://web.stanford.edu/class/cme241/lecture_slides/Tour-DP.pdf
>
> This refers to Markov Decision Processes (MDP) which are essentially what 
> I'm studying in the somewhat special case of gamma == 1 and no imediate 
> rewards. My system of equations are essentially the Bell equations for 
> the strongly connected components of the underlying digraph of an MDP.
>
> Up until now I didn't even realize the similarity of my problem and MDPs. 
> Without your help I probably wouldn't have. Same with the policy 
> iteration. I cleaned up the AI's code to my liking and coding style. :-) 
> It is super-fast, even calling it on a system of 3358 equations (having 
> 2401 Max equations, with 256 Max atoms) it stabilized the policy in 4 
> iterations in less than a minute on a Raspberry Pi 5!
> That said, the linear programming method hung for a system of 2337 
> equations, having 719 Max equations, with only 45 different Max atoms! So 
> it seems policy iteration really provided the breakthrough here!
>
> The only nagging thing was that I couldn't prove uniqueness of the 
> solution, and without that the algorithm is not complete. I was pretty 
> sure there is only one unique solution, but I couldn't prove it. Nor
> could I find a counterexample in my special case. The literature I found 
> almost exclusively deals with the gamma < 1 situation, because all of the 
> proofs use the fact that the policy iteration is a gamma-contraction on an 
> appropriate Banach space, and thus Banach's fixed point theorem can be 
> used. This doesn't work at all in my case.
>
> I also couldn't prove that the iteration really stops, and won't move 
> back-and-forth between two policies, but this didn't bother me much, 
> because in such a case I could simply throw out those policies and start 
> afresh. (And in practice it didn't seem to happen, anyway.)
>
> Finally, after spending the significant time of the weekend thinking about 
> this, I finally managed to prove that in my case uniqueness of solutions 
> also holds for those kinds of systems which I'm interested in! This 
> completes the policy iteration solution, I don't need to assume 
> uniqueness, anymore, it is there.
>
> Thank you again for all your help and contributions to this project!
>
> One last question: checksol seems to check solutions by using subs instead 
> of xreplace. This makes sense of course, as in general this is how a 
> solution is supposed to be checked. Though would it not make sense to try 
> to do xreplace first? And if that satisfies the system already, then 
> return True, and only if not, then do the subs? I doubt the initial 
> xreplace would have a significant performance impact, especially compared 
> to subs, but in some (many?) cases it would be able to return the positive 
> answer significantly faster.
>
> Let me know if you think it would make sense to open an issue about this.
>
> Thanks for all the help!
> Gábor
>
>
> On Thu, 28 Aug 2025, Chris Smith wrote:
>
> > That's a surprise to me, Oscar. I would not have expected Newton to work
> > with a system involving Max like this. It is interesting, too, that SymPy
> > knows the derivative of Max. 
> > I think you have to watch out for (1) infeasible systems which would 
> cycle 
> > and (2) singluar systems. For (1) AI gives as an example,
> > import sympy as sp
> > x, m = sp.symbols('x m', real=True)
> > 
> > def step(policy):  # policy: 0 -> use m=x, 1 -> use m=-x
> >     eqs = [sp.Eq(m, x if policy == 0 else -x), sp.Eq(x, 2*m + 1)]
> >     sol = sp.solve(eqs, [x, m], dict=True)[0]
> >     next_pol = 0 if sp.Max(sol[x], -sol[x]) == sol[x] else 1
> >     return sol, next_pol
> > 
> > policy = 0
> > for _ in range(4):
> >     sol, policy = step(policy)
> >     print(f"sol={sol}, next_policy={policy},  m={sol[m]},
> >  Max(x,-x)={sp.Max(sol[x], -sol[x])}")
> > 
> > 
> > which oscillates between (x,m) = (-1,-1) and (1/3,-1/3) whereas LP would
> > indicate this as being infeasible. 
> > 
> > As an example of (2) consider `x=max(y,0);y=max(x,0)` which has solution
> > (t,t) with t>=0. LP would give a canonical solution like (0,0).
> > 
> > /c
> > On Thursday, August 28, 2025 at 9:26:42 AM UTC-5 Oscar wrote:
> > On Thu, 28 Aug 2025 at 15:05, Oscar Benjamin
> > <oscar.j....@gmail.com> wrote:
> > >
> > > So Newton iteration has converged exactly after two iterations
> > in this
> > > case although a third iteration is used to confirm
> > convergence. I
> > > think what happened is that the initial conditions did not
> > have the
> > > right branches for all Maxes but after one iteration it has
> > the right
> > > branches and a single step of Newton solves the linear
> > equations
> > > exactly.
> > >
> > > ...
> > >
> > > Once you have an approximate float solution like this you can
> > use it
> > > to check which branch of each Max holds. Then you can compute
> > the
> > > exact solution in rational numbers and verify that it holds
> > exactly if
> > > that is what is needed.
> >
> > Actually not even that is needed. Since Newton iteration
> > converges
> > exactly for this case you can just do Newton with rational
> > numbers:
> >
> > In [2]: syms = list(ordered(Tuple(*eqs).atoms(Symbol)))
> >
> > In [3]: F = Matrix([e.lhs - e.rhs for e in eqs])
> >
> > In [4]: J = F.jacobian(syms)
> >
> > In [5]: x0 = [Rational(1, 2)]*len(syms)
> >
> > In [6]: xn = Matrix(x0)
> >
> > In [7]: rep = dict(zip(syms, xn.flat()))
> >
> > In [8]: xn1 = xn - J.xreplace(rep).solve(F.xreplace(rep))
> >
> > In [9]: print(sum(map(abs, xn - xn1)).evalf(3)); xn = xn1
> > 46.0
> >
> > In [10]: rep = dict(zip(syms, xn.flat()))
> >
> > In [11]: xn1 = xn - J.xreplace(rep).solve(F.xreplace(rep))
> >
> > In [12]: print(sum(map(abs, xn - xn1)).evalf(3)); xn = xn1
> > 0.563
> >
> > In [13]: rep = dict(zip(syms, xn.flat()))
> >
> > In [14]: xn1 = xn - J.xreplace(rep).solve(F.xreplace(rep))
> >
> > In [15]: print(sum(map(abs, xn - xn1)).evalf(3)); xn = xn1
> > 0
> >
> > In [16]: print([N(e) for e in xn[:10]])
> > [0.615451987540876, 0.744935782479303, 0.669554028724062,
> > 0.507340567903750, 0.507340567903750, 0.779985609984258,
> > 0.540175431864777, 0.802311871497269, 0.646942367048034,
> > 0.792756582043413]
> >
> > In [17]: print(xn[0])
> > 
> 291110919370598802432045744050541816494242844853948214642659913815020760969
> > 
> 79324693736677856978271553874982581817081789333748397161675933583/47300345
> > 
> 96748531892582771836437248008401311189634492284731711982092309743130839491
> > 4046771654831300577107337882031715520231586587481997312000
> >
> > --
> > Oscar
> > 
> > --
> > You received this message because you are subscribed to the Google Groups
> > "sympy" group.
> > To unsubscribe from this group and stop receiving emails from it, send an
> > email to sympy+un...@googlegroups.com.
> > To view this discussion visithttps://
> groups.google.com/d/msgid/sympy/ba489ce3-cd0c-4281-b0d6-36a671c2a0a
> > fn%40googlegroups.com.
> > 
> >

-- 
You received this message because you are subscribed to the Google Groups 
"sympy" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to sympy+unsubscr...@googlegroups.com.
To view this discussion visit 
https://groups.google.com/d/msgid/sympy/44b88397-79ef-45ef-a715-65e181f4e2f8n%40googlegroups.com.

Reply via email to