I'm only observing from afar (so to speak) since I don't have a deep
understanding of these systems, but in discussing with AI your
observations, the following analysis was suggested which you might find
useful to compare to the proofs that you have generated for yourself:
```python
Awesome — here’s a tiny, self-contained SymPy helper that scans your
Bellman-style system, builds the **worst-case matrix** $\bar A$ (entrywise
max of all action matrices for each row), and prints:
* max row sum $\| \bar A \|_{\infty}$ (a contraction certificate if < 1),
* max column sum $\| \bar A \|_{1}$ (a second bound),
* a **right Perron vector** $w \succ 0$ of $\bar A$ (via power iteration),
* the **weighted∞ operator norm** $\| \bar A \|_{\infty,w}=\max_i
\frac{(\bar A w)_i}{w_i}$ (equals the spectral radius when $w$ is Perron),
* a one-line **certificate** that uniqueness & policy-iteration termination
hold when any of those bounds is < 1.
You can paste this straight into your notebook.
```python
import sympy as sp
from sympy import Eq, Max, Matrix, Tuple
# ---------- parsing helpers ----------
def _ordered_vars(eqs):
# keep only "v*" unknowns; tweak if you use different names
vars_all = list(sp.ordered(Tuple(*eqs).atoms(sp.Symbol)))
return [v for v in vars_all if v.name.startswith('v')]
def _affine_coeffs(expr, vars_list):
"""Return (coeffs list, const) for an affine expr in vars_list."""
lin = sp.expand(expr)
coeffs = [sp.nsimplify(lin.diff(v)) for v in vars_list]
const = sp.nsimplify(lin - sum(c*v for c, v in zip(coeffs, vars_list)))
return coeffs, const
def build_barA(eqs, vars_list=None):
"""
Build worst-case matrix \bar A: for each equation v_i = Max_k{ a_{ik}^T
v + b_{ik} }
(or plain affine), take entrywise maximum over actions k for row i.
Returns (\bar A, vars_list).
"""
if vars_list is None:
vars_list = _ordered_vars(eqs)
n = len(vars_list)
idx = {v:i for i, v in enumerate(vars_list)}
# collect per-row action coefficient lists
row_actions = [[] for _ in range(n)]
for E in eqs:
# normalize to v_i = RHS (ignore equations whose LHS isn't one of
our vars)
if E.lhs in idx:
i = idx[E.lhs]
rhs = E.rhs
elif E.rhs in idx:
i = idx[E.rhs]
rhs = E.lhs
else:
continue
# treat Max arms as separate actions; plain affine is a single
action
arms = rhs.args if isinstance(rhs, Max) else (rhs,)
for a in arms:
coeffs, _ = _affine_coeffs(a, vars_list)
row_actions[i].append(coeffs)
# build \bar A as entrywise max across actions for each row
Abar = sp.zeros(n, n)
for i in range(n):
if not row_actions[i]:
continue
for j in range(n):
# coeffs are rationals here, so Python max is exact
Abar[i, j] = max(sp.nsimplify(cj[j]) for cj in row_actions[i])
return Abar, vars_list
# ---------- diagnostics & certificate ----------
def analyze_bellman(eqs):
Abar, vars_list = build_barA(eqs)
n = Abar.shape[0]
# sanity: nonnegativity (warn if any negative coefficient sneaks in)
neg_entries = [(i, j, Abar[i, j]) for i in range(n) for j in range(n)
if Abar[i, j] < 0]
if neg_entries:
print(f"WARNING: {len(neg_entries)} negative entries found in \\bar
A; uniqueness test below may not apply.")
# row/column sum bounds (exact rationals)
row_sums = [sp.nsimplify(sum(Abar[i, j] for j in range(n))) for i in
range(n)]
col_sums = [sp.nsimplify(sum(Abar[i, j] for i in range(n))) for j in
range(n)]
rmax = max(row_sums) if row_sums else sp.Integer(0)
cmax = max(col_sums) if col_sums else sp.Integer(0)
# right Perron vector via power method (float, but only for a nicer
bound/vector)
prec = 80
Af = sp.N(Abar, prec)
v = Matrix([1] * n)
for _ in range(100):
v1 = Af * v
s = max(abs(z) for z in v1) if n else 1
if s == 0:
break
v1 = v1 / s
if (v1 - v).norm() < 1e-30:
v = v1
break
v = v1
# weighted infinity norm with weight vector v (equals rho if v is
Perron)
if n and all(vi != 0 for vi in v):
Av = Af * v
beta = max(float(Av[i] / v[i]) for i in range(n))
w = (v / sum(v)).applyfunc(lambda z: sp.N(z, 30)) # normalize to
sum 1 for display
else:
beta = float(sp.N(rmax))
w = Matrix([sp.Rational(1, n) for _ in range(n)]) if n else
Matrix([])
# report
print("=== Bellman-MDP structural scan ===")
print(f"vars: {n}, ||\\bar A||_∞ (max row sum): {rmax}")
print(f" ||\\bar A||_1 (max col sum): {cmax}")
print(f"weighted-∞ operator norm with Perron-like w: ~ {beta:.12f}")
if n <= 12:
print("w (weights, sum=1):", list(w))
# certificate
cert = None
if rmax.is_Rational and rmax < 1:
cert = f"Contraction in ∞-norm: ||\\bar A||_∞ = {rmax} < 1 ⇒ unique
fixed point & policy iteration terminates."
elif cmax.is_Rational and cmax < 1:
cert = f"Contraction in 1-norm: ||\\bar A||_1 = {cmax} < 1 ⇒ unique
fixed point & policy iteration terminates."
elif beta < 1 - 1e-12:
cert = f"Spectral certificate: ρ(\\bar A) ≤ {beta:.12f} < 1 via
weighted ∞-norm with w ⇒ unique fixed point & PI termination."
else:
cert = "No contraction certificate found (<1) — system may still be
fine, but uniqueness/termination not certified by these bounds."
print("Certificate:", cert)
return {
"Abar": Abar,
"row_sums": row_sums,
"col_sums": col_sums,
"rmax": rmax,
"cmax": cmax,
"beta_weighted_inf": beta,
"w": w,
}
```
### How to use
```python
# eqs = [...] # your original list of Eq(...) with Max on the RHS (or LHS)
report = analyze_bellman(eqs)
# Quick check: if report["rmax"] < 1 (or beta_weighted_inf < 1)
# you’ve got a clean uniqueness + termination certificate.
```
### What this is certifying
* $\|\bar A\|_\infty = \max_i \sum_j \bar A_{ij} < 1$ ⇒
$T(v)=\max_a\{A^{(a)}v+b^{(a)}\}$ is a contraction in the $\infty$-norm,
hence a **unique** fixed point and **policy iteration** cannot cycle.
* Even if the raw max row sum isn’t <1, the **weighted**∞ norm with a
positive weight vector $w$ can drop the operator norm below 1. Choosing $w$
as (an approximation to) the **right Perron vector** of $\bar A$ makes
$$
\| \bar A \|_{\infty,w}=\max_i \frac{(\bar A w)_i}{w_i}\approx \rho(\bar
A),
$$
so if that value is <1 you again have a clean certificate.
/c
On Saturday, September 6, 2025 at 2:17:54 PM UTC-5 [email protected]
wrote:
>
> Dear Oscar, Chris,
>
> Thank you very much for all your help and contribution to this.
> Finally, I found reasonably good introductory course slides on policy
> iteration here:
> https://web.stanford.edu/class/cme241/lecture_slides/Tour-DP.pdf
>
> This refers to Markov Decision Processes (MDP) which are essentially what
> I'm studying in the somewhat special case of gamma == 1 and no imediate
> rewards. My system of equations are essentially the Bell equations for
> the strongly connected components of the underlying digraph of an MDP.
>
> Up until now I didn't even realize the similarity of my problem and MDPs.
> Without your help I probably wouldn't have. Same with the policy
> iteration. I cleaned up the AI's code to my liking and coding style. :-)
> It is super-fast, even calling it on a system of 3358 equations (having
> 2401 Max equations, with 256 Max atoms) it stabilized the policy in 4
> iterations in less than a minute on a Raspberry Pi 5!
> That said, the linear programming method hung for a system of 2337
> equations, having 719 Max equations, with only 45 different Max atoms! So
> it seems policy iteration really provided the breakthrough here!
>
> The only nagging thing was that I couldn't prove uniqueness of the
> solution, and without that the algorithm is not complete. I was pretty
> sure there is only one unique solution, but I couldn't prove it. Nor
> could I find a counterexample in my special case. The literature I found
> almost exclusively deals with the gamma < 1 situation, because all of the
> proofs use the fact that the policy iteration is a gamma-contraction on an
> appropriate Banach space, and thus Banach's fixed point theorem can be
> used. This doesn't work at all in my case.
>
> I also couldn't prove that the iteration really stops, and won't move
> back-and-forth between two policies, but this didn't bother me much,
> because in such a case I could simply throw out those policies and start
> afresh. (And in practice it didn't seem to happen, anyway.)
>
> Finally, after spending the significant time of the weekend thinking about
> this, I finally managed to prove that in my case uniqueness of solutions
> also holds for those kinds of systems which I'm interested in! This
> completes the policy iteration solution, I don't need to assume
> uniqueness, anymore, it is there.
>
> Thank you again for all your help and contributions to this project!
>
> One last question: checksol seems to check solutions by using subs instead
> of xreplace. This makes sense of course, as in general this is how a
> solution is supposed to be checked. Though would it not make sense to try
> to do xreplace first? And if that satisfies the system already, then
> return True, and only if not, then do the subs? I doubt the initial
> xreplace would have a significant performance impact, especially compared
> to subs, but in some (many?) cases it would be able to return the positive
> answer significantly faster.
>
> Let me know if you think it would make sense to open an issue about this.
>
> Thanks for all the help!
> Gábor
>
>
> On Thu, 28 Aug 2025, Chris Smith wrote:
>
> > That's a surprise to me, Oscar. I would not have expected Newton to work
> > with a system involving Max like this. It is interesting, too, that SymPy
> > knows the derivative of Max.
> > I think you have to watch out for (1) infeasible systems which would
> cycle
> > and (2) singluar systems. For (1) AI gives as an example,
> > import sympy as sp
> > x, m = sp.symbols('x m', real=True)
> >
> > def step(policy): # policy: 0 -> use m=x, 1 -> use m=-x
> > eqs = [sp.Eq(m, x if policy == 0 else -x), sp.Eq(x, 2*m + 1)]
> > sol = sp.solve(eqs, [x, m], dict=True)[0]
> > next_pol = 0 if sp.Max(sol[x], -sol[x]) == sol[x] else 1
> > return sol, next_pol
> >
> > policy = 0
> > for _ in range(4):
> > sol, policy = step(policy)
> > print(f"sol={sol}, next_policy={policy}, m={sol[m]},
> > Max(x,-x)={sp.Max(sol[x], -sol[x])}")
> >
> >
> > which oscillates between (x,m) = (-1,-1) and (1/3,-1/3) whereas LP would
> > indicate this as being infeasible.
> >
> > As an example of (2) consider `x=max(y,0);y=max(x,0)` which has solution
> > (t,t) with t>=0. LP would give a canonical solution like (0,0).
> >
> > /c
> > On Thursday, August 28, 2025 at 9:26:42 AM UTC-5 Oscar wrote:
> > On Thu, 28 Aug 2025 at 15:05, Oscar Benjamin
> > <[email protected]> wrote:
> > >
> > > So Newton iteration has converged exactly after two iterations
> > in this
> > > case although a third iteration is used to confirm
> > convergence. I
> > > think what happened is that the initial conditions did not
> > have the
> > > right branches for all Maxes but after one iteration it has
> > the right
> > > branches and a single step of Newton solves the linear
> > equations
> > > exactly.
> > >
> > > ...
> > >
> > > Once you have an approximate float solution like this you can
> > use it
> > > to check which branch of each Max holds. Then you can compute
> > the
> > > exact solution in rational numbers and verify that it holds
> > exactly if
> > > that is what is needed.
> >
> > Actually not even that is needed. Since Newton iteration
> > converges
> > exactly for this case you can just do Newton with rational
> > numbers:
> >
> > In [2]: syms = list(ordered(Tuple(*eqs).atoms(Symbol)))
> >
> > In [3]: F = Matrix([e.lhs - e.rhs for e in eqs])
> >
> > In [4]: J = F.jacobian(syms)
> >
> > In [5]: x0 = [Rational(1, 2)]*len(syms)
> >
> > In [6]: xn = Matrix(x0)
> >
> > In [7]: rep = dict(zip(syms, xn.flat()))
> >
> > In [8]: xn1 = xn - J.xreplace(rep).solve(F.xreplace(rep))
> >
> > In [9]: print(sum(map(abs, xn - xn1)).evalf(3)); xn = xn1
> > 46.0
> >
> > In [10]: rep = dict(zip(syms, xn.flat()))
> >
> > In [11]: xn1 = xn - J.xreplace(rep).solve(F.xreplace(rep))
> >
> > In [12]: print(sum(map(abs, xn - xn1)).evalf(3)); xn = xn1
> > 0.563
> >
> > In [13]: rep = dict(zip(syms, xn.flat()))
> >
> > In [14]: xn1 = xn - J.xreplace(rep).solve(F.xreplace(rep))
> >
> > In [15]: print(sum(map(abs, xn - xn1)).evalf(3)); xn = xn1
> > 0
> >
> > In [16]: print([N(e) for e in xn[:10]])
> > [0.615451987540876, 0.744935782479303, 0.669554028724062,
> > 0.507340567903750, 0.507340567903750, 0.779985609984258,
> > 0.540175431864777, 0.802311871497269, 0.646942367048034,
> > 0.792756582043413]
> >
> > In [17]: print(xn[0])
> >
> 291110919370598802432045744050541816494242844853948214642659913815020760969
> >
> 79324693736677856978271553874982581817081789333748397161675933583/47300345
> >
> 96748531892582771836437248008401311189634492284731711982092309743130839491
> > 4046771654831300577107337882031715520231586587481997312000
> >
> > --
> > Oscar
> >
> > --
> > You received this message because you are subscribed to the Google Groups
> > "sympy" group.
> > To unsubscribe from this group and stop receiving emails from it, send an
> > email to [email protected].
> > To view this discussion visithttps://
> groups.google.com/d/msgid/sympy/ba489ce3-cd0c-4281-b0d6-36a671c2a0a
> > fn%40googlegroups.com.
> >
> >
--
You received this message because you are subscribed to the Google Groups
"sympy" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
To view this discussion visit
https://groups.google.com/d/msgid/sympy/44b88397-79ef-45ef-a715-65e181f4e2f8n%40googlegroups.com.