Hi Daniele,

Thanks for the 'modern' suggestion.

But based on the strongly worded suggestion from Martin, I'll also share a 
script that loads all transactions into a pandas dataframe (thanks to ChatGPT). 
Further processing on this dataframe lets me do anything I want...

 -k.


#!/usr/bin/env python
"""
Extract (date, from, to, amount, currency, description) rows from Beancount 
ledger with date as a pandas‑friendly datetime index.
"""

import sys
from pathlib import Path
from typing import List, Tuple

import pandas as pd
from beancount.loader import load_file
from beancount.core.data import Transaction, Posting

Row = Tuple[str, str, str, float, str, str]  # (date, from, to, amount, 
currency, description)

def pair_postings(date: str, description: str, postings: List[Posting]) -> 
List[Row]:
    """
    For all postings of the same currency in a transaction, pair debits 
(negative)
    with credits (positive) and emit (date, from, to, amount, currency, 
description) rows.
    """
    pos = [p for p in postings if p.units and p.units.number > 0]
    neg = [p for p in postings if p.units and p.units.number < 0]

    rows: List[Row] = []
    while pos and neg:
        p_from = neg[0]
        p_to = pos[0]

        amt = min(abs(p_from.units.number), p_to.units.number)
        currency = p_from.units.currency

        rows.append((date, p_from.account, p_to.account, amt, currency, 
description))

        # Decrement amounts
        p_from = 
p_from._replace(units=p_from.units._replace(number=p_from.units.number + amt))
        p_to = p_to._replace(units=p_to.units._replace(number=p_to.units.number 
- amt))

        if p_from.units.number == 0:
            neg.pop(0)
        else:
            neg[0] = p_from

        if p_to.units.number == 0:
            pos.pop(0)
        else:
            pos[0] = p_to

    return rows


def ledger_to_dataframe(path: Path | str) -> pd.DataFrame:
    entries, errors, _ = load_file(str(path))
    if errors:
        for e in errors:
            print(e, file=sys.stderr)

    rows: List[Row] = []
    for entry in entries:
        if not isinstance(entry, Transaction):
            continue

        description = entry.payee or ""
        if entry.narration:
            if description:
                description += " "
            description += entry.narration

        by_currency: dict[str, List[Posting]] = {}
        for p in entry.postings:
            if p.units:
                by_currency.setdefault(p.units.currency, []).append(p)

        for plist in by_currency.values():
            rows.extend(pair_postings(entry.date, description, plist))
            
    df = pd.DataFrame(rows, columns=["date", "from", "to", "amount", 
"currency", "description"])
    df["date"] = pd.to_datetime(df["date"]).dt.normalize()
    df["amount"] = df["amount"].astype(float)
    df = df.set_index('date')
    return df



if __name__ == "__main__":
    if len(sys.argv) != 2:
        sys.exit("Usage: python txns_to_dataframe.py /path/to/ledger.beancount")
    df = ledger_to_dataframe(Path(sys.argv[1]).expanduser())
    print(df.head())

-- 
You received this message because you are subscribed to the Google Groups 
"Beancount" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to beancount+unsubscr...@googlegroups.com.
To view this discussion visit 
https://groups.google.com/d/msgid/beancount/875xfvhtu4.fsf%40gmail.com.

Reply via email to