Re: [sqlalchemy] Combining yield_per and eager loading

Martijn van Oosterhout Wed, 20 Jul 2016 08:05:27 -0700

Ok, so this is what I have for today. It works, and handles all kinds of 
corner cases and yet it's not quite what I want. It does everything as a 
joinedload. It's much easier to use now though.


You can do things like:

q = Session.query(Animal)

for animal in yielded_load(q, (joinedload(Animal.owner).joinedload(Human.
family),
                               joinedload(Animal.species).joinedload(Species
.phylum)):
    do_something(animal)

It says joinedload() but it doesn't actually pay attention to that, it just 
uses it to determine the path. It would be really nice to be able to 
specify that some things should be fetched using subqueryload(), but that 
would require unpacking/manipulating the Load objects and I don't think 
there's a supported interface for that. Additionally, it would be nice if 
could notice that paths share a common prefix and only fetch those once. 
Still, for the amount of code it's pretty effective.

from itertools import groupby, islice
from sqlalchemy.orm import attributes, Load, aliased
from sqlalchemy import tuple_


def yielded_load(query, load_options, N=1000):
    # Note: query must return only a single object (for now anyway)
    main_query = query.yield_per(N)

    main_res = iter(main_query)

    while True:
        # Fetch block of results from query
        objs = list(islice(main_res, N))

        if not objs:
            break

        for load_option in load_options:
            # Get path of attributes to follow
            path = load_option.path
            pk = path[0].prop.parent.primary_key

            # Generate query that joins against original table
            child_q = main_query.session.query().order_by(*pk)

            for i, attr in enumerate(path):
                if i == 0:
                    # For the first relationship we add the target and the 
pkey columns
                    # Note: add_columns() doesn't work here? 
with_entities() does....
                    next_table = attr.prop.target
                    child_q = child_q.join(next_table, attr)
                    child_q = child_q.with_entities(attr.prop.mapper).
add_columns(*pk)
                    if attr.prop.order_by:
                        child_q = child_q.order_by(*attr.prop.order_by)
                    opts = Load(attr.prop.mapper)
                else:
                    # For all relationships after the first we can use 
contains_eager
                    # Note: The aliasing is to handle cases where the 
relationships loop
                    next_table = aliased(attr.prop.target)
                    child_q = child_q.join(next_table, attr, isouter=True)
                    opts = opts.contains_eager(attr, alias=next_table)

            child_q = child_q.options(opts)

            keys = [[getattr(obj, col.key) for col in pk] for obj in objs]

            child_q = child_q.filter(tuple_(*pk).in_(keys))

            # Here we use the fact that the first column is the target 
object
            collections = dict((k, [r[0] for r in v]) for k, v in groupby(
                child_q,
                lambda x: tuple([getattr(x, c.key) for c in pk])
            ))

            for obj in objs:
                # We can traverse many-to-one and one-to-many
                if path[0].prop.uselist:
                    attributes.set_committed_value(
                        obj,
                        path[0].key,
                        collections.get(
                            tuple(getattr(obj, c.key) for c in pk),
                            ())
                    )
                else:
                    attributes.set_committed_value(
                        obj,
                        path[0].key,
                        collections.get(
                            tuple(getattr(obj, c.key) for c in pk),
                            [None])[0]
                    )

        for obj in objs:
            yield obj


-- 
Martijn

-- 
You received this message because you are subscribed to the Google Groups 
"sqlalchemy" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To post to this group, send email to [email protected].
Visit this group at https://groups.google.com/group/sqlalchemy.
For more options, visit https://groups.google.com/d/optout.

Re: [sqlalchemy] Combining yield_per and eager loading

Reply via email to