Hello:
I am trying to build a complicated relation, but am having trouble due
to the following:
* there are multiple intermediate tables involved
* the relation requires EXISTS clauses as the join condition between
several of these tables
My model is shown below. The relation I am trying to build is
RunFilter.matching_runs, which is effectively a many-to-many
relationship between RunFilter and Run. Currently, I'm using raw SQL
to model this relationship, as shown at the end of the example.
Ideally, I would like to build this relationship on top of other
relationships, so that RunFilter.matching_runs would use
RunFilter.var_clauses.any() and VarClause.matching_run_items.any() in
its join condition, and VarClause.matching_run_items would use
VarClause.values.any(). I realize that SQLAlchemy does not support
this because mapper properties are not compiled in any predictable
order, but it would be nice.
import sqlalchemy as sa
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.ext.declarative import declarative_base
import sqlalchemy.orm as orm
from sqlalchemy.orm.collections import attribute_mapped_collection
Base = declarative_base()
# Association tables
var_clause_to_value = sa.Table("var_clause_to_value", Base.metadata,
sa.Column("value_id", sa.Integer, sa.ForeignKey("value.id"),
primary_key=True),
sa.Column("var_clause_id", sa.Integer, sa.ForeignKey
("var_clause.id"), primary_key=True))
# Entities
class Variable(Base):
__tablename__ = "variable"
# Fields
id = sa.Column(sa.Integer, primary_key=True)
# In the real world, we would have a `name` attribute as well, but
it's not
# important for this example.
# Relations
values = orm.relation("Value")
class Value(Base):
"""
Each variable can be attached to zero or more values.
"""
__tablename__ = "value"
# Fields
id = sa.Column(sa.Integer, primary_key=True)
variable_id = sa.Column(sa.Integer, sa.ForeignKey("variable.id"),
nullable=False)
# In the real world, we would have a `name` attribute as well, but
it's not
# important for this example.
# Relations
variable = orm.relation("Variable")
class Run(Base):
"""
A run consists of a combination of inputs (variable=value pairs)
and an
output (result).
"""
__tablename__ = "run"
def __str__(self):
return "run%d" % self.id
# Fields
id = sa.Column(sa.Integer, primary_key=True)
result = sa.Column(sa.Unicode, nullable=False)
# Relations
var2item = orm.relation("RunItem",
collection_class=attribute_mapped_collection("variable"))
var2value = association_proxy(
"var2item",
"value",
creator=lambda var, value: RunItem(value=value, variable=var))
class RunItem(Base):
"""
A single variable=value pair for a given run.
"""
__tablename__ = "run_item"
# Fields
run_id = sa.Column(sa.Integer, sa.ForeignKey("run.id"),
primary_key=True)
value_id = sa.Column(sa.Integer, sa.ForeignKey("value.id"),
primary_key=True)
variable_id = sa.Column(sa.Integer, sa.ForeignKey("variable.id"),
primary_key=True)
# Relations
run = orm.relation("Run")
value = orm.relation("Value")
variable = orm.relation("Variable")
class RunFilter(Base):
"""
A user-defined query for runs. For example, if you want to find
all runs
where var1=value1 and var2=value2, you could use::
>>> filter = RunFilter(var_clauses=[VarClause(variable=var1,
values=[value1]),
... VarClause(variable=var2,
values=[value2])])
Note that for a run filter to match a run, *all* of its var
clauses have
to be satisfied.
"""
__tablename__ = "run_filter"
def __str__(self):
return "run_filter%d" % self.id
# Fields
id = sa.Column(sa.Integer, primary_key=True)
# Relations
var_clauses = orm.relation("VarClause")
# Desired SQL for `matching_runs`:
# (assume `run` and `run_filter` are correlated to an outer query)
# NOT EXISTS (SELECT 1 FROM var_clause
# WHERE var_clause.run_filter_id = run_filter.id
# AND NOT EXISTS (SELECT 1 FROM run_item
# WHERE run_item.variable_id =
var_clause.variable_id
# AND EXISTS (SELECT 1 FROM
var_clause_to_value
# WHERE
var_clause_to_value.var_clause_id = var_clause.id
# AND
var_clause_to_value.value_id = run_item.value_id)))
#
# I would like to define this relation as (using `if False` to
effectively
# comment it out but retain syntax highlighting):
if False:
def _primaryjoin():
return ~RunFilter.var_clauses.any(
~VarClause.matching_run_items.any(RunItem.run_id ==
Run.id))
matching_runs = orm.relation("Value",
primaryjoin=_primaryjoin)
del _primaryjoin
class VarClause(Base):
"""
A VarClause matches a RunItem iff:
* RunItem.variable_id == VarClause.variable_id
* AND RunItem.value in VarClause.values
Essentially it is saying "Run.var2value[var1] = value1
OR Run.var2value[var1] = value2
OR ...".
"""
__tablename__ = "var_clause"
# Fields
id = sa.Column(sa.Integer, primary_key=True)
run_filter_id = sa.Column(sa.Integer, sa.ForeignKey
("run_filter.id"),
nullable=False)
variable_id = sa.Column(sa.Integer, sa.ForeignKey("variable.id"),
nullable=False)
# Relations
run_filter = orm.relation("RunFilter")
values = orm.relation("Value", secondary=var_clause_to_value)
variable = orm.relation("Variable")
# Desired SQL for `matching_run_items`:
# (assume `var_clause` and `run_item` are correlated to an outer
query)
# run_item.variable_id = var_clause.variable_id
# AND EXISTS (SELECT 1 FROM var_clause_to_value
# WHERE var_clause_to_value.var_clause_id =
var_clause.id
# AND var_clause_to_value.value_id =
run_item.value_id)
#
# I would like to define this relation as:
if False:
def _primaryjoin():
return ((RunItem.variable_id == VarClause.variable_id)
& (VarClause.values.any(Value.id ==
RunItem.value_id)))
matching_run_items = orm.relation("RunItem",
primaryjoin=_primaryjoin)
del _primaryjoin
# Test Case
engine = sa.create_engine("sqlite:///:memory:")
Base.metadata.create_all(engine)
session = orm.create_session(engine, autocommit=False, autoflush=True)
var1 = Variable()
value1_1 = Value(variable=var1)
value1_2 = Value(variable=var1)
session.add_all([var1, value1_1, value1_2])
var2 = Variable()
value2_1 = Value(variable=var2)
value2_2 = Value(variable=var2)
session.add_all([var2, value2_1, value2_2])
run1 = Run(id=1, result=u"good")
run1.var2value[var1] = value1_1
run1.var2value[var2] = value2_1
run2 = Run(id=2, result=u"bad")
run2.var2value[var1] = value1_2
run2.var2value[var2] = value2_2
session.add_all([run1, run2])
# run_filter1 should match run1.
run_filter1 = RunFilter(id=1)
run_filter1.var_clauses.append(VarClause(variable=var1, values=
[value1_1, value1_2]))
run_filter1.var_clauses.append(VarClause(variable=var2, values=
[value2_1]))
session.add(run_filter1)
# run_filter2 should match run1 and run2.
run_filter2 = RunFilter(id=2)
run_filter2.var_clauses.append(VarClause(variable=var1, values=
[value1_1, value1_2]))
run_filter2.var_clauses.append(VarClause(variable=var2, values=
[value2_1, value2_2]))
session.add(run_filter2)
# run_filter3 should match no runs.
run_filter3 = RunFilter(id=3)
run_filter3.var_clauses.append(VarClause(variable=var1, values=
[value1_2]))
run_filter3.var_clauses.append(VarClause(variable=var2, values=
[value2_1]))
session.add(run_filter3)
session.flush()
engine.echo = True
if False:
# Query all run filters and eager-load their matching runs.
q = session.query(RunFilter)
q = q.options(orm.eagerload("matching_runs"))
run_filter2run = dict((x, x.runs) for x in q)
else:
# Do it manually, sort of.
q = session.query(RunFilter, Run)
q = q.from_statement(
"""SELECT run_filter.id AS run_filter_id,
run.id AS run_id
FROM run_filter, run
WHERE NOT EXISTS (SELECT 1 FROM var_clause
WHERE var_clause.run_filter_id = run_filter.id
AND NOT EXISTS (SELECT 1 FROM run_item
WHERE run_item.run_id = run.id
AND run_item.variable_id =
var_clause.variable_id
AND EXISTS (SELECT 1 FROM
var_clause_to_value
WHERE
var_clause_to_value.var_clause_id = var_clause.id
AND
var_clause_to_value.value_id = run_item.value_id)))
""")
run_filter2run = {}
for (run_filter, run) in q:
runs = run_filter2run.setdefault(run_filter, [])
runs.append(run)
for (run_filter, runs) in run_filter2run.iteritems():
print "%s matching runs: %s" % (run_filter, ", ".join(str(run) for
run in runs))
--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups
"sqlalchemy" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to
[email protected]
For more options, visit this group at
http://groups.google.com/group/sqlalchemy?hl=en
-~----------~----~----~----~------~----~------~--~---