On Tue, Feb 5, 2019 at 11:51 AM Ian Miller <[email protected]> wrote:
>
> Hello all -
>
> I am relatively new to using SQLAlchemy for more complex use cases. I am in
> the process of creating a time series query, but I am unable to reference a
> column by its alias at the top level of the query.
>
> This is the query that I am trying to address that SQLAlchemy is currently
> generating:
>
> SELECT non_interval_query.metadata_value AS non_interval_query_metadata_value,
> coalesce(sum(non_interval_query.coalesce_2), 0) AS coalesce_1,
> timestamp
> FROM
> (SELECT generate_series(date_trunc('day',
> date('2019-01-06T00:00:00+00:00')), date_trunc('day',
> date('2019-01-12T00:00:00+00:00')), '1 day') AS timestamp) AS time_series
> LEFT OUTER JOIN
> (SELECT post_metadata_1.metadata_value AS post_metadata_1_metadata_value,
> post_metadata_2.metadata_value AS post_metadata_2_metadata_value,
> vw_post.created_at AS vw_post_created_at,
> coalesce(count(DISTINCT vw_post.id), 0) AS coalesce_1
> FROM vw_post
> JOIN post_metadata AS post_metadata_1 ON post_metadata_1.post_id =
> vw_post.id
> JOIN post_metadata AS post_metadata_2 ON post_metadata_2.post_id =
> vw_post.id
> WHERE post_metadata_1.metadata_value IN ('<metadata_values>')
> AND post_metadata_2.metadata_value IN ('<metadata_value>')
> AND vw_post.created_at >= '2019-01-06T00:00:00+00:00'
> AND vw_post.created_at <= '2019-01-12T00:00:00+00:00'
> AND post_metadata_1.schema_uid = '<schema_uid>'
> AND post_metadata_1.metadata_name = '<metadata_name>'
> AND post_metadata_2.schema_uid = '<schema_uid>'
> AND post_metadata_2.metadata_name = '<metadata_name>'
> AND vw_post.license_id IN (<license_ids>)
> GROUP BY vw_post.created_at,
> post_metadata_1.metadata_value,
> post_metadata_2.metadata_value,
> vw_post.created_at) AS non_interval_query ON date_trunc('day',
> created_at) = timestamp;
>
> You'll notice that "non_interval_query.metadata_value AS
> non_interval_query_metadata_value" specified at the beginning of the query is
> ambiguous due to the 2 "metadata_value" selects in the "non_interval_query"
> subquery. What I'm trying to do is have 2 selects at the top level - one for
> "non_interval_query.post_metadata_1_metadata_value" and one for
> "non_interval_query.post_metadata_2_metadata_value".
>
>
> For reference, here is the code used to generate the above query:
so this code is incomplete, referring to something called
"_prepare_non_gb_columns" which is likely where this is going wrong,
the subquery that you SELECT from has a .c namespace from which you
would be selecting both
non_interval_query.c.post_metadata_1_metadata_value and
non_interval_query.c.post_metadata_2_metadata_value from, separately.
The names that are available on .c. come directly from the label names
you use in the subquery, like
metadata_value.label("post_metadata_1_metadata_value").
I mocked the important part there up as a script below, but I did it
in Core which is easier for this kind of query, but then for
demonstration I adapted it to Query as well. long term plan is to
unify these two query interfaces more completely.
Core:
from sqlalchemy import table, column, select
post_metadata = table(
"post_metadata", column("post_id"), column("metadata_value")
)
vw_post = table("vw_post", column("id"))
post_metadata_1 = post_metadata.alias("post_metadata_1")
post_metadata_2 = post_metadata.alias("post_metadata_2")
non_interval_query = (
select(
[
post_metadata_1.c.metadata_value.label(
"post_metadata_1_metadata_value"
),
post_metadata_2.c.metadata_value.label(
"post_metadata_2_metadata_value"
),
]
)
.select_from(
vw_post.join(
post_metadata_1, post_metadata_1.c.post_id == vw_post.c.id
).join(post_metadata_2, post_metadata_2.c.post_id == vw_post.c.id)
)
.alias("non_interval_query")
)
stmt = select(
[
non_interval_query.c.post_metadata_1_metadata_value,
non_interval_query.c.post_metadata_2_metadata_value,
]
).apply_labels()
print(stmt)
ORM version:
from sqlalchemy import table, column, select
post_metadata = table(
"post_metadata", column("post_id"), column("metadata_value")
)
vw_post = table("vw_post", column("id"))
from sqlalchemy.orm import Session, aliased
s = Session()
post_metadata_1 = aliased(post_metadata, "post_metadata_1")
post_metadata_2 = aliased(post_metadata, "post_metadata_2")
non_interval_query = (
s.query(
post_metadata_1.c.metadata_value.label(
"post_metadata_1_metadata_value"
),
post_metadata_2.c.metadata_value.label(
"post_metadata_2_metadata_value"
),
)
.select_from(vw_post)
.join(post_metadata_1, post_metadata_1.c.post_id == vw_post.c.id)
.join(post_metadata_2, post_metadata_2.c.post_id == vw_post.c.id)
.subquery("non_interval_query")
)
stmt = s.query(
non_interval_query.c.post_metadata_1_metadata_value,
non_interval_query.c.post_metadata_2_metadata_value,
)
print(stmt)
>
>
> def apply_date_group_by(self, session, query, range_gb_params):
> field_name = self.db.get("column")
> model = self._object.get("model")
>
> if not field_name or not model:
> raise ValueError("Invalid date group by")
>
> gb_column = self._build_column()
> interval = range_gb_params.get("interval")
> interval_type = range_gb_params.get("interval_type")
>
> time_series = func.generate_series(
> func.date_trunc(interval_type, func.date(range_gb_params["start"])),
> func.date_trunc(interval_type, func.date(range_gb_params["end"])),
> interval,
> ).label("timestamp")
>
> ts_column = column("timestamp")
>
> time_series_query = session.query(time_series).subquery("time_series")
> non_interval_query = query.subquery("non_interval_query")
> # have to replace the original gb_column with the 'timestamp' column
> # in order to properly merge the dataset into the time series dataset
> non_gb_columns, gbs = self._prepare_non_gb_columns(
> ts_column, gb_column, non_interval_query.columns
> )
>
> # construct query with correct position passed in from `range_gb_params`
> query_position = range_gb_params.get("query_index_position", 0)
> non_gb_columns.insert(query_position, ts_column)
>
> date_gb_query = session.query(*non_gb_columns).select_from(
> time_series_query.outerjoin(
> non_interval_query,
> func.date_trunc(interval_type, column(field_name)) == ts_column,
> )
> )
>
> if gbs:
> date_gb_query = date_gb_query.group_by(*gbs)
>
> return date_gb_query.order_by(ts_column)
>
>
>
> Any help on this would be greatly appreciated!
>
> --
> SQLAlchemy -
> The Python SQL Toolkit and Object Relational Mapper
>
> http://www.sqlalchemy.org/
>
> To post example code, please provide an MCVE: Minimal, Complete, and
> Verifiable Example. See http://stackoverflow.com/help/mcve for a full
> description.
> ---
> You received this message because you are subscribed to the Google Groups
> "sqlalchemy" group.
> To unsubscribe from this group and stop receiving emails from it, send an
> email to [email protected].
> To post to this group, send email to [email protected].
> Visit this group at https://groups.google.com/group/sqlalchemy.
> For more options, visit https://groups.google.com/d/optout.
--
SQLAlchemy -
The Python SQL Toolkit and Object Relational Mapper
http://www.sqlalchemy.org/
To post example code, please provide an MCVE: Minimal, Complete, and Verifiable
Example. See http://stackoverflow.com/help/mcve for a full description.
---
You received this message because you are subscribed to the Google Groups
"sqlalchemy" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
To post to this group, send email to [email protected].
Visit this group at https://groups.google.com/group/sqlalchemy.
For more options, visit https://groups.google.com/d/optout.