shahar1 commented on code in PR #30658:
URL: https://github.com/apache/airflow/pull/30658#discussion_r1181077707
##########
airflow/providers/google/cloud/transfers/bigquery_to_mssql.py:
##########
@@ -39,86 +37,39 @@ class BigQueryToMsSqlOperator(BaseOperator):
For more information on how to use this operator, take a look at the
guide:
:ref:`howto/operator:BigQueryToMsSqlOperator`
- .. note::
- If you pass fields to ``selected_fields`` which are in different order
than the
- order of columns already in
- BQ table, the data will still be in the order of BQ table.
- For example if the BQ table has 3 columns as
- ``[A,B,C]`` and you pass 'B,A' in the ``selected_fields``
- the data would still be of the form ``'A,B'`` and passed through this
form
- to MSSQL
-
- **Example**: ::
-
- transfer_data = BigQueryToMsSqlOperator(
- task_id='task_id',
- source_project_dataset_table='my-project.mydataset.mytable',
- mssql_table='dest_table_name',
- replace=True,
- )
-
:param source_project_dataset_table: A dotted
``<project>.<dataset>.<table>``:
the big query table of origin
- :param selected_fields: List of fields to return (comma-separated). If
- unspecified, all fields are returned.
- :param gcp_conn_id: reference to a specific Google Cloud hook.
+ :param mssql_table: target MsSQL table (templated)
:param mssql_conn_id: reference to a specific mssql hook
- :param database: name of database which overwrite defined one in connection
- :param replace: Whether to replace instead of insert
- :param batch_size: The number of rows to take in each batch
- :param location: The location used for the operation.
- :param impersonation_chain: Optional service account to impersonate using
short-term
- credentials, or chained list of accounts required to get the
access_token
- of the last account in the list, which will be impersonated in the
request.
- If set as a string, the account must grant the originating account
- the Service Account Token Creator IAM role.
- If set as a sequence, the identities from the list must grant
- Service Account Token Creator IAM role to the directly preceding
identity, with first
- account from the list granting this role to the originating account
(templated).
"""
- template_fields: Sequence[str] = ("source_project_dataset_table",
"mssql_table", "impersonation_chain")
+ template_fields: Sequence[str] =
tuple(BaseBigQueryToSqlOperator.template_fields) + (
+ "source_project_dataset_table",
+ )
operator_extra_links = (BigQueryTableLink(),)
def __init__(
self,
*,
source_project_dataset_table: str,
mssql_table: str,
- selected_fields: list[str] | str | None = None,
- gcp_conn_id: str = "google_cloud_default",
mssql_conn_id: str = "mssql_default",
- database: str | None = None,
- replace: bool = False,
- batch_size: int = 1000,
- location: str | None = None,
- impersonation_chain: str | Sequence[str] | None = None,
**kwargs,
) -> None:
- super().__init__(**kwargs)
- self.selected_fields = selected_fields
- self.gcp_conn_id = gcp_conn_id
- self.mssql_conn_id = mssql_conn_id
- self.database = database
- self.mssql_table = mssql_table
- self.replace = replace
- self.batch_size = batch_size
- self.location = location
- self.impersonation_chain = impersonation_chain
try:
- _, self.dataset_id, self.table_id =
source_project_dataset_table.split(".")
+ _, dataset_id, table_id = source_project_dataset_table.split(".")
except ValueError:
raise ValueError(
f"Could not parse {source_project_dataset_table} as
<project>.<dataset>.<table>"
) from None
+ super().__init__(sql_table=mssql_table,
dataset_table=f"{dataset_id}.{table_id}", **kwargs)
Review Comment:
Yeah, now I realize that I missed the `project` component in the
`dataset_table` :)
It can wait until another time.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]