kaxil closed pull request #4137: [AIRFLOW-XXX] Fix Docstrings in Hooks, Sensors & Operators
URL: https://github.com/apache/incubator-airflow/pull/4137
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic):
diff --git a/airflow/contrib/hooks/spark_submit_hook.py b/airflow/contrib/hooks/spark_submit_hook.py
index 65bb6134e6..197b84a7b6 100644
--- a/airflow/contrib/hooks/spark_submit_hook.py
+++ b/airflow/contrib/hooks/spark_submit_hook.py
@@ -33,14 +33,15 @@ class SparkSubmitHook(BaseHook, LoggingMixin):
This hook is a wrapper around the spark-submit binary to kick off a
spark-submit job.
It requires that the "spark-submit" binary is in the PATH or the spark_home to be supplied.
+
:param conf: Arbitrary Spark configuration properties
:type conf: dict
:param conn_id: The connection id as configured in Airflow administration. When an
- invalid connection_id is supplied, it will default to yarn.
+ invalid connection_id is supplied, it will default to yarn.
:type conn_id: str
:param files: Upload additional files to the executor running the job, separated by a
- comma. Files will be placed in the working directory of each executor.
- For example, serialized objects.
+ comma. Files will be placed in the working directory of each executor.
+ For example, serialized objects.
:type files: str
:param py_files: Additional python files used by the job, can be .zip, .egg or .py.
:type py_files: str
@@ -51,19 +52,19 @@ class SparkSubmitHook(BaseHook, LoggingMixin):
:param java_class: the main class of the Java application
:type java_class: str
:param packages: Comma-separated list of maven coordinates of jars to include on the
- driver and executor classpaths
+ driver and executor classpaths
:type packages: str
:param exclude_packages: Comma-separated list of maven coordinates of jars to exclude
- while resolving the dependencies provided in 'packages'
+ while resolving the dependencies provided in 'packages'
:type exclude_packages: str
:param repositories: Comma-separated list of additional remote repositories to search
- for the maven coordinates given with 'packages'
+ for the maven coordinates given with 'packages'
:type repositories: str
:param total_executor_cores: (Standalone & Mesos only) Total cores for all executors
- (Default: all the available cores on the worker)
+ (Default: all the available cores on the worker)
:type total_executor_cores: int
:param executor_cores: (Standalone, YARN and Kubernetes only) Number of cores per
- executor (Default: 2)
+ executor (Default: 2)
:type executor_cores: int
:param executor_memory: Memory per executor (e.g. 1000M, 2G) (Default: 1G)
:type executor_memory: str
@@ -80,7 +81,7 @@ class SparkSubmitHook(BaseHook, LoggingMixin):
:param application_args: Arguments for the application being submitted
:type application_args: list
:param env_vars: Environment variables for spark-submit. It
- supports yarn and k8s mode too.
+ supports yarn and k8s mode too.
:type env_vars: dict
:param verbose: Whether to pass the verbose flag to spark-submit process for debugging
:type verbose: bool
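
For reference, a minimal usage sketch of the hook whose docstring is fixed above. The submit() call, the 'spark_default' connection id, the Java class and the jar path are illustrative placeholders rather than part of the patch; a working spark-submit binary on the PATH is assumed.

from airflow.contrib.hooks.spark_submit_hook import SparkSubmitHook

# Placeholder Spark job configuration.
hook = SparkSubmitHook(
    conf={'spark.executor.instances': '2'},   # arbitrary Spark properties
    conn_id='spark_default',                  # falls back to yarn if invalid
    java_class='com.example.Main',            # hypothetical main class
    executor_cores=2,
    executor_memory='2G',
    name='airflow-spark-example',
    verbose=True)

hook.submit(application='/path/to/app.jar')   # placeholder application jar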
diff --git a/airflow/contrib/hooks/sqoop_hook.py b/airflow/contrib/hooks/sqoop_hook.py
index 74cddc2b21..f4bad83144 100644
--- a/airflow/contrib/hooks/sqoop_hook.py
+++ b/airflow/contrib/hooks/sqoop_hook.py
@@ -36,13 +36,14 @@ class SqoopHook(BaseHook, LoggingMixin):
Additional arguments that can be passed via the 'extra' JSON field of the sqoop connection:
- * job_tracker: Job tracker local|jobtracker:port.
- * namenode: Namenode.
- * lib_jars: Comma separated jar files to include in the classpath.
- * files: Comma separated files to be copied to the map reduce cluster.
- * archives: Comma separated archives to be unarchived on the compute
- machines.
- * password_file: Path to file containing the password.
+
+ * ``job_tracker``: Job tracker local|jobtracker:port.
+ * ``namenode``: Namenode.
+ * ``lib_jars``: Comma separated jar files to include in the classpath.
+ * ``files``: Comma separated files to be copied to the map reduce cluster.
+ * ``archives``: Comma separated archives to be unarchived on the compute
+ machines.
+ * ``password_file``: Path to file containing the password.
:param conn_id: Reference to the sqoop connection.
:type conn_id: str
@@ -205,6 +206,7 @@ def import_table(self, table, target_dir=None, append=False, file_type="text",
"""
Imports table from remote location to target dir. Arguments are
copies of direct sqoop command line arguments
+
:param table: Table to read
:param target_dir: HDFS destination dir
:param append: Append data to an existing dataset in HDFS
@@ -235,6 +237,7 @@ def import_query(self, query, target_dir, append=False, file_type="text",
split_by=None, direct=None, driver=None,
extra_import_options=None):
"""
Imports a specific query from the rdbms to hdfs
+
:param query: Free format query to run
:param target_dir: HDFS destination dir
:param append: Append data to an existing dataset in HDFS
@@ -319,6 +322,7 @@ def export_table(self, table, export_dir, input_null_string,
"""
Exports Hive table to remote location. Arguments are copies of direct
sqoop command line Arguments
+
:param table: Table remote destination
:param export_dir: Hive table to export
:param input_null_string: The string to be interpreted as null for
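
For context, the 'extra' keys documented above might be supplied on a sqoop connection roughly as follows. The connection id, host, paths and all values are placeholders, and building a Connection object in code is only one way to register them.

import json

from airflow.models import Connection

# Placeholder values for the keys listed in the SqoopHook docstring above.
sqoop_conn = Connection(
    conn_id='sqoop_default',
    conn_type='sqoop',
    host='jdbc:mysql://db.example.com/sales',
    extra=json.dumps({
        'job_tracker': 'jobtracker.example.com:8021',
        'namenode': 'hdfs://namenode.example.com:8020',
        'lib_jars': '/opt/jars/helper.jar',
        'files': '/tmp/job.properties',
        'archives': '/tmp/deps.zip',
        'password_file': '/user/airflow/.sqoop.password',
    }))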
diff --git a/airflow/contrib/operators/bigquery_operator.py b/airflow/contrib/operators/bigquery_operator.py
index 26f47ed6b8..735c7583ae 100644
--- a/airflow/contrib/operators/bigquery_operator.py
+++ b/airflow/contrib/operators/bigquery_operator.py
@@ -408,7 +408,7 @@ class BigQueryCreateExternalTableOperator(BaseOperator):
:type delegate_to: str
:param src_fmt_configs: configure optional fields specific to the source format
:type src_fmt_configs: dict
- :param labels a dictionary containing labels for the table, passed to BigQuery
+ :param labels: a dictionary containing labels for the table, passed to BigQuery
:type labels: dict
"""
template_fields = ('bucket', 'source_objects',
@@ -501,9 +501,10 @@ def execute(self, context):
class BigQueryDeleteDatasetOperator(BaseOperator):
- """"
+ """
This operator deletes an existing dataset from your Project in Big query.
https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete
+
:param project_id: The project id of the dataset.
:type project_id: str
:param dataset_id: The dataset to be deleted.
@@ -552,7 +553,7 @@ def execute(self, context):
class BigQueryCreateEmptyDatasetOperator(BaseOperator):
- """"
+ """
This operator is used to create new dataset for your Project in Big query.
https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource
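
A hedged sketch of the two dataset operators touched above; the DAG boilerplate, project id and dataset id are illustrative placeholders and the default BigQuery connection is assumed.

from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.bigquery_operator import (
    BigQueryCreateEmptyDatasetOperator, BigQueryDeleteDatasetOperator)

dag = DAG('bq_dataset_example', start_date=datetime(2018, 1, 1), schedule_interval=None)

create_dataset = BigQueryCreateEmptyDatasetOperator(
    task_id='create_temp_dataset',
    project_id='my-project',      # placeholder project
    dataset_id='temp_dataset',
    dag=dag)

delete_dataset = BigQueryDeleteDatasetOperator(
    task_id='delete_temp_dataset',
    project_id='my-project',
    dataset_id='temp_dataset',
    dag=dag)

create_dataset >> delete_dataset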
diff --git a/airflow/contrib/operators/dataproc_operator.py b/airflow/contrib/operators/dataproc_operator.py
index 60fc2bcf15..120693c77d 100644
--- a/airflow/contrib/operators/dataproc_operator.py
+++ b/airflow/contrib/operators/dataproc_operator.py
@@ -71,7 +71,7 @@ class DataprocClusterCreateOperator(BaseOperator):
:type image_version: str
:param custom_image: custom Dataproc image for more info see
https://cloud.google.com/dataproc/docs/guides/dataproc-images
- :type: custom_image: str
+ :type custom_image: str
:param properties: dict of properties to set on
config files (e.g. spark-defaults.conf), see
https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.clusters#SoftwareConfig
diff --git a/airflow/contrib/operators/gcs_to_s3.py b/airflow/contrib/operators/gcs_to_s3.py
index d8b180c81a..23a4e9cec8 100644
--- a/airflow/contrib/operators/gcs_to_s3.py
+++ b/airflow/contrib/operators/gcs_to_s3.py
@@ -47,13 +47,14 @@ class GoogleCloudStorageToS3Operator(GoogleCloudStorageListOperator):
:type dest_aws_conn_id: str
:param dest_s3_key: The base S3 key to be used to store the files.
(templated)
:type dest_s3_key: str
- :parame dest_verify: Whether or not to verify SSL certificates for S3 connection.
+ :param dest_verify: Whether or not to verify SSL certificates for S3 connection.
By default SSL certificates are verified.
You can provide the following values:
- - False: do not validate SSL certificates. SSL will still be used
+
+ - ``False``: do not validate SSL certificates. SSL will still be used
(unless use_ssl is False), but SSL certificates will not be
verified.
- - path/to/cert/bundle.pem: A filename of the CA cert bundle to uses.
+ - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to uses.
You can specify this argument if you want to use a different
CA cert bundle than the one used by botocore.
:type dest_verify: bool or str
diff --git a/airflow/contrib/operators/pubsub_operator.py b/airflow/contrib/operators/pubsub_operator.py
index e40828bf92..7c4d2fde5a 100644
--- a/airflow/contrib/operators/pubsub_operator.py
+++ b/airflow/contrib/operators/pubsub_operator.py
@@ -378,7 +378,7 @@ class PubSubPublishOperator(BaseOperator):
create_topic=True,
dag=dag)
- ``project`` , ``topic``, and ``messages`` are templated so you can use
+ ``project`` , ``topic``, and ``messages`` are templated so you can use
variables in them.
"""
template_fields = ['project', 'topic', 'messages']
diff --git a/airflow/contrib/operators/s3_delete_objects_operator.py b/airflow/contrib/operators/s3_delete_objects_operator.py
index 1aa1b3901e..635765496d 100644
--- a/airflow/contrib/operators/s3_delete_objects_operator.py
+++ b/airflow/contrib/operators/s3_delete_objects_operator.py
@@ -49,10 +49,10 @@ class S3DeleteObjectsOperator(BaseOperator):
You can provide the following values:
- - False: do not validate SSL certificates. SSL will still be used,
+ - ``False``: do not validate SSL certificates. SSL will still be used,
but SSL certificates will not be
verified.
- - path/to/cert/bundle.pem: A filename of the CA cert bundle to uses.
+ - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to uses.
You can specify this argument if you want to use a different
CA cert bundle than the one used by botocore.
:type verify: bool or str
diff --git a/airflow/contrib/operators/s3_list_operator.py b/airflow/contrib/operators/s3_list_operator.py
index 3ca22d5932..9c67c2fa3b 100644
--- a/airflow/contrib/operators/s3_list_operator.py
+++ b/airflow/contrib/operators/s3_list_operator.py
@@ -38,17 +38,19 @@ class S3ListOperator(BaseOperator):
:type delimiter: str
:param aws_conn_id: The connection ID to use when connecting to S3 storage.
:type aws_conn_id: str
- :parame verify: Whether or not to verify SSL certificates for S3 connection.
+ :param verify: Whether or not to verify SSL certificates for S3 connection.
By default SSL certificates are verified.
You can provide the following values:
- - False: do not validate SSL certificates. SSL will still be used
+
+ - ``False``: do not validate SSL certificates. SSL will still be used
(unless use_ssl is False), but SSL certificates will not be
verified.
- - path/to/cert/bundle.pem: A filename of the CA cert bundle to uses.
+ - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to uses.
You can specify this argument if you want to use a different
CA cert bundle than the one used by botocore.
:type verify: bool or str
+
**Example**:
The following operator would list all the files
(excluding subfolders) from the S3
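
To illustrate the verify values described in the docstrings above, a sketch of S3ListOperator with a custom CA bundle; the bucket, prefix and bundle path are placeholders, not part of the patch.

from datetime import datetime

from airflow import DAG
from airflow.contrib.operators.s3_list_operator import S3ListOperator

dag = DAG('s3_list_example', start_date=datetime(2018, 1, 1), schedule_interval=None)

list_keys = S3ListOperator(
    task_id='list_data_files',
    bucket='my-bucket',
    prefix='data/',
    delimiter='/',
    aws_conn_id='aws_default',
    verify='/etc/ssl/certs/custom-ca.pem',  # or False to skip certificate validation
    dag=dag)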
diff --git a/airflow/contrib/operators/s3_to_gcs_operator.py b/airflow/contrib/operators/s3_to_gcs_operator.py
index 5dd355a6fd..6fbe2c0b83 100644
--- a/airflow/contrib/operators/s3_to_gcs_operator.py
+++ b/airflow/contrib/operators/s3_to_gcs_operator.py
@@ -41,13 +41,14 @@ class S3ToGoogleCloudStorageOperator(S3ListOperator):
:type delimiter: str
:param aws_conn_id: The source S3 connection
:type aws_conn_id: str
- :parame verify: Whether or not to verify SSL certificates for S3 connection.
+ :param verify: Whether or not to verify SSL certificates for S3 connection.
By default SSL certificates are verified.
You can provide the following values:
- - False: do not validate SSL certificates. SSL will still be used
+
+ - ``False``: do not validate SSL certificates. SSL will still be used
(unless use_ssl is False), but SSL certificates will not be
verified.
- - path/to/cert/bundle.pem: A filename of the CA cert bundle to uses.
+ - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to uses.
You can specify this argument if you want to use a different
CA cert bundle than the one used by botocore.
:type verify: bool or str
diff --git a/airflow/contrib/operators/spark_jdbc_operator.py b/airflow/contrib/operators/spark_jdbc_operator.py
index 42f9dd5676..b1c2a138b9 100644
--- a/airflow/contrib/operators/spark_jdbc_operator.py
+++ b/airflow/contrib/operators/spark_jdbc_operator.py
@@ -64,7 +64,7 @@ class SparkJDBCOperator(SparkSubmitOperator):
:param jdbc_table: The name of the JDBC table
:type jdbc_table: str
:param jdbc_conn_id: Connection id used for connection to JDBC database
- :type: jdbc_conn_id: str
+ :type jdbc_conn_id: str
:param jdbc_driver: Name of the JDBC driver to use for the JDBC connection. This
driver (usually a jar) should be passed in the 'jars' parameter
:type jdbc_driver: str
diff --git a/airflow/contrib/sensors/bigquery_sensor.py b/airflow/contrib/sensors/bigquery_sensor.py
index 89ec476d9d..fe8bd2ed6a 100644
--- a/airflow/contrib/sensors/bigquery_sensor.py
+++ b/airflow/contrib/sensors/bigquery_sensor.py
@@ -25,22 +25,22 @@ class BigQueryTableSensor(BaseSensorOperator):
"""
Checks for the existence of a table in Google Bigquery.
- :param project_id: The Google cloud project in which to look for the table.
- The connection supplied to the hook must provide
- access to the specified project.
- :type project_id: str
- :param dataset_id: The name of the dataset in which to look for the table.
- storage bucket.
- :type dataset_id: str
- :param table_id: The name of the table to check the existence of.
- :type table_id: str
- :param bigquery_conn_id: The connection ID to use when connecting to
- Google BigQuery.
- :type bigquery_conn_id: str
- :param delegate_to: The account to impersonate, if any.
- For this to work, the service account making the request must
- have domain-wide delegation enabled.
- :type delegate_to: str
+ :param project_id: The Google cloud project in which to look for the table.
+ The connection supplied to the hook must provide
+ access to the specified project.
+ :type project_id: str
+ :param dataset_id: The name of the dataset in which to look for the table.
+ storage bucket.
+ :type dataset_id: str
+ :param table_id: The name of the table to check the existence of.
+ :type table_id: str
+ :param bigquery_conn_id: The connection ID to use when connecting to
+ Google BigQuery.
+ :type bigquery_conn_id: str
+ :param delegate_to: The account to impersonate, if any.
+ For this to work, the service account making the request must
+ have domain-wide delegation enabled.
+ :type delegate_to: str
"""
template_fields = ('project_id', 'dataset_id', 'table_id',)
ui_color = '#f0eee4'
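
A sketch of the sensor with the parameters documented above; the project, dataset and table ids are placeholders and the default BigQuery connection is assumed.

from datetime import datetime

from airflow import DAG
from airflow.contrib.sensors.bigquery_sensor import BigQueryTableSensor

dag = DAG('bq_table_sensor_example', start_date=datetime(2018, 1, 1), schedule_interval=None)

wait_for_table = BigQueryTableSensor(
    task_id='wait_for_table',
    project_id='my-project',
    dataset_id='my_dataset',
    table_id='daily_events',
    dag=dag)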
diff --git a/airflow/contrib/sensors/sftp_sensor.py b/airflow/contrib/sensors/sftp_sensor.py
index 088e3e54b4..40ff6c9cf2 100644
--- a/airflow/contrib/sensors/sftp_sensor.py
+++ b/airflow/contrib/sensors/sftp_sensor.py
@@ -27,6 +27,7 @@
class SFTPSensor(BaseSensorOperator):
"""
Waits for a file or directory to be present on SFTP.
+
:param path: Remote file or directory path
:type path: str
:param sftp_conn_id: The connection to run the sensor against
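
A sketch of the SFTP sensor documented above; the remote path and poke interval are placeholders and the 'sftp_default' connection id is assumed.

from datetime import datetime

from airflow import DAG
from airflow.contrib.sensors.sftp_sensor import SFTPSensor

dag = DAG('sftp_sensor_example', start_date=datetime(2018, 1, 1), schedule_interval=None)

wait_for_export = SFTPSensor(
    task_id='wait_for_export',
    path='/upload/daily_export.csv',   # remote file to wait for
    sftp_conn_id='sftp_default',
    poke_interval=60,
    dag=dag)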
diff --git a/airflow/operators/http_operator.py b/airflow/operators/http_operator.py
index 3e00de96eb..d26743f6f1 100644
--- a/airflow/operators/http_operator.py
+++ b/airflow/operators/http_operator.py
@@ -46,7 +46,9 @@ class SimpleHttpOperator(BaseOperator):
'requests' documentation (options to modify timeout, ssl, etc.)
:type extra_options: A dictionary of options, where key is string and value
depends on the option that's being modified.
- :param xcom_push: Push the response to Xcom (default: False)
+ :param xcom_push: Push the response to Xcom (default: False).
+ If xcom_push is True, response of an HTTP request will also
+ be pushed to an XCom.
:type xcom_push: bool
:param log_response: Log the response (default: False)
:type log_response: bool
@@ -68,10 +70,6 @@ def __init__(self,
http_conn_id='http_default',
log_response=False,
*args, **kwargs):
- """
- If xcom_push is True, response of an HTTP request will also
- be pushed to an XCom.
- """
super(SimpleHttpOperator, self).__init__(*args, **kwargs)
self.http_conn_id = http_conn_id
self.method = method
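
A sketch of the xcom_push behaviour now described in the class docstring; the connection id and endpoint are placeholders.

from datetime import datetime

from airflow import DAG
from airflow.operators.http_operator import SimpleHttpOperator

dag = DAG('http_example', start_date=datetime(2018, 1, 1), schedule_interval=None)

fetch_status = SimpleHttpOperator(
    task_id='fetch_status',
    http_conn_id='http_default',
    endpoint='api/status',     # placeholder endpoint
    method='GET',
    xcom_push=True,            # response body is pushed to XCom for downstream tasks
    log_response=True,
    dag=dag)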
diff --git a/airflow/operators/python_operator.py b/airflow/operators/python_operator.py
index a01c93ca0a..9b31838b0c 100644
--- a/airflow/operators/python_operator.py
+++ b/airflow/operators/python_operator.py
@@ -190,6 +190,7 @@ class PythonVirtualenvOperator(PythonOperator):
can use a return value.
Note that if your virtualenv runs in a different Python major version than Airflow,
you cannot use return values, op_args, or op_kwargs. You can use string_args though.
+
:param python_callable: A python function with no references to outside variables,
defined with def, which will be run in a virtualenv
:type python_callable: function
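
A sketch of PythonVirtualenvOperator under the constraints noted above (the callable must not reference outside variables); the callable body and requirements list are placeholders.

from datetime import datetime

from airflow import DAG
from airflow.operators.python_operator import PythonVirtualenvOperator


def build_report():
    # No references to outside variables, as the docstring above requires.
    import pendulum
    return str(pendulum.now())


dag = DAG('venv_example', start_date=datetime(2018, 1, 1), schedule_interval=None)

report = PythonVirtualenvOperator(
    task_id='build_report',
    python_callable=build_report,
    requirements=['pendulum'],   # resolved inside the virtualenv
    dag=dag)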
diff --git a/airflow/operators/redshift_to_s3_operator.py b/airflow/operators/redshift_to_s3_operator.py
index 2bad90a861..c0c4db3503 100644
--- a/airflow/operators/redshift_to_s3_operator.py
+++ b/airflow/operators/redshift_to_s3_operator.py
@@ -39,14 +39,14 @@ class RedshiftToS3Transfer(BaseOperator):
:type redshift_conn_id: str
:param aws_conn_id: reference to a specific S3 connection
:type aws_conn_id: str
- :parame verify: Whether or not to verify SSL certificates for S3 connection.
+ :param verify: Whether or not to verify SSL certificates for S3 connection.
By default SSL certificates are verified.
You can provide the following values:
- - False: do not validate SSL certificates. SSL will still be used
+ - ``False``: do not validate SSL certificates. SSL will still be used
(unless use_ssl is False), but SSL certificates will not be
verified.
- - path/to/cert/bundle.pem: A filename of the CA cert bundle to uses.
+ - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to uses.
You can specify this argument if you want to use a different
CA cert bundle than the one used by botocore.
:type verify: bool or str
diff --git a/airflow/operators/s3_to_hive_operator.py b/airflow/operators/s3_to_hive_operator.py
index 85f05325f6..228470fad7 100644
--- a/airflow/operators/s3_to_hive_operator.py
+++ b/airflow/operators/s3_to_hive_operator.py
@@ -78,13 +78,14 @@ class S3ToHiveTransfer(BaseOperator):
:type delimiter: str
:param aws_conn_id: source s3 connection
:type aws_conn_id: str
- :parame verify: Whether or not to verify SSL certificates for S3 connection.
+ :param verify: Whether or not to verify SSL certificates for S3 connection.
By default SSL certificates are verified.
You can provide the following values:
- - False: do not validate SSL certificates. SSL will still be used
+
+ - ``False``: do not validate SSL certificates. SSL will still be used
(unless use_ssl is False), but SSL certificates will not be
verified.
- - path/to/cert/bundle.pem: A filename of the CA cert bundle to uses.
+ - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to uses.
You can specify this argument if you want to use a different
CA cert bundle than the one used by botocore.
:type verify: bool or str
diff --git a/airflow/operators/s3_to_redshift_operator.py b/airflow/operators/s3_to_redshift_operator.py
index 265d6e2563..65bc679f24 100644
--- a/airflow/operators/s3_to_redshift_operator.py
+++ b/airflow/operators/s3_to_redshift_operator.py
@@ -39,13 +39,14 @@ class S3ToRedshiftTransfer(BaseOperator):
:type redshift_conn_id: str
:param aws_conn_id: reference to a specific S3 connection
:type aws_conn_id: str
- :parame verify: Whether or not to verify SSL certificates for S3 connection.
+ :param verify: Whether or not to verify SSL certificates for S3 connection.
By default SSL certificates are verified.
You can provide the following values:
- - False: do not validate SSL certificates. SSL will still be used
+
+ - ``False``: do not validate SSL certificates. SSL will still be used
(unless use_ssl is False), but SSL certificates will not be
verified.
- - path/to/cert/bundle.pem: A filename of the CA cert bundle to uses.
+ - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to uses.
You can specify this argument if you want to use a different
CA cert bundle than the one used by botocore.
:type verify: bool or str
diff --git a/airflow/sensors/s3_key_sensor.py b/airflow/sensors/s3_key_sensor.py
index 462091ff86..f1e668e733 100644
--- a/airflow/sensors/s3_key_sensor.py
+++ b/airflow/sensors/s3_key_sensor.py
@@ -46,10 +46,11 @@ class S3KeySensor(BaseSensorOperator):
:param verify: Whether or not to verify SSL certificates for S3 connection.
By default SSL certificates are verified.
You can provide the following values:
- - False: do not validate SSL certificates. SSL will still be used
+
+ - ``False``: do not validate SSL certificates. SSL will still be used
(unless use_ssl is False), but SSL certificates will not be
verified.
- - path/to/cert/bundle.pem: A filename of the CA cert bundle to uses.
+ - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to uses.
You can specify this argument if you want to use a different
CA cert bundle than the one used by botocore.
:type verify: bool or str
diff --git a/airflow/sensors/s3_prefix_sensor.py b/airflow/sensors/s3_prefix_sensor.py
index 4617c97cf3..e27cd2efa9 100644
--- a/airflow/sensors/s3_prefix_sensor.py
+++ b/airflow/sensors/s3_prefix_sensor.py
@@ -43,10 +43,11 @@ class S3PrefixSensor(BaseSensorOperator):
:param verify: Whether or not to verify SSL certificates for S3 connection.
By default SSL certificates are verified.
You can provide the following values:
- - False: do not validate SSL certificates. SSL will still be used
+
+ - ``False``: do not validate SSL certificates. SSL will still be used
(unless use_ssl is False), but SSL certificates will not be
verified.
- - path/to/cert/bundle.pem: A filename of the CA cert bundle to uses.
+ - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to uses.
You can specify this argument if you want to use a different
CA cert bundle than the one used by botocore.
:type verify: bool or str
diff --git a/airflow/sensors/sql_sensor.py b/airflow/sensors/sql_sensor.py
index d2ef6b3626..c304ff594c 100644
--- a/airflow/sensors/sql_sensor.py
+++ b/airflow/sensors/sql_sensor.py
@@ -33,6 +33,7 @@ class SqlSensor(BaseSensorOperator):
:type conn_id: str
:param sql: The sql to run. To pass, it needs to return at least one cell
that contains a non-zero / empty string value.
+ :type sql: str
"""
template_fields = ('sql',)
template_ext = ('.hql', '.sql',)
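
A sketch of SqlSensor with the sql parameter whose type is documented above; the connection id and query are placeholders.

from datetime import datetime

from airflow import DAG
from airflow.sensors.sql_sensor import SqlSensor

dag = DAG('sql_sensor_example', start_date=datetime(2018, 1, 1), schedule_interval=None)

wait_for_rows = SqlSensor(
    task_id='wait_for_todays_rows',
    conn_id='my_db',   # placeholder connection
    sql="SELECT COUNT(1) FROM sales WHERE ds = '{{ ds }}'",  # passes once non-zero
    dag=dag)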