This is an automated email from the ASF dual-hosted git repository.

kaxilnaik pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 2c26b15  Make `pandas` an optional core dependency (#17575)
2c26b15 is described below

commit 2c26b15a8087cb8a81eb19fedbc768bd6da92df7
Author: Kaxil Naik <[email protected]>
AuthorDate: Fri Aug 13 00:07:50 2021 +0100

    Make `pandas` an optional core dependency (#17575)
    
    We only use `pandas` in `DbApiHook.get_pandas_df`, and not all users need it. Besides,
    while `pandas` now ships pre-compiled packages for many platforms, it can still take
    forever to install where it has to be compiled from source.
    
    So for first-time users this can be a turn-off. If `pandas` is already installed, this
    will keep working fine; if not, users have the option to run `pip install apache-airflow[pandas]`.
    
    closes #12500
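    
    For illustration only (not part of this change), the behaviour difference is visible when
    calling `DbApiHook.get_pandas_df`. A minimal sketch, assuming the postgres provider is
    installed and a connection with `conn_id="my_db"` exists (both are hypothetical here; any
    `DbApiHook` subclass behaves the same way):
    
    ```python
    # Hypothetical example: "my_db" and the postgres provider are assumptions,
    # not something introduced by this commit.
    from airflow.providers.postgres.hooks.postgres import PostgresHook
    
    hook = PostgresHook(postgres_conn_id="my_db")
    
    # Without pandas installed, this now raises an error asking you to run:
    #   pip install 'apache-airflow[pandas]'
    df = hook.get_pandas_df("SELECT 1 AS answer")
    print(df.head())
    ```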
---
 BREEZE.rst                                 | 12 ++++++------
 CONTRIBUTING.rst                           |  4 ++--
 Dockerfile                                 |  2 +-
 INSTALL                                    |  4 ++--
 UPDATING.md                                | 13 +++++++++++++
 airflow/executors/celery_executor.py       |  6 +++++-
 airflow/hooks/dbapi.py                     |  5 ++++-
 airflow/utils/json.py                      | 12 ++++++++----
 docs/apache-airflow/extra-packages-ref.rst |  2 ++
 setup.cfg                                  |  3 ---
 setup.py                                   |  8 ++++++--
 11 files changed, 49 insertions(+), 22 deletions(-)

diff --git a/BREEZE.rst b/BREEZE.rst
index 683d2fb..f86f6f8 100644
--- a/BREEZE.rst
+++ b/BREEZE.rst
@@ -1315,8 +1315,8 @@ This is the current syntax for  `./breeze <./breeze>`_:
 
          Production image:
                  async,amazon,celery,cncf.kubernetes,docker,dask,elasticsearch,ftp,grpc,hashicorp,
-                 http,ldap,google,google_auth,microsoft.azure,mysql,postgres,redis,sendgrid,sftp,
-                 slack,ssh,statsd,virtualenv
+                 http,ldap,google,google_auth,microsoft.azure,mysql,pandas,postgres,redis,sendgrid,
+                 sftp,slack,ssh,statsd,virtualenv
 
   --image-tag TAG
           Additional tag in the image.
@@ -1914,8 +1914,8 @@ This is the current syntax for  `./breeze <./breeze>`_:
 
          Production image:
                  async,amazon,celery,cncf.kubernetes,docker,dask,elasticsearch,ftp,grpc,hashicorp,
-                 http,ldap,google,google_auth,microsoft.azure,mysql,postgres,redis,sendgrid,sftp,
-                 slack,ssh,statsd,virtualenv
+                 http,ldap,google,google_auth,microsoft.azure,mysql,pandas,postgres,redis,sendgrid,
+                 sftp,slack,ssh,statsd,virtualenv
 
   --image-tag TAG
           Additional tag in the image.
@@ -2501,8 +2501,8 @@ This is the current syntax for  `./breeze <./breeze>`_:
 
          Production image:
                  async,amazon,celery,cncf.kubernetes,docker,dask,elasticsearch,ftp,grpc,hashicorp,
-                 http,ldap,google,google_auth,microsoft.azure,mysql,postgres,redis,sendgrid,sftp,
-                 slack,ssh,statsd,virtualenv
+                 http,ldap,google,google_auth,microsoft.azure,mysql,pandas,postgres,redis,sendgrid,
+                 sftp,slack,ssh,statsd,virtualenv
 
   --image-tag TAG
           Additional tag in the image.
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index d8e2c43..8874a8d 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -593,8 +593,8 @@ devel_all, devel_ci, devel_hadoop, dingding, discord, doc, docker, druid, elasti
 facebook, ftp, gcp, gcp_api, github_enterprise, google, google_auth, grpc, hashicorp, hdfs, hive,
 http, imap, jdbc, jenkins, jira, kerberos, kubernetes, ldap, leveldb, microsoft.azure,
 microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, mssql, mysql, neo4j, odbc, openfaas,
-opsgenie, oracle, pagerduty, papermill, password, pinot, plexus, postgres, presto, qds, qubole,
-rabbitmq, redis, s3, salesforce, samba, segment, sendgrid, sentry, sftp, singularity, slack,
+opsgenie, oracle, pagerduty, pandas, papermill, password, pinot, plexus, postgres, presto, qds,
+qubole, rabbitmq, redis, s3, salesforce, samba, segment, sendgrid, sentry, sftp, singularity, slack,
 snowflake, spark, sqlite, ssh, statsd, tableau, telegram, trino, vertica, virtualenv, webhdfs,
 winrm, yandex, zendesk
 
diff --git a/Dockerfile b/Dockerfile
index 782e5b4..847ad5a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -34,7 +34,7 @@
 #                        much smaller.
 #
 ARG AIRFLOW_VERSION="2.2.0.dev0"
-ARG AIRFLOW_EXTRAS="async,amazon,celery,cncf.kubernetes,docker,dask,elasticsearch,ftp,grpc,hashicorp,http,ldap,google,google_auth,microsoft.azure,mysql,postgres,redis,sendgrid,sftp,slack,ssh,statsd,virtualenv"
+ARG AIRFLOW_EXTRAS="async,amazon,celery,cncf.kubernetes,docker,dask,elasticsearch,ftp,grpc,hashicorp,http,ldap,google,google_auth,microsoft.azure,mysql,pandas,postgres,redis,sendgrid,sftp,slack,ssh,statsd,virtualenv"
 ARG ADDITIONAL_AIRFLOW_EXTRAS=""
 ARG ADDITIONAL_PYTHON_DEPS=""
 
diff --git a/INSTALL b/INSTALL
index 47f48c3..d938ffb 100644
--- a/INSTALL
+++ b/INSTALL
@@ -97,8 +97,8 @@ devel_all, devel_ci, devel_hadoop, dingding, discord, doc, docker, druid, elasti
 facebook, ftp, gcp, gcp_api, github_enterprise, google, google_auth, grpc, hashicorp, hdfs, hive,
 http, imap, jdbc, jenkins, jira, kerberos, kubernetes, ldap, leveldb, microsoft.azure,
 microsoft.mssql, microsoft.psrp, microsoft.winrm, mongo, mssql, mysql, neo4j, odbc, openfaas,
-opsgenie, oracle, pagerduty, papermill, password, pinot, plexus, postgres, presto, qds, qubole,
-rabbitmq, redis, s3, salesforce, samba, segment, sendgrid, sentry, sftp, singularity, slack,
+opsgenie, oracle, pagerduty, pandas, papermill, password, pinot, plexus, postgres, presto, qds,
+qubole, rabbitmq, redis, s3, salesforce, samba, segment, sendgrid, sentry, sftp, singularity, slack,
 snowflake, spark, sqlite, ssh, statsd, tableau, telegram, trino, vertica, virtualenv, webhdfs,
 winrm, yandex, zendesk
 
diff --git a/UPDATING.md b/UPDATING.md
index b18dff5..a0cd1d6 100644
--- a/UPDATING.md
+++ b/UPDATING.md
@@ -73,6 +73,19 @@ https://developers.google.com/style/inclusive-documentation
 
 -->
 
+### `pandas` is now an optional dependency
+
+Previously `pandas` was a core requirement, so when you ran `pip install apache-airflow` it looked for the `pandas`
+library and installed it if it did not exist.
+
+If you want to install a version of `pandas` compatible with Airflow, you can use the `[pandas]` extra while
+installing Airflow, for example for Python 3.8 and Airflow 2.1.2:
+
+```shell
+pip install -U "apache-airflow[pandas]==2.1.2" \
+  --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.1.2/constraints-3.8.txt"
+```
+
 ### Dummy trigger rule has been deprecated
 
 `TriggerRule.DUMMY` is replaced by `TriggerRule.ALWAYS`.
diff --git a/airflow/executors/celery_executor.py b/airflow/executors/celery_executor.py
index e3fc398..56edb6e 100644
--- a/airflow/executors/celery_executor.py
+++ b/airflow/executors/celery_executor.py
@@ -183,7 +183,6 @@ def on_celery_import_modules(*args, **kwargs):
     doesn't matter, but for short tasks this starts to be a noticeable impact.
     """
     import jinja2.ext  # noqa: F401
-    import numpy  # noqa: F401
 
     import airflow.jobs.local_task_job
     import airflow.macros
@@ -192,6 +191,11 @@ def on_celery_import_modules(*args, **kwargs):
     import airflow.operators.subdag  # noqa: F401
 
     try:
+        import numpy  # noqa: F401
+    except ImportError:
+        pass
+
+    try:
         import kubernetes.client  # noqa: F401
     except ImportError:
         pass
diff --git a/airflow/hooks/dbapi.py b/airflow/hooks/dbapi.py
index bac75a2..4156500 100644
--- a/airflow/hooks/dbapi.py
+++ b/airflow/hooks/dbapi.py
@@ -129,7 +129,10 @@ class DbApiHook(BaseHook):
         :param kwargs: (optional) passed into pandas.io.sql.read_sql method
         :type kwargs: dict
         """
-        from pandas.io import sql as psql
+        try:
+            from pandas.io import sql as psql
+        except ImportError:
+            raise Exception("pandas library not installed, run: pip install 'apache-airflow[pandas]'.")
 
         with closing(self.get_conn()) as conn:
             return psql.read_sql(sql, con=conn, params=parameters, **kwargs)
diff --git a/airflow/utils/json.py b/airflow/utils/json.py
index 5847ef4..d859fd1 100644
--- a/airflow/utils/json.py
+++ b/airflow/utils/json.py
@@ -19,10 +19,14 @@
 from datetime import date, datetime
 from decimal import Decimal
 
-import numpy as np
 from flask.json import JSONEncoder
 
 try:
+    import numpy as np
+except ImportError:
+    np = None
+
+try:
     from kubernetes.client import models as k8s
 except ImportError:
     k8s = None
@@ -51,7 +55,7 @@ class AirflowJsonEncoder(JSONEncoder):
             # Technically lossy due to floating point errors, but the best we
             # can do without implementing a custom encode function.
             return float(obj)
-        elif isinstance(
+        elif np is not None and isinstance(
             obj,
             (
                 np.int_,
@@ -68,9 +72,9 @@ class AirflowJsonEncoder(JSONEncoder):
             ),
         ):
             return int(obj)
-        elif isinstance(obj, np.bool_):
+        elif np is not None and isinstance(obj, np.bool_):
             return bool(obj)
-        elif isinstance(
+        elif np is not None and isinstance(
             obj, (np.float_, np.float16, np.float32, np.float64, np.complex_, np.complex64, np.complex128)
         ):
             return float(obj)
diff --git a/docs/apache-airflow/extra-packages-ref.rst b/docs/apache-airflow/extra-packages-ref.rst
index ae88728..dba5e1c 100644
--- a/docs/apache-airflow/extra-packages-ref.rst
+++ b/docs/apache-airflow/extra-packages-ref.rst
@@ -62,6 +62,8 @@ python dependencies for the provided package.
 +---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+
 | leveldb             | ``pip install 'apache-airflow[leveldb]'``           | Required for use leveldb extra in google provider                          |
 +---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+
+| pandas              | ``pip install 'apache-airflow[pandas]'``            | Install Pandas library compatible with Airflow                             |
++---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+
 | password            | ``pip install 'apache-airflow[password]'``          | Password authentication for users                                          |
 +---------------------+-----------------------------------------------------+----------------------------------------------------------------------------+
 | rabbitmq            | ``pip install 'apache-airflow[rabbitmq]'``          | RabbitMQ support as a Celery backend                                       |
diff --git a/setup.cfg b/setup.cfg
index d3c5f57..69ad425 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -126,9 +126,6 @@ install_requires =
     numpy;python_version>="3.7"
     # Required by vendored-in connexion
     openapi-spec-validator>=0.2.4
-    # Pandas stopped releasing 3.6 binaries for 1.2.* series.
-    pandas>=0.17.1, <1.2;python_version<"3.7"
-    pandas>=0.17.1, <2.0;python_version>="3.7"
     pendulum~=2.0
     pep562~=1.0;python_version<"3.7"
     psutil>=4.2.0, <6.0.0
diff --git a/setup.py b/setup.py
index 3b6650f..801721f 100644
--- a/setup.py
+++ b/setup.py
@@ -395,6 +395,9 @@ oracle = [
 pagerduty = [
     'pdpyras>=4.1.2,<5',
 ]
+pandas = [
+    'pandas>=0.17.1, <2.0',
+]
 papermill = [
     'papermill[all]>=1.2.1',
     'scrapbook[all]',
@@ -535,7 +538,7 @@ devel = [
     'yamllint',
 ]
 
-devel_minreq = cgroups + devel + doc + kubernetes + mysql + password
+devel_minreq = cgroups + devel + doc + kubernetes + mysql + pandas + password
 devel_hadoop = devel_minreq + hdfs + hive + kerberos + presto + webhdfs
 
 # Dict of all providers which are part of the Apache Airflow repository together with their requirements
@@ -636,6 +639,7 @@ CORE_EXTRAS_REQUIREMENTS: Dict[str, List[str]] = {
     'kerberos': kerberos,
     'ldap': ldap,
     'leveldb': leveldb,
+    'pandas': pandas,
     'password': password,
     'rabbitmq': rabbitmq,
     'sentry': sentry,
@@ -765,7 +769,7 @@ _all_requirements = list({req for extras_reqs in EXTRAS_REQUIREMENTS.values() fo
 EXTRAS_REQUIREMENTS["all"] = _all_requirements
 
 # All db user extras here
-EXTRAS_REQUIREMENTS["all_dbs"] = all_dbs
+EXTRAS_REQUIREMENTS["all_dbs"] = all_dbs + pandas
 
 # This can be simplified to devel_hadoop + _all_requirements due to inclusions
 # but we keep it for explicit sake. We are de-duplicating it anyway.
