This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/master by this push:
     new 674368f  Fixes MySQLToS3 float to int conversion (#10437)
674368f is described below

commit 674368f66cf61b2a105f326f23868ac3aee08807
Author: Gabriel Montañola <[email protected]>
AuthorDate: Mon Oct 19 04:53:18 2020 -0300

    Fixes MySQLToS3 float to int conversion (#10437)
    
    * fix: 🐛 Float to Int columns conversion
    
    The `_fix_int_dtypes` method was applying the `astype` transformation to
    the return value of a `np.where` call. I added an extra step to the method
    in order to apply it to the whole pd.Series. Note that Int64Dtype must be
    used as an instance, since Pandas will raise an Exception if a class is
    used.
    
    * test: Add dtype test for integers
    
    * style: Change line length
---
 airflow/providers/amazon/aws/transfers/mysql_to_s3.py    | 3 ++-
 tests/providers/amazon/aws/transfers/test_mysql_to_s3.py | 6 ++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/airflow/providers/amazon/aws/transfers/mysql_to_s3.py 
b/airflow/providers/amazon/aws/transfers/mysql_to_s3.py
index 33ffd59..791a31d 100644
--- a/airflow/providers/amazon/aws/transfers/mysql_to_s3.py
+++ b/airflow/providers/amazon/aws/transfers/mysql_to_s3.py
@@ -111,7 +111,8 @@ class MySQLToS3Operator(BaseOperator):
                 notna_series = df[col].dropna().values
                 if np.isclose(notna_series, notna_series.astype(int)).all():
                     # set to dtype that retains integers and supports NaNs
-                    df[col] = np.where(df[col].isnull(), None, 
df[col]).astype(pd.Int64Dtype)
+                    df[col] = np.where(df[col].isnull(), None, df[col])
+                    df[col] = df[col].astype(pd.Int64Dtype())
 
     def execute(self, context) -> None:
         mysql_hook = MySqlHook(mysql_conn_id=self.mysql_conn_id)
diff --git a/tests/providers/amazon/aws/transfers/test_mysql_to_s3.py 
b/tests/providers/amazon/aws/transfers/test_mysql_to_s3.py
index 6f3eba1..008aa01 100644
--- a/tests/providers/amazon/aws/transfers/test_mysql_to_s3.py
+++ b/tests/providers/amazon/aws/transfers/test_mysql_to_s3.py
@@ -60,3 +60,9 @@ class TestMySqlToS3Operator(unittest.TestCase):
             mock_s3_hook.return_value.load_file.assert_called_once_with(
                 filename=f.name, key=s3_key, bucket_name=s3_bucket
             )
+
+    def test_fix_int_dtypes(self):
+        op = MySQLToS3Operator(query="query", s3_bucket="s3_bucket", 
s3_key="s3_key", task_id="task_id")
+        dirty_df = pd.DataFrame({"strings": ["a", "b", "c"], "ints": [1, 2, 
None]})
+        op._fix_int_dtypes(df=dirty_df)
+        assert dirty_df["ints"].dtype.kind == "i"

Reply via email to