mistercrunch closed pull request #5467: Fix broken dedup and remove redundant db_spec logic
URL: https://github.com/apache/incubator-superset/pull/5467
 
 
   

This pull request was merged from a forked repository.
Because GitHub hides the original diff of a fork-based pull request once
it is merged, the diff is reproduced below for the sake of provenance:

diff --git a/superset/dataframe.py b/superset/dataframe.py
index 5fba4ffed6..30ba4c776b 100644
--- a/superset/dataframe.py
+++ b/superset/dataframe.py
@@ -70,12 +70,11 @@ def __init__(self, data, cursor_description, db_engine_spec):
         if cursor_description:
             column_names = [col[0] for col in cursor_description]
 
-        self.column_names = dedup(
-            db_engine_spec.get_normalized_column_names(cursor_description))
+        self.column_names = dedup(column_names)
 
         data = data or []
         self.df = (
-            pd.DataFrame(list(data), columns=column_names).infer_objects())
+            pd.DataFrame(list(data), columns=self.column_names).infer_objects())
 
         self._type_dict = {}
         try:
diff --git a/superset/db_engine_specs.py b/superset/db_engine_specs.py
index 2b7454160d..cc1345e371 100644
--- a/superset/db_engine_specs.py
+++ b/superset/db_engine_specs.py
@@ -321,15 +321,6 @@ def get_configuration_for_impersonation(cls, uri, impersonate_user, username):
         """
         return {}
 
-    @classmethod
-    def get_normalized_column_names(cls, cursor_description):
-        columns = cursor_description if cursor_description else []
-        return [cls.normalize_column_name(col[0]) for col in columns]
-
-    @staticmethod
-    def normalize_column_name(column_name):
-        return column_name
-
     @staticmethod
     def execute(cursor, query, async=False):
         cursor.execute(query)
@@ -402,10 +393,6 @@ class SnowflakeEngineSpec(PostgresBaseEngineSpec):
         Grain('year', _('year'), "DATE_TRUNC('YEAR', {col})", 'P1Y'),
     )
 
-    @staticmethod
-    def normalize_column_name(column_name):
-        return column_name.lower()
-
 
 class VerticaEngineSpec(PostgresBaseEngineSpec):
     engine = 'vertica'
@@ -414,10 +401,6 @@ class VerticaEngineSpec(PostgresBaseEngineSpec):
 class RedshiftEngineSpec(PostgresBaseEngineSpec):
     engine = 'redshift'
 
-    @staticmethod
-    def normalize_column_name(column_name):
-        return column_name.lower()
-
 
 class OracleEngineSpec(PostgresBaseEngineSpec):
     engine = 'oracle'
@@ -440,10 +423,6 @@ def convert_dttm(cls, target_type, dttm):
             """TO_TIMESTAMP('{}', 'YYYY-MM-DD"T"HH24:MI:SS.ff6')"""
         ).format(dttm.isoformat())
 
-    @staticmethod
-    def normalize_column_name(column_name):
-        return column_name.lower()
-
 
 class Db2EngineSpec(BaseEngineSpec):
     engine = 'ibm_db_sa'
diff --git a/tests/dataframe_test.py b/tests/dataframe_test.py
index b56770240b..fdba431491 100644
--- a/tests/dataframe_test.py
+++ b/tests/dataframe_test.py
@@ -113,3 +113,15 @@ def test_get_columns_type_inference(self):
                 },
             ],
         )
+
+    def test_dedup_with_data(self):
+        data = [
+            ('a', 1),
+            ('a', 2),
+        ]
+        cursor_descr = (
+            ('a', 'string'),
+            ('a', 'string'),
+        )
+        cdf = SupersetDataFrame(data, cursor_descr, BaseEngineSpec)
+        self.assertListEqual(cdf.column_names, ['a', 'a__1'])


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to