[GitHub] [airflow] potiuk commented on a change in pull request #6692: [AIRFLOW-6130] Make Cassandra to GCS operator pylint compatible

GitBox Sat, 30 Nov 2019 05:44:33 -0800

potiuk commented on a change in pull request #6692: [AIRFLOW-6130] Make Cassandra to GCS operator pylint compatible
URL: https://github.com/apache/airflow/pull/6692#discussion_r352286047


 ##########
 File path: airflow/operators/cassandra_to_gcs.py
 ##########
 @@ -247,120 +248,140 @@ def convert_value(cls, name, value):
         elif isinstance(value, Time):
             return str(value).split('.')[0]
         elif isinstance(value, (list, SortedSet)):
-            return cls.convert_array_types(name, value)
+            return cls.convert_array_types(value)
         elif hasattr(value, '_fields'):
-            return cls.convert_user_type(name, value)
+            return cls.convert_user_type(value)
         elif isinstance(value, tuple):
-            return cls.convert_tuple_type(name, value)
+            return cls.convert_tuple_type(value)
         elif isinstance(value, OrderedMapSerializedKey):
-            return cls.convert_map_type(name, value)
+            return cls.convert_map_type(value)
         else:
-            raise AirflowException('unexpected value: ' + str(value))
+            raise AirflowException('Unexpected value: ' + str(value))
 
     @classmethod
-    def convert_array_types(cls, name, value):
-        return [cls.convert_value(name, nested_value) for nested_value in value]
+    def convert_array_types(cls, value: Union[list, SortedSet]) -> List:
+        """
+        Maps convert_value over array.
+        """
+        return [cls.convert_value(nested_value) for nested_value in value]
 
     @classmethod
-    def convert_user_type(cls, name, value):
+    def convert_user_type(cls, value):
         """
         Converts a user type to RECORD that contains n fields, where n is the
         number of attributes. Each element in the user type class will be converted to its
         corresponding data type in BQ.
         """
         names = value._fields
-        values = [cls.convert_value(name, getattr(value, name)) for name in names]
+        values = [cls.convert_value(getattr(value, name)) for name in names]
         return cls.generate_data_dict(names, values)
 
     @classmethod
-    def convert_tuple_type(cls, name, value):
+    def convert_tuple_type(cls, values: Tuple):
         """
         Converts a tuple to RECORD that contains n fields, each will be converted
         to its corresponding data type in bq and will be named 'field_<index>', where
         index is determined by the order of the tuple elements defined in cassandra.
         """
-        names = ['field_' + str(i) for i in range(len(value))]
-        values = [cls.convert_value(name, value) for name, value in zip(names, value)]
+        names = ['field_' + str(i) for i in range(len(values))]
         return cls.generate_data_dict(names, values)
 
     @classmethod
-    def convert_map_type(cls, name, value):
+    def convert_map_type(cls, value: OrderedMapSerializedKey) -> List[Dict]:
         """
         Converts a map to a repeated RECORD that contains two fields: 'key' and 'value',
         each will be converted to its corresponding data type in BQ.
         """
         converted_map = []
         for k, v in zip(value.keys(), value.values()):
             converted_map.append({
-                'key': cls.convert_value('key', k),
-                'value': cls.convert_value('value', v)
+                'key': cls.convert_value(k),
+                'value': cls.convert_value(v)
             })
         return converted_map
 
     @classmethod
-    def generate_schema_dict(cls, name, type):
-        field_schema = dict()
+    def generate_schema_dict(cls, name: str, type_) -> Dict:
+        """
+        Generates BQ schema.
+        """
+        field_schema: Dict[str, Any] = dict()
         field_schema.update({'name': name})
-        field_schema.update({'type': cls.get_bq_type(type)})
-        field_schema.update({'mode': cls.get_bq_mode(type)})
-        fields = cls.get_bq_fields(name, type)
+        field_schema.update({'type_': cls.get_bq_type(type_)})
+        field_schema.update({'mode': cls.get_bq_mode(type_)})
+        fields = cls.get_bq_fields(type_)
         if fields:
             field_schema.update({'fields': fields})
         return field_schema
 
     @classmethod
-    def get_bq_fields(cls, name, type):
-        fields = []
-
-        if not cls.is_simple_type(type):
-            names, types = [], []
-
-            if cls.is_array_type(type) and cls.is_record_type(type.subtypes[0]):
-                names = type.subtypes[0].fieldnames
-                types = type.subtypes[0].subtypes
-            elif cls.is_record_type(type):
-                names = type.fieldnames
-                types = type.subtypes
-
-            if types and not names and type.cassname == 'TupleType':
-                names = ['field_' + str(i) for i in range(len(types))]
-            elif types and not names and type.cassname == 'MapType':
-                names = ['key', 'value']
-
-            for name, type in zip(names, types):
-                field = cls.generate_schema_dict(name, type)
-                fields.append(field)
-
-        return fields
-
-    @classmethod
-    def is_simple_type(cls, type):
-        return type.cassname in CassandraToGoogleCloudStorageOperator.CQL_TYPE_MAP
+    def get_bq_fields(cls, type_) -> List[Dict]:
+        """
+        Converts non simple type value to BQ representation.
+        """
+        if cls.is_simple_type(type_):
+            return []
+
+        # In case of not simple type
+        names: List[str] = []
+        types: List[Any] = []
+        if cls.is_array_type(type_) and cls.is_record_type(type_.subtypes[0]):
+            names = type_.subtypes[0].fieldnames
+            types = type_.subtypes[0].subtypes
+        elif cls.is_record_type(type_):
+            names = type_.fieldnames
+            types = type_.subtypes
+
+        if types and not names and type_.cassname == 'TupleType':
+            names = ['field_' + str(i) for i in range(len(types))]
+        elif types and not names and type_.cassname == 'MapType':
+            names = ['key', 'value']
+
+        return [cls.generate_schema_dict(n, t) for n, t in zip(names, types)]
+
+    @staticmethod
+    def is_simple_type(type_) -> bool:
 
 Review comment:
   ```suggestion
       def is_simple_type(type_: Any) -> bool:
   ```

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [airflow] potiuk commented on a change in pull request #6692: [AIRFLOW-6130] Make Cassandra to GCS operator pylint compatible

Reply via email to