potiuk commented on a change in pull request #6692: [AIRFLOW-6130] Make
Cassandra to GCS operator pylint compatible
URL: https://github.com/apache/airflow/pull/6692#discussion_r352286033
##########
File path: airflow/operators/cassandra_to_gcs.py
##########
@@ -247,120 +248,140 @@ def convert_value(cls, name, value):
elif isinstance(value, Time):
return str(value).split('.')[0]
elif isinstance(value, (list, SortedSet)):
- return cls.convert_array_types(name, value)
+ return cls.convert_array_types(value)
elif hasattr(value, '_fields'):
- return cls.convert_user_type(name, value)
+ return cls.convert_user_type(value)
elif isinstance(value, tuple):
- return cls.convert_tuple_type(name, value)
+ return cls.convert_tuple_type(value)
elif isinstance(value, OrderedMapSerializedKey):
- return cls.convert_map_type(name, value)
+ return cls.convert_map_type(value)
else:
- raise AirflowException('unexpected value: ' + str(value))
+ raise AirflowException('Unexpected value: ' + str(value))
@classmethod
- def convert_array_types(cls, name, value):
- return [cls.convert_value(name, nested_value) for nested_value in
value]
+ def convert_array_types(cls, value: Union[list, SortedSet]) -> List:
+ """
+ Maps convert_value over array.
+ """
+ return [cls.convert_value(nested_value) for nested_value in value]
@classmethod
- def convert_user_type(cls, name, value):
+ def convert_user_type(cls, value):
"""
Converts a user type to RECORD that contains n fields, where n is the
number of attributes. Each element in the user type class will be
converted to its
corresponding data type in BQ.
"""
names = value._fields
- values = [cls.convert_value(name, getattr(value, name)) for name in
names]
+ values = [cls.convert_value(getattr(value, name)) for name in names]
return cls.generate_data_dict(names, values)
@classmethod
- def convert_tuple_type(cls, name, value):
+ def convert_tuple_type(cls, values: Tuple):
"""
Converts a tuple to RECORD that contains n fields, each will be
converted
to its corresponding data type in bq and will be named
'field_<index>', where
index is determined by the order of the tuple elements defined in
cassandra.
"""
- names = ['field_' + str(i) for i in range(len(value))]
- values = [cls.convert_value(name, value) for name, value in zip(names,
value)]
+ names = ['field_' + str(i) for i in range(len(values))]
return cls.generate_data_dict(names, values)
@classmethod
- def convert_map_type(cls, name, value):
+ def convert_map_type(cls, value: OrderedMapSerializedKey) -> List[Dict]:
"""
Converts a map to a repeated RECORD that contains two fields: 'key'
and 'value',
each will be converted to its corresponding data type in BQ.
"""
converted_map = []
for k, v in zip(value.keys(), value.values()):
converted_map.append({
- 'key': cls.convert_value('key', k),
- 'value': cls.convert_value('value', v)
+ 'key': cls.convert_value(k),
+ 'value': cls.convert_value(v)
})
return converted_map
@classmethod
- def generate_schema_dict(cls, name, type):
- field_schema = dict()
+ def generate_schema_dict(cls, name: str, type_) -> Dict:
+ """
+ Generates BQ schema.
+ """
+ field_schema: Dict[str, Any] = dict()
field_schema.update({'name': name})
- field_schema.update({'type': cls.get_bq_type(type)})
- field_schema.update({'mode': cls.get_bq_mode(type)})
- fields = cls.get_bq_fields(name, type)
+ field_schema.update({'type_': cls.get_bq_type(type_)})
+ field_schema.update({'mode': cls.get_bq_mode(type_)})
+ fields = cls.get_bq_fields(type_)
if fields:
field_schema.update({'fields': fields})
return field_schema
@classmethod
- def get_bq_fields(cls, name, type):
- fields = []
-
- if not cls.is_simple_type(type):
- names, types = [], []
-
- if cls.is_array_type(type) and
cls.is_record_type(type.subtypes[0]):
- names = type.subtypes[0].fieldnames
- types = type.subtypes[0].subtypes
- elif cls.is_record_type(type):
- names = type.fieldnames
- types = type.subtypes
-
- if types and not names and type.cassname == 'TupleType':
- names = ['field_' + str(i) for i in range(len(types))]
- elif types and not names and type.cassname == 'MapType':
- names = ['key', 'value']
-
- for name, type in zip(names, types):
- field = cls.generate_schema_dict(name, type)
- fields.append(field)
-
- return fields
-
- @classmethod
- def is_simple_type(cls, type):
- return type.cassname in
CassandraToGoogleCloudStorageOperator.CQL_TYPE_MAP
+ def get_bq_fields(cls, type_) -> List[Dict]:
Review comment:
```suggestion
def get_bq_fields(cls, type_: Any) -> List[Dict[str, Any]]:
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services