sunjincheng121 commented on a change in pull request #8474:
[FLINK-12409][python] Adds from_elements in TableEnvironment
URL: https://github.com/apache/flink/pull/8474#discussion_r287658543
##########
File path: flink-python/pyflink/table/table_environment.py
##########
@@ -379,13 +383,97 @@ def create(cls, table_config):
return t_env
+ def from_elements(self, elements, schema=None, verify_schema=True):
+ """
+ Creates a table from a collection of elements.
+
+ :param elements: The elements to create table from.
+ :param schema: The schema of the table.
+ :param verify_schema: Whether to verify the elements against the
schema.
+ :return: A Table.
+ """
+
+ # verifies the elements against the specified schema
+ if isinstance(schema, RowType):
+ verify_func = _create_type_verifier(schema) if verify_schema else
lambda _: True
+
+ def verify_obj(obj):
+ verify_func(obj)
+ return obj
+ elif isinstance(schema, DataType):
+ data_type = schema
+ schema = RowType().add("value", schema)
+
+ verify_func = _create_type_verifier(
+ data_type, name="field value") if verify_schema else lambda _:
True
+
+ def verify_obj(obj):
+ verify_func(obj)
+ return obj
+ else:
+ def verify_obj(obj):
+ return obj
+
+ # makes sure we distribute data evenly if it's smaller than
self.batchSize
+ if "__len__" not in dir(elements):
+ elements = list(elements) # Makes it a list so we can compute its
length
+
+ # infers the schema if not specified
+ if schema is None or isinstance(schema, (list, tuple)):
+ schema = _infer_schema_from_data(elements, names=schema)
+ converter = _create_converter(schema)
+ elements = map(converter, elements)
+ if isinstance(schema, (list, tuple)):
+ for i, name in enumerate(schema):
+ schema.fields[i].name = name
+ schema.names[i] = name
+
+ elif not isinstance(schema, RowType):
+ raise TypeError(
+ "schema should be RowType, list, tuple or None, but got: %s" %
schema)
+
+ # converts python data to sql data
+ elements = [schema.to_sql_type(element) for element in elements]
+ return self._from_elements(map(verify_obj, elements), schema)
+
+ def _from_elements(self, elements, schema):
+ """
+ Creates a table from a collection of elements.
+
+ :param elements: The elements to create table from.
+ :return: A table.
+ """
+
+ # serializes to a file, and we read the file in java
+ temp_file = tempfile.NamedTemporaryFile(delete=False,
dir=tempfile.mkdtemp())
Review comment:
+1 to serializing to a file, since using py4j to send a large dataset to the
JVM is really slow.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services