Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19459#discussion_r144827985

    --- Diff: python/pyspark/sql/session.py ---
    @@ -414,6 +415,43 @@ def _createFromLocal(self, data, schema):
                 data = [schema.toInternal(row) for row in data]
             return self._sc.parallelize(data), schema

    +    def _createFromPandasWithArrow(self, df, schema):
    +        """
    +        Create a DataFrame from a given pandas.DataFrame by slicing the into partitions, converting
    +        to Arrow data, then reading into the JVM to parallelsize. If a schema is passed in, the
    --- End diff --

    typo: `parallelsize` -> `parallelize`?
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org