Update dataframe_passing.py (#267)
SandraGH5 authored Jun 4, 2021
1 parent 2c08c02 commit 52e4276
Showing 1 changed file with 8 additions and 7 deletions.
15 changes: 8 additions & 7 deletions cookbook/integrations/kubernetes/k8s_spark/dataframe_passing.py
@@ -1,8 +1,8 @@
 """
 .. _intermediate_spark_dataframes_passing:
-Pass pandas dataframes
-===========================
+Converting a Spark DataFrame to a Pandas DataFrame
+==================================================
 This example shows how users can return a spark.Dataset from a task and consume it as a pandas.DataFrame.
 If the dataframe does not fit in memory, it will result in a runtime failure.
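The diff only shows the docstring here, but the pattern it names, producing a schema-typed output in one task and reading it back as a pandas.DataFrame in another, can be illustrated with a minimal, Spark-free sketch. This runs locally with flytekit and pandas installed; the names (make_df, sum_ages, wf) and the sample data are illustrative, not from this commit.

import pandas as pd
from flytekit import kwtypes, task, workflow
from flytekit.types.schema import FlyteSchema

MySchema = FlyteSchema[kwtypes(name=str, age=int)]

@task
def make_df() -> MySchema:
    # Return a plain pandas.DataFrame; flytekit converts it to a FlyteSchema.
    return pd.DataFrame({"name": ["Alice", "Bob"], "age": [5, 10]})

@task
def sum_ages(s: MySchema) -> int:
    # open() with no format argument reads the schema back as a pandas.DataFrame.
    return int(s.open().all()["age"].sum())

@workflow
def wf() -> int:
    return sum_ages(s=make_df())

if __name__ == "__main__":
    print(wf())  # prints 15 when run locally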
@@ -17,12 +17,13 @@
 # .. _df_my_schema_definition:
 #
 # Define my_schema
-# --------------------
+# -----------------
 # This section defines a simple schema type with 2 columns, `name: str` and `age: int`
 my_schema = FlyteSchema[kwtypes(name=str, age=int)]

 # %%
-# ``create_spark_df`` is a spark task, that runs within a spark cotext (relies on having spark cluster up and running). This task generates a spark DataFrame whose schema matches the predefined :any:`df_my_schema_definition`
+# ``create_spark_df`` is a spark task that runs within a spark context (and relies on having a spark cluster up and running). This task generates a spark DataFrame whose schema matches the predefined :any:`df_my_schema_definition`
+#
 # Notice that the task simply returns a pyspark.DataFrame object, even though the return type specifies :any:`df_my_schema_definition`
 # The flytekit type-system will automatically convert the pyspark.DataFrame to a FlyteSchema object.
 # A FlyteSchema object is an abstract representation of a DataFrame that can conform to multiple different dataframe formats.
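The diff truncates the body of ``create_spark_df``. A plausible sketch, based on the surrounding comments, follows; the Spark configuration value and the sample rows are assumptions, not part of this commit, and the flytekitplugins-spark package is assumed to be installed.

import flytekit
from flytekit import task
from flytekitplugins.spark import Spark

@task(
    task_config=Spark(spark_conf={"spark.driver.memory": "1000M"}),
    cache_version="1",
)
def create_spark_df() -> my_schema:
    # Grab the session provided by the Spark plugin and build a DataFrame
    # whose columns line up with my_schema (name: str, age: int).
    sess = flytekit.current_context().spark_session
    return sess.createDataFrame(
        [("Alice", 5), ("Bob", 10), ("Carol", 15)],
        ["name", "age"],
    )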
@@ -55,7 +56,7 @@ def create_spark_df() -> my_schema:
 # The task ``sum_of_all_ages`` receives a parameter of type :any:`df_my_schema_definition`. It is important to note that there is no
 # expectation that the schema is a pandas dataframe or a spark dataframe, but just a generic schema object. The Flytekit schema object
 # can be read into multiple formats using the ``open()`` method. The default conversion is to :py:class:`pandas.DataFrame`.
-# Refer to :py:class:`flytekit.types.schema.FlyteSchema` for more details
+# Refer to :py:class:`flytekit.types.schema.FlyteSchema` for more details.
 #
 @task(cache_version="1")
 def sum_of_all_ages(s: my_schema) -> int:
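The body of ``sum_of_all_ages`` is cut off above. A plausible completion, following the comments (open the generic schema, take the default pandas conversion, reduce), might be:

@task(cache_version="1")
def sum_of_all_ages(s: my_schema) -> int:
    reader = s.open()  # no format argument: defaults to pandas.DataFrame
    df = reader.all()  # materializes the whole frame in memory
    return int(df["age"].sum())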
@@ -84,8 +85,8 @@ def my_smart_schema() -> int:


 # %%
-# This program can be executed locally and it should work as expected and this greatly simplifies usage of disparate DataFrame technologies for the end user.
-# Also new DataFrame technologies can be dynamically loaded in flytekit's TypeEngine. More on this later.
+# This program can be executed locally and it should work as expected. This greatly simplifies using disparate DataFrame technologies for the end user.
+# New DataFrame technologies can also be dynamically loaded in flytekit's TypeEngine.
 if __name__ == "__main__":
     """
     This program can be run locally
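The ``__main__`` block is truncated by the diff view. A sketch of how it plausibly continues, executing the workflow locally and printing the result; the exact print statement is an assumption:

if __name__ == "__main__":
    """
    This program can be run locally
    """
    # Calling a @workflow-decorated function directly runs it locally.
    print(f"Running my_smart_schema(), sum of all ages = {my_smart_schema()}")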
