diff --git a/daft/dataframe/dataframe.py b/daft/dataframe/dataframe.py index f9bed61f1e..88fee845ec 100644 --- a/daft/dataframe/dataframe.py +++ b/daft/dataframe/dataframe.py @@ -268,7 +268,7 @@ def iter_rows( entire partitions of data, see: :meth:`df.iter_partitions() <daft.DataFrame.iter_partitions>`. By default, Daft will convert the columns to Python lists for easy consumption. However, for nested data such as List or Struct arrays, this can be expensive. - You may wish to set `column_format` to "arrow" such that the nested data is returned as an Arrow array. + You may wish to set `column_format` to "arrow" such that the nested data is returned as Arrow scalars. .. NOTE:: A quick note on configuring asynchronous/parallel execution using `results_buffer_size`. @@ -296,7 +296,7 @@ def iter_rows( Args: results_buffer_size: how many partitions to allow in the results buffer (defaults to the total number of CPUs available on the machine). - column_format: the format of the columns to iterate over. One of "python", "arrow", or "numpy". Defaults to "python". + column_format: the format of the columns to iterate over. One of "python" or "arrow". Defaults to "python". .. seealso:: :meth:`df.iter_partitions() <daft.DataFrame.iter_partitions>`: iterator over entire partitions instead of single rows diff --git a/tests/dataframe/test_iter.py b/tests/dataframe/test_iter.py index 51fd30269f..43b5ab3f51 100644 --- a/tests/dataframe/test_iter.py +++ b/tests/dataframe/test_iter.py @@ -101,18 +101,11 @@ def compare_values(v1, v2): assert compare_values(actual_row, expected_row) -@pytest.mark.parametrize( - "format", - [ - "arrow", - "numpy", - ], -) -def test_iter_rows_column_format_not_compatible(format): +def test_iter_rows_arrow_column_format_not_compatible(): df = daft.from_pydict({"a": [object()]}) - # Object type is not supported by arrow or numpy + # Object type is not supported by arrow with pytest.raises(ValueError): - list(df.iter_rows(column_format=format)) + list(df.iter_rows(column_format="arrow")) @pytest.mark.parametrize("materialized", [False, True])