Skip to content

Commit

Permalink
fixup!
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli committed Mar 16, 2024
1 parent 7758a2b commit cc3106a
Show file tree
Hide file tree
Showing 7 changed files with 115 additions and 100 deletions.
11 changes: 6 additions & 5 deletions demo.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
# ruff: noqa
# type: ignore
from typing import Any
import polars as pl
import modin.pandas as mpd
# import modin.pandas as mpd

import narwhals as nw


def func(df_raw: nw.typing.T) -> nw.typing.T:
df: nw.DataFrame[nw.typing.T] = nw.DataFrame(df_raw)
def func(df_raw):
df = nw.DataFrame(df_raw)
res = df.with_columns(
d=nw.col("a") + 1,
e=nw.col("a") + nw.col("b"),
Expand All @@ -24,8 +25,8 @@ def func(df_raw: nw.typing.T) -> nw.typing.T:

df = pd.DataFrame({"a": [1, 1, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
print(func(df))
df = mpd.DataFrame({"a": [1, 1, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
print(func(df))
# df = mpd.DataFrame({"a": [1, 1, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
# print(func(df))
df = pl.DataFrame({"a": [1, 1, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
print(func(df))
df = pl.LazyFrame({"a": [1, 1, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
Expand Down
2 changes: 2 additions & 0 deletions narwhals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from narwhals.containers import is_polars
from narwhals.containers import is_series
from narwhals.dataframe import DataFrame
from narwhals.dataframe import LazyFrame
from narwhals.dtypes import * # noqa: F403
from narwhals.expression import all
from narwhals.expression import col
Expand Down Expand Up @@ -34,5 +35,6 @@
"sum",
"sum_horizontal",
"DataFrame",
"LazyFrame",
"Series",
]
154 changes: 81 additions & 73 deletions narwhals/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,40 +36,14 @@ def _validate_features(df: Any, features: set[str]) -> None:
raise TypeError(msg)


class DataFrame(Generic[T]):
def __init__(
self,
df: T,
*,
features: Iterable[str] | None = None,
implementation: str | None = None,
) -> None:
self._features: set[str] = set(features) if features is not None else set()
if implementation is not None:
self._dataframe: Any = df
self._implementation = implementation
return
if (pl := get_polars()) is not None and isinstance(
df, (pl.DataFrame, pl.LazyFrame)
):
self._dataframe = df
self._implementation = "polars"
elif (pd := get_pandas()) is not None and isinstance(df, pd.DataFrame):
self._dataframe = PandasDataFrame(df, implementation="pandas")
self._implementation = "pandas"
elif (mpd := get_modin()) is not None and isinstance(df, mpd.DataFrame):
self._dataframe = PandasDataFrame(df, implementation="modin")
self._implementation = "modin"
else:
msg = f"Expected pandas-like dataframe, Polars dataframe, or Polars lazyframe, got: {type(df)}"
raise TypeError(msg)
_validate_features(self._dataframe, self._features)
class BaseFrame(Generic[T]):
_dataframe: Any
_implementation: str

def _from_dataframe(self, df: Any) -> Self:
# construct, preserving properties
return self.__class__(
return self.__class__( # type: ignore[call-arg]
df,
features=self._features,
implementation=self._implementation,
)

Expand All @@ -86,7 +60,7 @@ def _extract_native(self, arg: Any) -> Any:

if self._implementation != "polars":
return arg
if isinstance(arg, DataFrame):
if isinstance(arg, BaseFrame):
return arg._dataframe
if isinstance(arg, Series):
return arg._series
Expand Down Expand Up @@ -121,23 +95,6 @@ def schema(self) -> dict[str, DType]:
def columns(self) -> list[str]:
return self._dataframe.columns # type: ignore[no-any-return]

@property
def shape(self) -> tuple[int, int]:
if "eager" not in self._features:
raise RuntimeError(
"`DataFrame.shape` can only be called when feature 'eager' is enabled"
)
return self._dataframe.shape # type: ignore[no-any-return]

def __getitem__(self, col_name: str) -> Series[Any]:
from narwhals.series import Series

if "eager" not in self._features:
raise RuntimeError(
"`DataFrame.__getitem__` can only be called when feature 'eager' is enabled"
)
return Series(self._dataframe[col_name], implementation=self._implementation)

def with_columns(
self, *exprs: IntoExpr | Iterable[IntoExpr], **named_exprs: IntoExpr
) -> Self:
Expand Down Expand Up @@ -168,7 +125,8 @@ def filter(self, *predicates: IntoExpr | Iterable[IntoExpr]) -> Self:
def group_by(self, *keys: str | Iterable[str]) -> GroupBy[T]:
from narwhals.group_by import GroupBy

return GroupBy(self, *keys)
# todo: groupby and lazygroupby
return GroupBy(self, *keys) # type: ignore[arg-type]

def sort(
self,
Expand All @@ -180,22 +138,6 @@ def sort(
self._dataframe.sort(by, *more_by, descending=descending)
)

def collect(self) -> Self:
if "lazy" not in self._features:
raise RuntimeError(
"`DataFrame.collect` can only be called when feature 'lazy' is enabled"
)
features = {f for f in self._features if f != "lazy"}
features.add("eager")
return self.__class__(
self._dataframe.collect(),
implementation=self._implementation,
features=features,
)

def to_dict(self, *, as_series: bool = True) -> dict[str, Any]:
return self._dataframe.to_dict(as_series=as_series) # type: ignore[no-any-return]

def join(
self,
other: Self,
Expand All @@ -213,16 +155,82 @@ def join(
)
)

def to_pandas(self) -> Any:
if "eager" not in self._features:
raise RuntimeError(
"`DataFrame.to_pandas` can only be called when feature 'eager' is enabled"

class DataFrame(BaseFrame[T]):
    """Eager dataframe wrapper.

    Wraps a Polars ``DataFrame`` directly, or a pandas / Modin ``DataFrame``
    via ``PandasDataFrame``. A Polars ``LazyFrame`` is explicitly rejected.
    """

    def __init__(
        self,
        df: T,
        *,
        implementation: str | None = None,
    ) -> None:
        """Wrap ``df``.

        Args:
            df: The native dataframe to wrap.
            implementation: When given, ``df`` is stored as-is under this
                implementation name and no type detection is performed.

        Raises:
            TypeError: If ``df`` is a Polars ``LazyFrame`` or is not one of
                the supported dataframe types.
        """
        if implementation is not None:
            self._dataframe: Any = df
            self._implementation = implementation
            return
        if (pl := get_polars()) is not None and isinstance(df, pl.DataFrame):
            self._dataframe = df
            self._implementation = "polars"
        # `pl` is already bound by the first condition; no second lookup needed.
        elif pl is not None and isinstance(df, pl.LazyFrame):
            raise TypeError(
                "Can't instantiate DataFrame from Polars LazyFrame. Call `collect()` first, or use `narwhals.LazyFrame` if you don't specifically require eager execution."
            )
        elif (pd := get_pandas()) is not None and isinstance(df, pd.DataFrame):
            self._dataframe = PandasDataFrame(df, implementation="pandas")
            self._implementation = "pandas"
        elif (mpd := get_modin()) is not None and isinstance(df, mpd.DataFrame):
            self._dataframe = PandasDataFrame(df, implementation="modin")
            self._implementation = "modin"
        else:
            # Lazyframes are rejected above, so they are not listed as accepted.
            msg = f"Expected pandas-like dataframe or Polars dataframe, got: {type(df)}"
            raise TypeError(msg)

    def to_pandas(self) -> Any:
        """Return the result of ``to_pandas()`` on the wrapped dataframe."""
        return self._dataframe.to_pandas()

    def to_numpy(self) -> Any:
        """Return the result of ``to_numpy()`` on the wrapped dataframe."""
        # No feature check: this class never sets `_features`, and the other
        # accessors (`to_pandas`, `shape`) are likewise unconditional.
        return self._dataframe.to_numpy()

    @property
    def shape(self) -> tuple[int, int]:
        """Shape reported by the wrapped dataframe."""
        return self._dataframe.shape  # type: ignore[no-any-return]

    def __getitem__(self, col_name: str) -> Series[Any]:
        """Return column ``col_name`` wrapped in a narwhals ``Series``."""
        # Imported here rather than at module level, as in the original.
        from narwhals.series import Series

        return Series(self._dataframe[col_name], implementation=self._implementation)

    def to_dict(self, *, as_series: bool = True) -> dict[str, Any]:
        """Return ``to_dict`` of the wrapped dataframe, forwarding ``as_series``."""
        return self._dataframe.to_dict(as_series=as_series)  # type: ignore[no-any-return]


class LazyFrame(BaseFrame[T]):
    """Lazy counterpart of ``DataFrame``.

    Polars ``DataFrame`` / ``LazyFrame`` inputs are stored as the result of
    ``df.lazy()``; pandas and Modin dataframes are wrapped in
    ``PandasDataFrame``.
    """

    def __init__(
        self,
        df: T,
        *,
        implementation: str | None = None,
    ) -> None:
        """Wrap ``df``.

        Args:
            df: The native dataframe or lazyframe to wrap.
            implementation: When given, ``df`` is stored as-is under this
                implementation name and no type detection is performed.

        Raises:
            TypeError: If ``df`` is not one of the supported types.
        """
        # Caller already knows the backend: store and skip detection.
        if implementation is not None:
            self._dataframe: Any = df
            self._implementation = implementation
            return

        polars = get_polars()
        if polars is not None and isinstance(
            df, (polars.DataFrame, polars.LazyFrame)
        ):
            self._dataframe = df.lazy()
            self._implementation = "polars"
            return

        # pandas is probed before modin, each only if the previous check failed.
        for backend, get_module in (("pandas", get_pandas), ("modin", get_modin)):
            module = get_module()
            if module is not None and isinstance(df, module.DataFrame):
                self._dataframe = PandasDataFrame(df, implementation=backend)
                self._implementation = backend
                return

        msg = f"Expected pandas-like dataframe, Polars dataframe, or Polars lazyframe, got: {type(df)}"
        raise TypeError(msg)

    def collect(self) -> DataFrame[Any]:
        """Call ``collect()`` on the wrapped frame and return it as a ``DataFrame``."""
        collected = self._dataframe.collect()
        return DataFrame(collected, implementation=self._implementation)
10 changes: 7 additions & 3 deletions narwhals/group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,25 @@

if TYPE_CHECKING:
from narwhals.dataframe import DataFrame
from narwhals.dataframe import LazyFrame
from narwhals.typing import IntoExpr
from narwhals.typing import T

# TODO: split this into separate GroupBy and LazyGroupBy classes


class GroupBy(Generic[T]):
    """Grouped view over a narwhals ``DataFrame`` or ``LazyFrame``."""

    def __init__(
        self, df: DataFrame[T] | LazyFrame[T], *keys: str | Iterable[str]
    ) -> None:
        """Store the frame to group and the grouping keys.

        Args:
            df: The narwhals frame to group.
            *keys: Grouping keys, forwarded unchanged to the wrapped frame's
                ``group_by``.
        """
        self._df = df
        self._keys = keys

    def agg(
        self, *aggs: IntoExpr | Iterable[IntoExpr], **named_aggs: IntoExpr
    ) -> DataFrame[T] | LazyFrame[T]:
        """Aggregate each group and return a frame of the same class as the input.

        Args:
            *aggs: Positional aggregation expressions.
            **named_aggs: Named aggregation expressions.
        """
        aggs, named_aggs = self._df._flatten_and_extract(*aggs, **named_aggs)
        # Rebuild with the input's own class; only `implementation` is passed
        # because the frame constructors take no `features` argument.
        return self._df.__class__(
            self._df._dataframe.group_by(*self._keys).agg(*aggs, **named_aggs),
            implementation=self._df._implementation,
        )
8 changes: 4 additions & 4 deletions narwhals/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,16 @@
from narwhals.dependencies import get_polars

if TYPE_CHECKING:
from narwhals.dataframe import DataFrame
from narwhals.dataframe import BaseFrame
from narwhals.series import Series
from narwhals.typing import T


def to_native(obj: DataFrame[T] | Series[T]) -> T:
from narwhals.dataframe import DataFrame
def to_native(obj: BaseFrame[T] | Series[T]) -> T:
from narwhals.dataframe import BaseFrame
from narwhals.series import Series

if isinstance(obj, DataFrame):
if isinstance(obj, BaseFrame):
return ( # type: ignore[no-any-return]
obj._dataframe
if obj._implementation == "polars"
Expand Down
Loading

0 comments on commit cc3106a

Please sign in to comment.