Skip to content

Commit

Permalink
fix: boolean and date failures with empty data
Browse files: browse the repository at this point in the history
  • Loading branch information
alexbarros committed Apr 4, 2024
1 parent 7884e2d commit 2c616c6
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 15 deletions.
15 changes: 11 additions & 4 deletions src/ydata_profiling/model/pandas/describe_boolean_pandas.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Tuple

import pandas as pd
import numpy as np

from ydata_profiling.config import Settings
from ydata_profiling.model.pandas.imbalance_pandas import column_imbalance_score
Expand All @@ -26,9 +27,15 @@ def pandas_describe_boolean_1d(
A dict containing calculated series description values.
"""

value_counts = summary["value_counts_without_nan"]
summary.update({"top": value_counts.index[0], "freq": value_counts.iloc[0]})

summary["imbalance"] = column_imbalance_score(value_counts, len(value_counts))
value_counts: pd.Series = summary["value_counts_without_nan"]
if not value_counts.empty:
summary.update({"top": value_counts.index[0], "freq": value_counts.iloc[0]})
summary["imbalance"] = column_imbalance_score(value_counts, len(value_counts))
else:
summary.update({

Check warning on line 35 in src/ydata_profiling/model/pandas/describe_boolean_pandas.py

View check run for this annotation

Codecov / codecov/patch

src/ydata_profiling/model/pandas/describe_boolean_pandas.py#L35

Added line #L35 was not covered by tests
"top": np.nan,
"freq": 0,
"imbalance": 0,
})

return config, series, summary
30 changes: 20 additions & 10 deletions src/ydata_profiling/model/pandas/describe_date_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,26 @@ def pandas_describe_date_1d(
Returns:
A dict containing calculated series description values.
"""
summary.update(
{
"min": pd.Timestamp.to_pydatetime(series.min()),
"max": pd.Timestamp.to_pydatetime(series.max()),
}
)

summary["range"] = summary["max"] - summary["min"]

values = series.values.astype(np.int64) // 10**9
if summary["value_counts_without_nan"].empty:
values = series.values
summary.update(

Check warning on line 34 in src/ydata_profiling/model/pandas/describe_date_pandas.py

View check run for this annotation

Codecov / codecov/patch

src/ydata_profiling/model/pandas/describe_date_pandas.py#L33-L34

Added lines #L33 - L34 were not covered by tests
{
"min": pd.NaT,
"max": pd.NaT,
"range": 0,
}
)
else:
summary.update(
{
"min": pd.Timestamp.to_pydatetime(series.min()),
"max": pd.Timestamp.to_pydatetime(series.max()),
}
)

summary["range"] = summary["max"] - summary["min"]

values = series.values.astype(np.int64) // 10**9

if config.vars.num.chi_squared_threshold > 0.0:
summary["chi_squared"] = chi_square(values)
Expand Down
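3 changes: 2 additions & 1 deletion in the third changed file (path not captured in this extract; the hunk below is in `render_date`)
Original file line number Diff line number Diff line change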
Expand Up @@ -103,13 +103,14 @@ def render_date(config: Settings, summary: Dict[str, Any]) -> Dict[str, Any]:
)

# Bottom
n_bins = len(summary['histogram'][1]) - 1 if summary['histogram'] else 0
bottom = Container(
[
Image(
hist_data,
image_format=image_format,
alt="Histogram",
caption=f"<strong>Histogram with fixed size bins</strong> (bins={len(summary['histogram'][1]) - 1})",
caption=f"<strong>Histogram with fixed size bins</strong> (bins={n_bins})",
name="Histogram",
anchor_id=f"{varid}histogram",
)
Expand Down

0 comments on commit 2c616c6

Please sign in to comment.