Skip to content

Commit

Permalink
fix: add validation to avoid zero division for cat columns with only m…
Browse files Browse the repository at this point in the history
…issing data
  • Loading branch information
alexbarros committed Apr 3, 2024
1 parent 0701737 commit fe2ebda
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -195,10 +195,12 @@ def length_summary_vc(vc: pd.Series) -> dict:

summary = {
"max_length": np.max(length_counts.index),
"mean_length": np.average(length_counts.index, weights=length_counts.values),
"mean_length": np.average(
length_counts.index, weights=length_counts.values
) if not length_counts.empty else np.nan,
"median_length": weighted_median(
length_counts.index.values, weights=length_counts.values
),
) if not length_counts.empty else np.nan,
"min_length": np.min(length_counts.index),
"length_histogram": length_counts,
}
Expand Down
4 changes: 4 additions & 0 deletions src/ydata_profiling/model/summary_algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ def histogram_compute(
weights: Optional[np.ndarray] = None,
) -> dict:
stats = {}
if len(finite_values) == 0:
return {name: []}
hist_config = config.plot.histogram
bins_arg = "auto" if hist_config.bins == 0 else min(hist_config.bins, n_unique)
bins = np.histogram_bin_edges(finite_values, bins=bins_arg)
Expand All @@ -54,6 +56,8 @@ def chi_square(
if histogram is None:
bins = np.histogram_bin_edges(values, bins="auto")
histogram, _ = np.histogram(values, bins=bins)
if len(histogram) == 0 or np.sum(histogram) == 0:
return {"statistic": np.nan, "pvalue": np.nan}
return dict(chisquare(histogram)._asdict())


Expand Down

0 comments on commit fe2ebda

Please sign in to comment.