diff --git a/src/ydata_profiling/model/pandas/describe_numeric_pandas.py b/src/ydata_profiling/model/pandas/describe_numeric_pandas.py index 5dc8baee2..e4e13f156 100644 --- a/src/ydata_profiling/model/pandas/describe_numeric_pandas.py +++ b/src/ydata_profiling/model/pandas/describe_numeric_pandas.py @@ -50,19 +50,30 @@ def numeric_stats_numpy( index_values = vc.index.values # FIXME: can be performance optimized by using weights in std, var, kurt and skew... - - return { - "mean": np.average(index_values, weights=vc.values), - "std": np.std(present_values, ddof=1), - "variance": np.var(present_values, ddof=1), - "min": np.min(index_values), - "max": np.max(index_values), - # Unbiased kurtosis obtained using Fisher's definition (kurtosis of normal == 0.0). Normalized by N-1. - "kurtosis": series.kurt(), - # Unbiased skew normalized by N-1 - "skewness": series.skew(), - "sum": np.dot(index_values, vc.values), - } + if len(index_values): + return { + "mean": np.average(index_values, weights=vc.values), + "std": np.std(present_values, ddof=1), + "variance": np.var(present_values, ddof=1), + "min": np.min(index_values), + "max": np.max(index_values), + # Unbiased kurtosis obtained using Fisher's definition (kurtosis of normal == 0.0). Normalized by N-1. + "kurtosis": series.kurt(), + # Unbiased skew normalized by N-1 + "skewness": series.skew(), + "sum": np.dot(index_values, vc.values), + } + else: # Empty numerical series + return { + "mean": np.nan, + "std": 0., + "variance": 0., + "min": np.nan, + "max": np.nan, + "kurtosis": 0., + "skewness": 0., + "sum": 0, + } @describe_numeric_1d.register @@ -151,13 +162,14 @@ def pandas_describe_numeric_1d( else: stats["monotonic"] = 0 - stats.update( - histogram_compute( - config, - value_counts[~infinity_index].index.values, - summary["n_distinct"], - weights=value_counts[~infinity_index].values, + if len(value_counts[~infinity_index].index.values) > 0: + stats.update( + histogram_compute( + config, + value_counts[~infinity_index].index.values, + summary["n_distinct"], + weights=value_counts[~infinity_index].values, + ) ) - ) return config, series, stats diff --git a/src/ydata_profiling/report/structure/variables/render_real.py b/src/ydata_profiling/report/structure/variables/render_real.py index 471ee5844..607b4e064 100644 --- a/src/ydata_profiling/report/structure/variables/render_real.py +++ b/src/ydata_profiling/report/structure/variables/render_real.py @@ -118,12 +118,12 @@ def render_real(config: Settings, summary: dict) -> dict: style=config.html.style, ) - if isinstance(summary["histogram"], list): + if isinstance(summary.get("histogram", []), list): mini_histo = Image( mini_histogram( config, - [x[0] for x in summary["histogram"]], - [x[1] for x in summary["histogram"]], + [x[0] for x in summary.get("histogram", [])], + [x[1] for x in summary.get("histogram", [])], ), image_format=image_format, alt="Mini histogram", @@ -243,13 +243,14 @@ def render_real(config: Settings, summary: dict) -> dict: sequence_type="grid", ) - if isinstance(summary["histogram"], list): + if isinstance(summary.get("histogram", []), list): hist_data = histogram( config, - [x[0] for x in summary["histogram"]], - [x[1] for x in summary["histogram"]], + [x[0] for x in summary.get("histogram", [])], + [x[1] for x in summary.get("histogram", [])], ) - hist_caption = f"Histogram with fixed size bins (bins={len(summary['histogram'][0][1]) - 1})" + bins = len(summary['histogram'][0][1]) - 1 if 'histogram' in summary else 0 + hist_caption = f"Histogram with fixed size bins (bins={bins})" else: hist_data = histogram(config, *summary["histogram"]) hist_caption = f"Histogram with fixed size bins (bins={len(summary['histogram'][1]) - 1})" diff --git a/src/ydata_profiling/visualisation/plot.py b/src/ydata_profiling/visualisation/plot.py index 34ed7d133..88c38b98d 100644 --- a/src/ydata_profiling/visualisation/plot.py +++ b/src/ydata_profiling/visualisation/plot.py @@ -73,14 +73,15 @@ def _plot_histogram( plot = fig.add_subplot(111) for idx in reversed(list(range(n_labels))): - diff = np.diff(bins[idx]) - plot.bar( - bins[idx][:-1] + diff / 2, # type: ignore - series[idx], - diff, - facecolor=config.html.style.primary_colors[idx], - alpha=0.6, - ) + if len(bins): + diff = np.diff(bins[idx]) + plot.bar( + bins[idx][:-1] + diff / 2, # type: ignore + series[idx], + diff, + facecolor=config.html.style.primary_colors[idx], + alpha=0.6, + ) if date: plot.xaxis.set_major_formatter(FuncFormatter(format_fn))