Skip to content
This repository has been archived by the owner on Aug 1, 2024. It is now read-only.

Commit

Permalink
remove sample values & show most frequent
Browse files Browse the repository at this point in the history
  • Loading branch information
jeromedockes committed Jan 12, 2024
1 parent 3c5c7cd commit f0bfd0c
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 20 deletions.
8 changes: 8 additions & 0 deletions src/skrubview/_data/templates/base.css
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,14 @@
font-size: var(--skrubview-large);
}

/* Bordered box with a capped width and height; content larger than the cap scrolls inside the box. */
{{ report_id_selector }}.skrubview-report .skrubview-box {
border: 1px solid #555555;
padding: var(--skrubview-tiny);
max-width: 40rem;
max-height: 10rem;
overflow: auto;
}

{{ report_id_selector }}.skrubview-report :is(.skrubview-ok, .skrubview-warning, .skrubview-critical) {
font-weight: bold;
}
Expand Down
10 changes: 10 additions & 0 deletions src/skrubview/_data/templates/column-summary.html
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,17 @@ <h3 class="skrubview-margin-r-t">
</div>

{# Render one image per plot; string columns also get a collapsible list of their most frequent values. #}
{% for plot_name in column.plot_names %}
<div>
    {# The alt value is quoted so it is always parsed as a single, well-formed attribute. #}
    <img class="pure-img" src="{{ column[plot_name] | svg_to_img_src | safe }}" alt="{{ plot_name }}" />
    {% if plot_name == "value_counts_plot" and column.dtype == "String" %}
    <details>
        <summary>Most frequent values</summary>
        {# One scrollable box per item yielded by iterating column.value_counts. #}
        {% for value in column.value_counts %}
        <pre class="skrubview-box">{{ value }}</pre>
        {% endfor %}
    </details>
    {% endif %}
</div>
{% endfor %}

</div>
Expand Down
15 changes: 0 additions & 15 deletions src/skrubview/_summarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ def _summarize_column(
"value_is_constant": False,
}
_add_nulls_summary(summary, column, dataframe_summary=dataframe_summary)
_add_sample_values(summary, column)
_add_value_counts(
summary, column, dataframe_summary=dataframe_summary, with_plots=with_plots
)
Expand All @@ -86,20 +85,6 @@ def _add_nulls_summary(summary, column, dataframe_summary):
summary["nulls_level"] = "warning"


def _add_sample_values(summary, column):
    """Store up to 5 randomly chosen non-null values of ``column`` in ``summary``.

    The values are written to ``summary["sample_values"]`` after each one has
    been passed through ``_utils.ellide_string``. The random generator is
    seeded with 0, so the same column always yields the same sample, and the
    sampled positions are sorted so the values keep their relative order.

    Parameters
    ----------
    summary
        Mapping holding the column summary; updated in place. It is left
        untouched when the column has no non-null values.
    column
        Column to sample from. It is accessed through ``is_null``, ``filter``,
        ``len``, ``take``, ``to_array`` and ``__column_namespace__``.
    """
    non_missing = column.filter(~column.is_null())
    n_non_missing = int(non_missing.len())
    if n_non_missing == 0:
        return
    rng = np.random.default_rng(0)
    size = min(n_non_missing, 5)
    # Passing the population size as an int samples from np.arange(n) directly,
    # which avoids converting a full ``range`` object into an array first.
    sample_indices = sorted(rng.choice(n_non_missing, size=size, replace=False))
    ns = column.__column_namespace__()
    sample_indices = ns.column_from_sequence(sample_indices)
    sample_values = np.asarray(non_missing.take(sample_indices).to_array()).tolist()
    summary["sample_values"] = [_utils.ellide_string(value) for value in sample_values]


def _add_value_counts(summary, column, *, dataframe_summary, with_plots):
ns = column.__column_namespace__()
dtype = _utils.get_dtype(column)
Expand Down
10 changes: 5 additions & 5 deletions src/skrubview/_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def _print_summary(summary, console):

def _print_first_row(summary, console):
console.print("First row:")
console.print(summary["first_row_dict"])
console.print({k: _utils.ellide_string(v) for (k, v) in summary["first_row_dict"].items()})

def _print_constant_columns(summary, console):
cols = [col for col in summary["columns"] if col.get("value_is_constant")]
Expand Down Expand Up @@ -72,10 +72,10 @@ def _print_column_summary(summary, console):
text.append(f"[{color}]{summary['null_proportion']:0.2%}[/{color}]\n")
if "n_unique" in summary:
text.append(f"Unique values: {summary['n_unique']}\n")
if not summary["high_cardinality"]:
text.append(f"Value counts: {summary['value_counts']}\n")
elif summary["dtype"] == "String":
text.append(f"Sample values: {list(summary['sample_values'])}\n")
if "value_counts" in summary:
# TODO in theory ellide_string could create collisions
ellided = {_utils.ellide_string(k): v for (k, v) in summary["value_counts"].items()}
text.append(f"Most frequent value counts: {ellided}\n")
if "mean" in summary:
text.append(
f"Mean: {summary['mean']:#0.3g} "
Expand Down

0 comments on commit f0bfd0c

Please sign in to comment.