diff --git a/src/skrubview/_data/templates/base.css b/src/skrubview/_data/templates/base.css index 9d00efa..8e5e37f 100644 --- a/src/skrubview/_data/templates/base.css +++ b/src/skrubview/_data/templates/base.css @@ -55,6 +55,11 @@ font: inherit; } +{{ report_id_selector }}.skrubview-report :is(td, th) { + white-space: nowrap; +} + + {{ report_id_selector }}.skrubview-report .skrubview-horizontal-scroll { overflow-x: auto; } @@ -77,6 +82,11 @@ max-width: var(--skrubview-max-content-width); } +{{ report_id_selector }}.skrubview-report .skrubview-flex { + display: flex; +} + + {{ report_id_selector }}.skrubview-report .skrubview-card { padding: 0; @@ -116,9 +126,25 @@ {{ report_id_selector }}.skrubview-report .skrubview-box { border: 1px solid #555555; padding: var(--skrubview-tiny); + min-width: 1rem; max-width: 40rem; max-height: 10rem; overflow: auto; + margin-block-start: var(--skrubview-tiny); + margin-block-end: var(--skrubview-tiny); +} + +{{ report_id_selector }}.skrubview-report .skrubview-table-value-box { + max-width: 100%; +} + +{{ report_id_selector }}.skrubview-report .skrubview-table-value-box[data-shows-placeholder] { + color: #777777; +} + +{{ report_id_selector }}.skrubview-report .skrubview-table-cell[data-is-selected] { + background-color: #ffffaa; + border: 1px solid black; } {{ report_id_selector }}.skrubview-report :is(.skrubview-ok, .skrubview-warning, .skrubview-critical) { diff --git a/src/skrubview/_data/templates/dataframe-columns.html b/src/skrubview/_data/templates/dataframe-columns.html index 1f1c735..fdc2896 100644 --- a/src/skrubview/_data/templates/dataframe-columns.html +++ b/src/skrubview/_data/templates/dataframe-columns.html @@ -24,7 +24,7 @@

Constant columns

-
+

{% for column in summary.columns %} diff --git a/src/skrubview/_data/templates/dataframe-sample.html b/src/skrubview/_data/templates/dataframe-sample.html index bcb96cd..6f2bd9c 100644 --- a/src/skrubview/_data/templates/dataframe-sample.html +++ b/src/skrubview/_data/templates/dataframe-sample.html @@ -1,13 +1,15 @@
-
-

Sample

+
+ +

Sample

+

First 5 rows

-
- {{ summary.head_html | safe }} -
+ {% set dataframe = summary.head %} + {% set dataframe_id = "{}_head".format(report_id) %} + {% include "dataframe-table.html" %}

Last 5 rows

-
- {{ summary.tail_html | safe }} -
+ {% set dataframe = summary.tail %} + {% set dataframe_id = "{}_tail".format(report_id) %} + {% include "dataframe-table.html" %}
diff --git a/src/skrubview/_data/templates/dataframe-table.html b/src/skrubview/_data/templates/dataframe-table.html new file mode 100644 index 0000000..590ccbb --- /dev/null +++ b/src/skrubview/_data/templates/dataframe-table.html @@ -0,0 +1,35 @@ +{% set table_id = "{}_table".format(dataframe_id) %} + +
+ + + + {% for column_name in dataframe.header %} + + {% endfor %} + + + + {% for row in dataframe.data %} + + {% for value in row %} + + {% endfor %} + + {% endfor %} + +
{{ column_name }}
+ {%- if value is not none -%} + {{ value | ellide_string_short }} + {%- endif -%} +
+
+
+
+Select a cell to display its value
+
+
diff --git a/src/skrubview/_data/templates/skrubview.js b/src/skrubview/_data/templates/skrubview.js index 3ff5e3b..f5fe0e9 100644 --- a/src/skrubview/_data/templates/skrubview.js +++ b/src/skrubview/_data/templates/skrubview.js @@ -19,7 +19,9 @@ function updateSelectedColsSnippet(reportId) { function clearSelectedCols(reportId) { const reportElem = document.getElementById(reportId); reportElem.querySelectorAll("input.skrubview-select-column-checkbox[type='checkbox']").forEach( - box => {box.checked = false;} + box => { + box.checked = false; + } ); updateSelectedColsSnippet(reportId); } @@ -27,7 +29,9 @@ function clearSelectedCols(reportId) { function selectAllCols(reportId) { const reportElem = document.getElementById(reportId); reportElem.querySelectorAll("input.skrubview-select-column-checkbox[type='checkbox']").forEach( - box => {box.checked = true;} + box => { + box.checked = true; + } ); updateSelectedColsSnippet(reportId); } @@ -38,8 +42,7 @@ function copyTextToClipboard(elementID) { elem.setAttribute("data-is-being-copied", ""); if (navigator.clipboard) { navigator.clipboard.writeText(elem.textContent || ""); - } - else { + } else { const selection = window.getSelection(); if (selection == null) { return; @@ -56,3 +59,17 @@ function copyTextToClipboard(elementID) { elem.removeAttribute("data-is-being-copied"); }, 200); } + +function displayValue(event) { + const elem = event.target; + const table = document.getElementById(elem.dataset.parentTableId); + table.querySelectorAll(".skrubview-table-cell").forEach(cell => { + cell.removeAttribute("data-is-selected"); + }); + elem.setAttribute("data-is-selected", ""); + const data = elem.dataset.cellValue; + const displayBoxId = elem.dataset.displayBoxId; + const displayBox = document.getElementById(displayBoxId); + displayBox.removeAttribute("data-shows-placeholder"); + displayBox.textContent = data; +} diff --git a/src/skrubview/_html.py b/src/skrubview/_html.py index f76f589..fd5ccde 100644 --- a/src/skrubview/_html.py +++ b/src/skrubview/_html.py @@ -14,9 +14,14 @@ def _get_jinja_env(): ), autoescape=True, ) - env.filters["format_number"] = _utils.format_number - env.filters["format_percent"] = _utils.format_percent - env.filters["svg_to_img_src"] = _utils.svg_to_img_src + for function_name in [ + "format_number", + "format_percent", + "svg_to_img_src", + "ellide_string_short", + "ellide_string", + ]: + env.filters[function_name] = getattr(_utils, function_name) return env diff --git a/src/skrubview/_plotting.py b/src/skrubview/_plotting.py index 033428d..853dcd9 100644 --- a/src/skrubview/_plotting.py +++ b/src/skrubview/_plotting.py @@ -68,7 +68,7 @@ def line(x_col, y_col): def value_counts(value_counts, n_unique, color=COLOR_0): - values = [_utils.ellide_string(s, 30) for s in value_counts.keys()][::-1] + values = [_utils.ellide_string_short(s) for s in value_counts.keys()][::-1] counts = list(value_counts.values())[::-1] height = 0.2 * (len(value_counts) + 1.1) if n_unique > len(value_counts): diff --git a/src/skrubview/_summarize.py b/src/skrubview/_summarize.py index 4916928..e0b15b2 100644 --- a/src/skrubview/_summarize.py +++ b/src/skrubview/_summarize.py @@ -23,8 +23,8 @@ def summarize_dataframe( "n_rows": int(shape[0]), "n_columns": int(shape[1]), "columns": [], - "head_html": _utils.to_html(df.slice_rows(0, 5, 1)), - "tail_html": _utils.to_html(df.slice_rows(-5, None, 1)), + "head": _utils.to_row_list(df.slice_rows(0, 5, 1)), + "tail": _utils.to_row_list(df.slice_rows(-5, None, 1)), "first_row_dict": _utils.first_row_dict(df), } if title is not None: @@ -139,7 +139,8 @@ def _add_numeric_summary(summary, column, with_plots, order_by_column): return if not summary["high_cardinality"]: return - summary["standard_deviation"] = float(column.std()) + std = column.std().scalar + summary["standard_deviation"] = float("nan") if std is None else float(std) summary["mean"] = float(column.mean()) quantiles = _utils.quantiles(column) if quantiles[0.0] == quantiles[1.0]: diff --git a/src/skrubview/_utils.py b/src/skrubview/_utils.py index a411769..f8df375 100644 --- a/src/skrubview/_utils.py +++ b/src/skrubview/_utils.py @@ -69,6 +69,14 @@ def first_row_dict(dataframe): return {c: first_row.col(c).to_array().tolist()[0] for c in first_row.column_names} +def to_row_list(dataframe): + columns = dataframe.dataframe.to_dict() + rows = [] + for row_idx in range(dataframe.shape()[0]): + rows.append([col[row_idx] for col in columns.values()]) + return {"header": list(columns.keys()), "data": rows} + + def value_counts(column, high_cardinality_threshold): series = column.column value_counts = series.value_counts() @@ -99,11 +107,14 @@ def ellide_string(s, max_len=100): return s if len(s) <= max_len: return s - if max_len > 30: + if 30 <= max_len: truncated = len(s) - max_len return s[: (max_len - 30)] + f"[… {truncated} more chars]" return s[:max_len] + "…" +def ellide_string_short(s): + return ellide_string(s, 29) + def format_number(number): if not isinstance(number, float): @@ -116,6 +127,7 @@ def format_percent(proportion): return "< 0.1%" return f"{proportion:0.1%}" + def svg_to_img_src(svg): encoded_svg = base64.b64encode(svg.encode("UTF-8")).decode("UTF-8") return f"data:image/svg+xml;base64,{encoded_svg}"