Skip to content

Commit

Permalink
fix issue with odd column names (#7)
Browse files (browse the repository at this point in the history)
  • Loading branch information
mplatzer authored Nov 20, 2024
1 parent 8276b05 commit 80982ca
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/mostlyai/qa/filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from sklearn.decomposition import PCA


# NOTE(review): this hunk is reported as "1 addition & 1 deletion", so the first
# assignment below is presumably the removed line and the second its replacement.
# Earlier pattern: an unanchored run of word characters followed by a literal
# dot, which also matches an ordinary column name such as "some.test".
_OLD_COL_PREFIX = r"(\w+)\."
# Current pattern: only a "tgt", "ctx" or "nxt" prefix followed by a literal
# dot, anchored at the start of the string.
_OLD_COL_PREFIX = r"^(tgt|ctx|nxt)\."
# Replacement template: the captured prefix (group 1) followed by "⁝".
# Presumably used as the substitution for _OLD_COL_PREFIX -- confirm at call site.
_NEW_COL_PREFIX = r"\1⁝"


Expand Down
22 changes: 22 additions & 0 deletions tests/end_to_end/test_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,3 +224,25 @@ def test_report_sequential_few_records(tmp_path):
ctx_primary_key="id",
)
assert metrics is not None


def test_odd_column_names(tmp_path):
    """Run the report on a frame whose column names contain unusual characters.

    The columns include a dot, punctuation/brackets/whitespace, and a purely
    numeric name. The same frame is used as both synthetic and training
    target data; the test first generates a report (writing statistics under
    ``tmp_path / "stats"``) and then regenerates a report from those stored
    statistics.
    """
    values = ["a", "b"] * 50
    df = pd.DataFrame(
        {
            "some.test": values,
            "foo%bar|this-long{c[u]rly} *": values,
            "3": values,
        }
    )
    # First pass: compute the report and persist statistics.
    path, metrics = report(
        syn_tgt_data=df,
        trn_tgt_data=df,
        statistics_path=tmp_path / "stats",
    )
    assert metrics is not None
    # Second pass: build the report from the statistics written above.
    path = report_from_statistics(
        syn_tgt_data=df,
        statistics_path=tmp_path / "stats",
    )
    assert path is not None

0 comments on commit 80982ca

Please sign in to comment.