From 84df8a64ae4095c40f364d990537a5657a8e029d Mon Sep 17 00:00:00 2001
From: Simon Willison
Date: Sun, 7 Apr 2024 23:03:40 -0700
Subject: [PATCH] Warn and continue on binary files, closes #5

---
 files_to_prompt/cli.py        | 38 ++++++++++++++++++++++-------------
 tests/test_files_to_prompt.py | 24 ++++++++++++++++++++++
 2 files changed, 48 insertions(+), 14 deletions(-)

diff --git a/files_to_prompt/cli.py b/files_to_prompt/cli.py
index ef0b83a..db5c07e 100644
--- a/files_to_prompt/cli.py
+++ b/files_to_prompt/cli.py
@@ -27,13 +27,17 @@ def process_path(
     path, include_hidden, ignore_gitignore, gitignore_rules, ignore_patterns
 ):
     if os.path.isfile(path):
-        with open(path, "r") as f:
-            file_contents = f.read()
-        click.echo(path)
-        click.echo("---")
-        click.echo(file_contents)
-        click.echo()
-        click.echo("---")
+        try:
+            with open(path, "r") as f:
+                file_contents = f.read()
+            click.echo(path)
+            click.echo("---")
+            click.echo(file_contents)
+            click.echo()
+            click.echo("---")
+        except UnicodeDecodeError:
+            warning_message = f"Warning: Skipping file {path} due to UnicodeDecodeError"
+            click.echo(click.style(warning_message, fg="red"), err=True)
     elif os.path.isdir(path):
         for root, dirs, files in os.walk(path):
             if not include_hidden:
@@ -62,14 +66,20 @@ def process_path(
 
             for file in files:
                 file_path = os.path.join(root, file)
-                with open(file_path, "r") as f:
-                    file_contents = f.read()
+                try:
+                    with open(file_path, "r") as f:
+                        file_contents = f.read()
 
-                click.echo(file_path)
-                click.echo("---")
-                click.echo(file_contents)
-                click.echo()
-                click.echo("---")
+                    click.echo(file_path)
+                    click.echo("---")
+                    click.echo(file_contents)
+                    click.echo()
+                    click.echo("---")
+                except UnicodeDecodeError:
+                    warning_message = (
+                        f"Warning: Skipping file {file_path} due to UnicodeDecodeError"
+                    )
+                    click.echo(click.style(warning_message, fg="red"), err=True)
 
 
 @click.command()
diff --git a/tests/test_files_to_prompt.py b/tests/test_files_to_prompt.py
index 4b94dd1..5e20af1 100644
--- a/tests/test_files_to_prompt.py
+++ b/tests/test_files_to_prompt.py
@@ -162,3 +162,27 @@ def test_mixed_paths_with_options(tmpdir):
         assert "test_dir/.hidden_included.txt" in result.output
         assert "single_file.txt" in result.output
         assert "Contents of single file" in result.output
+
+
+def test_binary_file_warning(tmpdir):
+    runner = CliRunner(mix_stderr=False)
+    with tmpdir.as_cwd():
+        os.makedirs("test_dir")
+        with open("test_dir/binary_file.bin", "wb") as f:
+            f.write(b"\xff")
+        with open("test_dir/text_file.txt", "w") as f:
+            f.write("This is a text file")
+
+        result = runner.invoke(cli, ["test_dir"])
+        assert result.exit_code == 0
+
+        stdout = result.stdout
+        stderr = result.stderr
+
+        assert "test_dir/text_file.txt" in stdout
+        assert "This is a text file" in stdout
+        assert "\ntest_dir/binary_file.bin" not in stdout
+        assert (
+            "Warning: Skipping file test_dir/binary_file.bin due to UnicodeDecodeError"
+            in stderr
+        )