Skip to content

Commit

Permalink
Fix filename too long when downloading to local folder
Browse files Browse the repository at this point in the history
  • Loading branch information
Wauplin committed Jan 27, 2025
1 parent a259e88 commit da821e5
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 6 deletions.
8 changes: 7 additions & 1 deletion src/huggingface_hub/_local_folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@
```
"""

import base64
import hashlib
import logging
import os
import time
Expand Down Expand Up @@ -84,7 +86,7 @@ class LocalDownloadFilePaths:

def incomplete_path(self, etag: str) -> Path:
"""Return the path where a file will be temporarily downloaded before being moved to `file_path`.

The temporary file is placed in the same directory as `metadata_path`, and its name always
ends with `.{etag}.incomplete`, so different etags map to different temporary files.
"""
# Line deleted by this commit: the temporary name was derived from the metadata filename
# itself via `with_suffix`, so it grew with the length of the original filename.
return self.metadata_path.with_suffix(f".{etag}.incomplete")
# Line added by this commit: the metadata filename is replaced by `_short_hash(...)` of it,
# so the stem has a fixed length regardless of how long the original filename is.
return self.metadata_path.parent / f"{_short_hash(self.metadata_path.name)}.{etag}.incomplete"


@dataclass(frozen=True)
Expand Down Expand Up @@ -424,3 +426,7 @@ def _huggingface_dir(local_dir: Path) -> Path:
except OSError:
pass
return path


def _short_hash(filename: str) -> str:
    """Return a fixed-length, URL-safe fingerprint of `filename`.

    The name is encoded to bytes, hashed with SHA-1, and the raw digest is
    rendered with the URL-safe base64 alphabet (`-` and `_` instead of `+`
    and `/`). The result has the same length whatever the input length.
    """
    raw_name = filename.encode()
    digest = hashlib.sha1(raw_name).digest()
    encoded = base64.urlsafe_b64encode(digest)
    return encoded.decode()
14 changes: 9 additions & 5 deletions tests/test_local_folder.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,17 @@ def test_local_download_paths(tmp_path: Path):
assert paths.metadata_path.parent.is_dir()
assert paths.lock_path.parent.is_dir()

# Incomplete path are etag-based
assert (
paths.incomplete_path("etag123")
== tmp_path / ".cache" / "huggingface" / "download" / "path" / "in" / "repo.txt.etag123.incomplete"
)
# Incomplete paths are etag-based
incomplete_path = paths.incomplete_path("etag123")
assert incomplete_path.parent == tmp_path / ".cache" / "huggingface" / "download" / "path" / "in"
assert incomplete_path.name.endswith(".etag123.incomplete")
assert paths.incomplete_path("etag123").parent.is_dir()

# Incomplete paths are unique per file per etag
other_paths = get_local_download_paths(tmp_path, "path/in/repo_other.txt")
other_incomplete_path = other_paths.incomplete_path("etag123")
assert incomplete_path != other_incomplete_path # different .incomplete files to prevent concurrency issues


def test_local_download_paths_are_recreated_each_time(tmp_path: Path):
paths1 = get_local_download_paths(tmp_path, "path/in/repo.txt")
Expand Down

0 comments on commit da821e5

Please sign in to comment.