Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: Interpret all rpaths as resource names in transactions #318

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/python.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ on:
branches:
- main

permissions: {}

jobs:
lint:
name: Run code checks and formatting hooks
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ on:
types:
- published

permissions: {}

jobs:
build:
name: Build source distribution and wheel
Expand Down
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ repos:
- id: end-of-file-fixer
- id: mixed-line-ending
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.14.1
rev: v1.15.0
hooks:
# See https://github.com/pre-commit/mirrors-mypy/blob/main/.pre-commit-hooks.yaml
- id: mypy
types_or: [python, pyi]
args: [--ignore-missing-imports, --scripts-are-modules]
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.1
rev: v0.9.9
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
Expand All @@ -31,12 +31,12 @@ repos:
# - id: pydoclint
# args: [--check-class-attributes=False]
- repo: https://github.com/astral-sh/uv-pre-commit
rev: 0.5.20
rev: 0.6.3
hooks:
- id: uv-lock
name: Lock project dependencies
- repo: https://github.com/woodruffw/zizmor-pre-commit
rev: v1.0.1
rev: v1.4.1
hooks:
- id: zizmor
args: [--min-severity=medium]
13 changes: 10 additions & 3 deletions docs/guides/transactions.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ from lakefs_spec import LakeFSFileSystem
fs = LakeFSFileSystem()

with fs.transaction("repo", "main") as tx:
fs.put_file("train-data.txt", f"repo/{tx.branch.id}/train-data.txt")
fs.put_file("train-data.txt", "train-data.txt")
tx.commit(message="Add training data")
fs.put_file("test-data.txt", f"repo/{tx.branch.id}/test-data.txt")
fs.put_file("test-data.txt", "test-data.txt")
sha = tx.commit(message="Add test data")
tx.tag(sha, name="My train-test split")
```
Expand All @@ -35,6 +35,13 @@ The full list of supported lakeFS versioning operations (by default, these opera
* [`rev_parse`](../reference/lakefs_spec/transaction.md#lakefs_spec.transaction.LakeFSTransaction.rev_parse), for parsing revisions like branch/tag names and SHA fragments into full commit SHAs.
* [`tag`](../reference/lakefs_spec/transaction.md#lakefs_spec.transaction.LakeFSTransaction.tag), for creating a tag pointing to a commit.

## Limitations of transactions

Transactions are scoped to a single repository and branch only, equal to those given to the `fs.transaction()` context manager.
When uploading files in a transaction via `fs.put()` or `fs.put_file()`, you **must** give all remote paths as file names.
If you use a fully qualified URI, leading repository and branch names will be interpreted as subdirectories, which will be created on upload.
No warnings or errors will be thrown, so be sure to double-check your paths in all transaction scopes.

## Lifecycle of ephemeral transaction branches

You can control the lifecycle for a transaction branch with the `delete` argument:
Expand All @@ -56,7 +63,7 @@ from lakefs_spec import LakeFSFileSystem
fs = LakeFSFileSystem()

with fs.transaction("repo", "main", delete="onsuccess") as tx:
fs.put_file("my-file.txt", f"repo/{tx.branch.id}/my-file.txt")
fs.put_file("my-file.txt", "my-file.txt")
tx.commit(message="Add my-file.txt")
raise ValueError("oops!")
```
Expand Down
3 changes: 3 additions & 0 deletions src/lakefs_spec/spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -684,6 +684,9 @@ def put_file(
lpath = stringify_path(lpath)
rpath = stringify_path(rpath)

if self._intrans:
rpath = self.transaction.make_uri(rpath)

if precheck and Path(lpath).is_file():
remote_checksum = self.checksum(rpath)
local_checksum = md5_checksum(lpath, blocksize=self.blocksize)
Expand Down
10 changes: 10 additions & 0 deletions src/lakefs_spec/transaction.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

import logging
import os
import random
import string
import warnings
Expand Down Expand Up @@ -145,6 +146,15 @@ def __exit__(self, exc_type, exc_val, exc_tb):
if self.delete == "always" or (success and self.delete == "onsuccess"):
self._ephemeral_branch.delete()

def make_uri(self, path: str | os.PathLike[str]) -> str:
spath = str(path)
# NB: this fails silently if the input path is already fully qualified.
# However, in general, it's impossible to distinguish between a
# fully qualified path and a normal nested path, so at most, we
# could split off the first segment of the input and check it against existing
# repositories.
return "/".join([self.repository, self.branch.id, spath])

@property
def branch(self):
return self._ephemeral_branch
Expand Down
4 changes: 2 additions & 2 deletions tests/test_put_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def test_no_change_postcommit(
message = f"Add file {random_file.name}"

with fs.transaction(repository, temp_branch) as tx:
fs.put(lpath, f"{repository.id}/{tx.branch.id}/{random_file.name}")
fs.put(lpath, random_file.name)
tx.commit(message=message)

commits = list(temp_branch.log(max_amount=2))
Expand All @@ -31,7 +31,7 @@ def test_no_change_postcommit(

# put the same file again, this time the diff is empty
with fs.transaction(repository, temp_branch) as tx:
fs.put(lpath, f"{repository.id}/{tx.branch.id}/{random_file.name}", precheck=False)
fs.put(lpath, random_file.name, precheck=False)
tx.commit(message=f"Add file {random_file.name}")

# check that no other commit has happened.
Expand Down
15 changes: 7 additions & 8 deletions tests/test_transactions.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,11 @@ def test_transaction_commit(
random_file = random_file_factory.make()

lpath = str(random_file)
rpath = f"{repository.id}/{temp_branch.id}/{random_file.name}"

message = f"Add file {random_file.name}"

with fs.transaction(repository, temp_branch) as tx:
fs.put_file(lpath, f"{repository.id}/{tx.branch.id}/{random_file.name}")
fs.put_file(lpath, random_file.name)
assert len(tx.files) == 1
# sha is a placeholder for the actual SHA created on transaction completion.
sha = tx.commit(message=message)
Expand Down Expand Up @@ -65,7 +64,7 @@ def test_transaction_merge(
tbname = tx.branch.id
lpath = str(random_file)
# stage a file on the transaction branch...
fs.put_file(lpath, f"{repository.id}/{tx.branch.id}/{random_file.name}")
fs.put_file(lpath, random_file.name)
# ... commit it with the above message
tx.commit(message=message)
# ... and merge it into temp_branch.
Expand All @@ -90,7 +89,7 @@ def test_transaction_revert(
message = f"Add file {random_file.name}"

with fs.transaction(repository, temp_branch, automerge=True) as tx:
fs.put_file(lpath, f"{repository.id}/{tx.branch.id}/{random_file.name}")
fs.put_file(lpath, random_file.name)
tx.commit(message=message)
revert_commit = tx.revert(temp_branch, temp_branch.head)

Expand All @@ -113,7 +112,7 @@ def test_transaction_failure(

try:
with fs.transaction(repository, temp_branch) as tx:
fs.put_file(lpath, f"{repository.id}/{tx.branch.id}/{random_file.name}")
fs.put_file(lpath, random_file.name)
tx.commit(message=message)
raise RuntimeError("something went wrong")
except RuntimeError:
Expand Down Expand Up @@ -159,8 +158,8 @@ def test_warn_uncommitted_changes(
lpath = str(random_file)

with pytest.warns(match="uncommitted changes.*lost"):
with fs.transaction(repository, temp_branch) as tx:
fs.put_file(lpath, f"{repository.id}/{tx.branch.id}/{random_file.name}")
with fs.transaction(repository, temp_branch):
fs.put_file(lpath, random_file.name)


def test_warn_uncommitted_changes_on_persisted_branch(
Expand All @@ -175,4 +174,4 @@ def test_warn_uncommitted_changes_on_persisted_branch(

with pytest.warns(match="uncommitted changes(?:(?!lost).)*$"):
with fs.transaction(repository, temp_branch, delete="never") as tx:
fs.put_file(lpath, f"{repository.id}/{tx.branch.id}/{random_file.name}")
fs.put_file(lpath, random_file.name)
Loading
Loading