Skip to content

Commit

Permalink
Add --verify-unreachable option to LFS prune
Browse files Browse the repository at this point in the history
LFS prune offers --verify-remote, an option that makes prune exit whenever a reachable object is not on the remote. Unreachable objects, e.g. those referenced only by orphan commits, are not verified and are therefore still pruned.
This commit adds a new option --verify-unreachable that additionally verifies those unreachable objects, so there is a guarantee that data is never lost.
In addition to that, a new option --when-unverified=<halt,continue> can change the default behaviour to not exit when a file is unverified but continue to prune the verified objects.
  • Loading branch information
jochenhz committed Feb 8, 2024
1 parent df0eab4 commit 2be5d26
Show file tree
Hide file tree
Showing 11 changed files with 255 additions and 48 deletions.
6 changes: 4 additions & 2 deletions commands/command_fetch.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,10 @@ func fetchCommand(cmd *cobra.Command, args []string) {

if fetchPruneArg {
verify := fetchPruneCfg.PruneVerifyRemoteAlways
// no dry-run or verbose options in fetch, assume false
prune(fetchPruneCfg, verify, false, false)
verifyUnreachable := fetchPruneCfg.PruneVerifyUnreachableAlways

// assume false for non available options in fetch
prune(fetchPruneCfg, verify, verifyUnreachable, false, false, false)
}

if !success {
Expand Down
2 changes: 1 addition & 1 deletion commands/command_migrate_export.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ func migrateExportCommand(cmd *cobra.Command, args []string) {
fetchPruneCfg.FetchRecentRefsDays = 0

// Prune our cache
prune(fetchPruneCfg, false, false, true)
prune(fetchPruneCfg, false, false, false, false, true)
}

func performForceCheckout(l *tasklog.Logger) error {
Expand Down
104 changes: 71 additions & 33 deletions commands/command_prune.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,15 @@ import (
)

var (
pruneDryRunArg bool
pruneVerboseArg bool
pruneVerifyArg bool
pruneRecentArg bool
pruneForceArg bool
pruneDoNotVerifyArg bool
pruneDryRunArg bool
pruneVerboseArg bool
pruneVerifyArg bool
pruneRecentArg bool
pruneForceArg bool
pruneDoNotVerifyArg bool
pruneVerifyUnreachableArg bool
pruneDoNotVerifyUnreachableArg bool
pruneWhenUnverifiedArg string
)

func pruneCommand(cmd *cobra.Command, args []string) {
Expand All @@ -42,17 +45,30 @@ func pruneCommand(cmd *cobra.Command, args []string) {
fetchPruneConfig := lfs.NewFetchPruneConfig(cfg.Git)
verify := !pruneDoNotVerifyArg &&
(fetchPruneConfig.PruneVerifyRemoteAlways || pruneVerifyArg)
verifyUnreachable := !pruneDoNotVerifyUnreachableArg && (pruneVerifyUnreachableArg || fetchPruneConfig.PruneVerifyUnreachableAlways)

continueWhenUnverified := false
switch pruneWhenUnverifiedArg {
case "halt":
continueWhenUnverified = false
case "continue":
continueWhenUnverified = true
default:
Exit(tr.Tr.Get("Invalid value for --when-unverified: %s", pruneWhenUnverifiedArg))
}

fetchPruneConfig.PruneRecent = pruneRecentArg || pruneForceArg
fetchPruneConfig.PruneForce = pruneForceArg
prune(fetchPruneConfig, verify, pruneDryRunArg, pruneVerboseArg)
prune(fetchPruneConfig, verify, verifyUnreachable, continueWhenUnverified, pruneDryRunArg, pruneVerboseArg)
}

type PruneProgressType int

const (
PruneProgressTypeLocal = PruneProgressType(iota)
PruneProgressTypeRetain = PruneProgressType(iota)
PruneProgressTypeVerify = PruneProgressType(iota)
PruneProgressTypeLocal = PruneProgressType(iota)
PruneProgressTypeRetain = PruneProgressType(iota)
PruneProgressTypeVerify = PruneProgressType(iota)
PruneProgressTypeUnverified = PruneProgressType(iota)
)

// Progress from a sub-task of prune
Expand All @@ -62,7 +78,7 @@ type PruneProgress struct {
}
type PruneProgressChan chan PruneProgress

func prune(fetchPruneConfig lfs.FetchPruneConfig, verifyRemote, dryRun, verbose bool) {
func prune(fetchPruneConfig lfs.FetchPruneConfig, verifyRemote, verifyUnreachable, continueWhenUnverified, dryRun, verbose bool) {
localObjects := make([]fs.Object, 0, 100)
retainedObjects := tools.NewStringSetWithCapacity(100)

Expand All @@ -78,7 +94,7 @@ func prune(fetchPruneConfig lfs.FetchPruneConfig, verifyRemote, dryRun, verbose
// one completes really fast & hits 0 unexpectedly
// each main process can Add() to the wg itself if it subdivides the task
taskwait.Add(5) // 1..5: localObjects, current & recent refs, unpushed, worktree, stashes
if verifyRemote {
if verifyRemote && !verifyUnreachable {
taskwait.Add(1) // 6
}

Expand Down Expand Up @@ -106,7 +122,7 @@ func prune(fetchPruneConfig lfs.FetchPruneConfig, verifyRemote, dryRun, verbose
go pruneTaskGetRetainedUnpushed(gitscanner, fetchPruneConfig, retainChan, errorChan, &taskwait, sem)
go pruneTaskGetRetainedWorktree(gitscanner, fetchPruneConfig, retainChan, errorChan, &taskwait, sem)
go pruneTaskGetRetainedStashed(gitscanner, retainChan, errorChan, &taskwait, sem)
if verifyRemote {
if verifyRemote && !verifyUnreachable {
reachableObjects = tools.NewStringSetWithCapacity(100)
go pruneTaskGetReachableObjects(gitscanner, &reachableObjects, errorChan, &taskwait, sem)
}
Expand Down Expand Up @@ -172,8 +188,6 @@ func prune(fetchPruneConfig lfs.FetchPruneConfig, verifyRemote, dryRun, verbose
}

if verifyRemote {
tracerx.Printf("VERIFYING: %v", file.Oid)

verifyQueue.Add(downloadTransfer(&lfs.WrappedPointer{
Pointer: lfs.NewPointer(file.Oid, file.Size, nil),
}))
Expand All @@ -184,9 +198,20 @@ func prune(fetchPruneConfig lfs.FetchPruneConfig, verifyRemote, dryRun, verbose
if verifyRemote {
verifyQueue.Wait()
verifywait.Wait()

var problems bytes.Buffer
prunableObjectsLen := len(prunableObjects)
prunableObjects, problems = pruneGetVerifiedPrunableObjects(prunableObjects, reachableObjects, verifiedObjects, verifyUnreachable)
if prunableObjectsLen != len(prunableObjects) {
progressChan <- PruneProgress{PruneProgressTypeUnverified, prunableObjectsLen - len(prunableObjects)}
}

close(progressChan) // after verify but before check
progresswait.Wait()
pruneCheckVerified(prunableObjects, reachableObjects, verifiedObjects)

if !continueWhenUnverified && problems.Len() > 0 {
Exit("%s\n%v", tr.Tr.Get("These objects to be pruned are missing on remote:"), problems.String())
}
} else {
close(progressChan)
progresswait.Wait()
Expand Down Expand Up @@ -224,27 +249,31 @@ func logVerboseOutput(logger *tasklog.Logger, verboseOutput []string, numPrunabl
}
}

func pruneCheckVerified(prunableObjects []string, reachableObjects, verifiedObjects tools.StringSet) {
// There's no issue if an object is not reachable and missing, only if reachable & missing
var problems bytes.Buffer
func pruneGetVerifiedPrunableObjects(prunableObjects []string, reachableObjects, verifiedObjects tools.StringSet, verifyUnreachable bool) ([]string, bytes.Buffer) {
verifiedPrunableObjects := make([]string, 0, len(verifiedObjects))
var unverified bytes.Buffer

for _, oid := range prunableObjects {
// Test verified first as most likely reachable
if !verifiedObjects.Contains(oid) {
if reachableObjects.Contains(oid) {
problems.WriteString(fmt.Sprintf(" * %v\n", oid))
if verifiedObjects.Contains(oid) {
verifiedPrunableObjects = append(verifiedPrunableObjects, oid)
} else {
if verifyUnreachable {
tracerx.Printf("UNVERIFIED: %v", oid)
unverified.WriteString(fmt.Sprintf(" * %v\n", oid))
} else {
// Just to indicate why it doesn't matter that we didn't verify
tracerx.Printf("UNREACHABLE: %v", oid)
// There's no issue if an object is not reachable and missing, only if reachable & missing
if reachableObjects.Contains(oid) {
unverified.WriteString(fmt.Sprintf(" * %v\n", oid))
} else {
// Just to indicate why it doesn't matter that we didn't verify
tracerx.Printf("UNREACHABLE: %v", oid)
verifiedPrunableObjects = append(verifiedPrunableObjects, oid)
}
}
}
}
// technically we could still prune the other oids, but this indicates a
// more serious issue because the local state implies that these can be
// deleted but that's incorrect; bad state has occurred somehow, might need
// push --all to resolve
if problems.Len() > 0 {
Exit("%s\n%v", tr.Tr.Get("These objects to be pruned are missing on remote:"), problems.String())
}

return verifiedPrunableObjects, unverified
}

func pruneCheckErrors(taskErrors []error) {
Expand All @@ -265,6 +294,7 @@ func pruneTaskDisplayProgress(progressChan PruneProgressChan, waitg *sync.WaitGr
localCount := 0
retainCount := 0
verifyCount := 0
notRemoteCount := 0
var msg string
for p := range progressChan {
switch p.ProgressType {
Expand All @@ -274,13 +304,18 @@ func pruneTaskDisplayProgress(progressChan PruneProgressChan, waitg *sync.WaitGr
retainCount++
case PruneProgressTypeVerify:
verifyCount++
case PruneProgressTypeUnverified:
notRemoteCount += p.Count
}
msg = fmt.Sprintf("prune: %s, %s",
tr.Tr.GetN("%d local object", "%d local objects", localCount, localCount),
tr.Tr.GetN("%d retained", "%d retained", retainCount, retainCount))
if verifyCount > 0 {
msg += tr.Tr.GetN(", %d verified with remote", ", %d verified with remote", verifyCount, verifyCount)
}
if notRemoteCount > 0 {
msg += tr.Tr.GetN(", %d not on remote", ", %d not on remote", notRemoteCount, notRemoteCount)
}
task.Log(msg)
}
}
Expand Down Expand Up @@ -571,7 +606,10 @@ func init() {
cmd.Flags().BoolVarP(&pruneVerboseArg, "verbose", "v", false, "Print full details of what is/would be deleted")
cmd.Flags().BoolVarP(&pruneRecentArg, "recent", "", false, "Prune even recent objects")
cmd.Flags().BoolVarP(&pruneForceArg, "force", "f", false, "Prune everything that has been pushed")
cmd.Flags().BoolVarP(&pruneVerifyArg, "verify-remote", "c", false, "Verify that remote has LFS files before deleting")
cmd.Flags().BoolVarP(&pruneVerifyArg, "verify-remote", "c", false, "Verify that remote has reachable LFS files before deleting")
cmd.Flags().BoolVar(&pruneDoNotVerifyArg, "no-verify-remote", false, "Override lfs.pruneverifyremotealways and don't verify")
cmd.Flags().BoolVar(&pruneVerifyUnreachableArg, "verify-unreachable", false, "When using --verify-remote, additionally verify unreachable LFS files before deleting.")
cmd.Flags().BoolVar(&pruneDoNotVerifyUnreachableArg, "no-verify-unreachable", false, "Override lfs.pruneverifyunreachablealways and don't verify unreachable objects")
cmd.Flags().StringVar(&pruneWhenUnverifiedArg, "when-unverified", "halt", "halt|continue the execution when objects are not found on the remote")
})
}
3 changes: 3 additions & 0 deletions docs/man/git-lfs-config.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,9 @@ called if --verify-remote is enabled.
* `lfs.pruneverifyremotealways`
+
Always run `git lfs prune` as if `--verify-remote` was provided.
* `lfs.pruneverifyunreachablealways`
+
Always run `git lfs prune` as if `--verify-unreachable` was provided.

=== Extensions

Expand Down
27 changes: 24 additions & 3 deletions docs/man/git-lfs-prune.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,22 @@ gitignore(5).
configuration options specified below in <<_recent_files>>.
`--verify-remote`::
`-c`::
Contact the remote and check that copies of the files we would delete
Contact the remote and check that copies of reachable files we would delete
definitely exist before deleting. See <<_verify_remote>>.
`--no-verify-remote`::
Disables remote verification if lfs.pruneverifyremotealways was enabled in
settings. See <<_verify_remote>>.
`--verify-unreachable`::
When doing `--verify-remote`, contact the remote and check unreachable
objects as well. See <<_verify_remote>>.
`--no-verify-unreachable`::
Disables remote verification of unreachable files if
lfs.pruneverifyunreachablealways was enabled in settings. See
<<_verify_remote>>.
`--when-unverified=<halt,continue>`::
When `--verify-remote` cannot verify an object on the remote, either halt
the execution or continue the deletion of verified objects. See
<<_verify_remote>>.
`--verbose`::
`-v`::
Report the full detail of what is/would be deleted.
Expand Down Expand Up @@ -105,8 +116,8 @@ pruning purposes.

== VERIFY REMOTE

The `--verify-remote` option calls the remote to ensure that any LFS
files to be deleted have copies on the remote before actually deleting
The `--verify-remote` option calls the remote to ensure that any reachable
LFS files to be deleted have copies on the remote before actually deleting
them.

Usually the check performed by <<_unpushed_lfs_files>> is enough to
Expand All @@ -125,6 +136,16 @@ referenced only by orphaned commits), and files which are still
referenced, but by commits which are prunable. This makes the prune
process take longer.

If you want to verify unreachable objects as well, set the
`--verify-unreachable` option.

You can check for unreachable objects by default by setting
`lfs.pruneverifyunreachablealways` to true.

By default, `--verify-remote` halts execution if a file cannot be
verified. Set `--when-unverified=continue` to not halt execution but
continue deleting all objects that can be verified.

== DEFAULT REMOTE

When identifying <<_unpushed_lfs_files>> and performing <<_verify_remote>>, a
Expand Down
5 changes: 4 additions & 1 deletion lfs/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@ type FetchPruneConfig struct {
// Number of days added to FetchRecent*; data outside combined window will be
// deleted when prune is run. (default 3)
PruneOffsetDays int
// Always verify with remote before pruning
// Always verify with remote before pruning reachable objects
PruneVerifyRemoteAlways bool
// When verifying, always verify all reachable and unreachable objects with remote (default false)
PruneVerifyUnreachableAlways bool
// Name of remote to check for unpushed and verify checks
PruneRemoteName string
// Whether to ignore all recent options.
Expand All @@ -40,6 +42,7 @@ func NewFetchPruneConfig(git config.Environment) FetchPruneConfig {
FetchRecentAlways: git.Bool("lfs.fetchrecentalways", false),
PruneOffsetDays: git.Int("lfs.pruneoffsetdays", 3),
PruneVerifyRemoteAlways: git.Bool("lfs.pruneverifyremotealways", false),
PruneVerifyUnreachableAlways: git.Bool("lfs.pruneverifyunreachablealways", false),
PruneRemoteName: pruneRemote,
PruneRecent: false,
PruneForce: false,
Expand Down
15 changes: 9 additions & 6 deletions lfs/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,19 @@ func TestFetchPruneConfigDefault(t *testing.T) {
assert.Equal(t, 3, fp.PruneOffsetDays)
assert.Equal(t, "origin", fp.PruneRemoteName)
assert.False(t, fp.PruneVerifyRemoteAlways)
assert.False(t, fp.PruneVerifyUnreachableAlways)
}

func TestFetchPruneConfigCustom(t *testing.T) {
cfg := config.NewFrom(config.Values{
Git: map[string][]string{
"lfs.fetchrecentrefsdays": []string{"12"},
"lfs.fetchrecentremoterefs": []string{"false"},
"lfs.fetchrecentcommitsdays": []string{"9"},
"lfs.pruneoffsetdays": []string{"30"},
"lfs.pruneverifyremotealways": []string{"true"},
"lfs.pruneremotetocheck": []string{"upstream"},
"lfs.fetchrecentrefsdays": []string{"12"},
"lfs.fetchrecentremoterefs": []string{"false"},
"lfs.fetchrecentcommitsdays": []string{"9"},
"lfs.pruneoffsetdays": []string{"30"},
"lfs.pruneverifyremotealways": []string{"true"},
"lfs.pruneverifyunreachablealways": []string{"true"},
"lfs.pruneremotetocheck": []string{"upstream"},
},
})
fp := NewFetchPruneConfig(cfg.Git)
Expand All @@ -39,4 +41,5 @@ func TestFetchPruneConfigCustom(t *testing.T) {
assert.Equal(t, 30, fp.PruneOffsetDays)
assert.Equal(t, "upstream", fp.PruneRemoteName)
assert.True(t, fp.PruneVerifyRemoteAlways)
assert.True(t, fp.PruneVerifyUnreachableAlways)
}
1 change: 1 addition & 0 deletions lfs/lfs.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ func Environ(cfg *config.Configuration, manifest tq.Manifest, envOverrides map[s
fmt.Sprintf("FetchRecentRefsIncludeRemotes=%v", fetchPruneConfig.FetchRecentRefsIncludeRemotes),
fmt.Sprintf("PruneOffsetDays=%d", fetchPruneConfig.PruneOffsetDays),
fmt.Sprintf("PruneVerifyRemoteAlways=%v", fetchPruneConfig.PruneVerifyRemoteAlways),
fmt.Sprintf("PruneVerifyUnreachableAlways=%v", fetchPruneConfig.PruneVerifyUnreachableAlways),
fmt.Sprintf("PruneRemoteName=%s", fetchPruneConfig.PruneRemoteName),
fmt.Sprintf("LfsStorageDir=%s", cfg.LFSStorageDir()),
fmt.Sprintf("AccessDownload=%s", download.Mode()),
Expand Down
Loading

0 comments on commit 2be5d26

Please sign in to comment.