Skip to content

Commit

Permalink
Fix git lfs prune is deleting staged files in the index
Browse files Browse the repository at this point in the history
Running git lfs prune right after git add will prune the indexed files from the LFS cache so that any commit will result in a rightfully rejected push. Even worse, doing multiple commits on changes of the same file will permanently delete LFS data that is not recoverable.

This change retains all files that are in the indices of all worktrees by diffing each index against the corresponding HEAD.
  • Loading branch information
jochenhz committed Feb 1, 2024
1 parent 2bf4567 commit 55a7f00
Show file tree
Hide file tree
Showing 12 changed files with 206 additions and 53 deletions.
2 changes: 1 addition & 1 deletion commands/command_fsck.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ func doFsckObjects(include, exclude string, useIndex bool) []string {
}

if useIndex {
if err := gitscanner.ScanIndex("HEAD", nil); err != nil {
if err := gitscanner.ScanIndex("HEAD", "", nil); err != nil {
ExitWithError(err)
}
}
Expand Down
2 changes: 1 addition & 1 deletion commands/command_ls_files.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ func lsFilesCommand(cmd *cobra.Command, args []string) {
//
// Do so to avoid showing "mixed" results, e.g., ls-files output
// from a specific historical revision, and the index.
if err := gitscanner.ScanIndex(ref, nil); err != nil {
if err := gitscanner.ScanIndex(ref, "", nil); err != nil {
Exit(tr.Tr.Get("Could not scan for Git LFS index: %s", err))
}
}
Expand Down
53 changes: 39 additions & 14 deletions commands/command_prune.go
Original file line number Diff line number Diff line change
Expand Up @@ -471,34 +471,39 @@ func pruneTaskGetRetainedUnpushed(gitscanner *lfs.GitScanner, fetchconf lfs.Fetc
func pruneTaskGetRetainedWorktree(gitscanner *lfs.GitScanner, fetchconf lfs.FetchPruneConfig, retainChan chan string, errorChan chan error, waitg *sync.WaitGroup, sem *semaphore.Weighted) {
defer waitg.Done()

if fetchconf.PruneForce {
return
}

// Retain other worktree HEADs too
// Working copy, branch & maybe commit is different but repo is shared
allWorktreeRefs, err := git.GetAllWorkTreeHEADs(cfg.LocalGitStorageDir())
allWorktrees, err := git.GetAllWorkTrees(cfg.LocalGitStorageDir())
if err != nil {
errorChan <- err
return
}
// Don't repeat any commits, worktrees are always on their own branches but
// may point to the same commit
commits := tools.NewStringSet()
// current HEAD is done elsewhere
headref, err := git.CurrentRef()
if err != nil {
errorChan <- err
return

if !fetchconf.PruneForce {
// current HEAD is done elsewhere
headref, err := git.CurrentRef()
if err != nil {
errorChan <- err
return
}

commits.Add(headref.Sha)
}
commits.Add(headref.Sha)
for _, ref := range allWorktreeRefs {
if commits.Add(ref.Sha) {

for _, worktree := range allWorktrees {
if !fetchconf.PruneForce && commits.Add(worktree.Ref.Sha) {
// Worktree is on a different commit
waitg.Add(1)
// Don't need to 'cd' to worktree since we share same repo
go pruneTaskGetRetainedAtRef(gitscanner, ref.Sha, retainChan, errorChan, waitg, sem)
go pruneTaskGetRetainedAtRef(gitscanner, worktree.Ref.Sha, retainChan, errorChan, waitg, sem)
}

// Always scan the index of the worktree
waitg.Add(1)
go pruneTaskGetRetainedIndex(gitscanner, worktree.Ref.Sha, worktree.Dir, retainChan, errorChan, waitg, sem)
}
}

Expand All @@ -521,6 +526,26 @@ func pruneTaskGetRetainedStashed(gitscanner *lfs.GitScanner, retainChan chan str
}
}

// Background task, must call waitg.Done() once at end
func pruneTaskGetRetainedIndex(gitscanner *lfs.GitScanner, ref string, workingDir string, retainChan chan string, errorChan chan error, waitg *sync.WaitGroup, sem *semaphore.Weighted) {
defer waitg.Done()

err := gitscanner.ScanIndex(ref, workingDir, func(p *lfs.WrappedPointer, err error) {
if err != nil {
errorChan <- err
} else {
retainChan <- p.Pointer.Oid
tracerx.Printf("RETAIN: %v index", p.Pointer.Oid)
}
})

if err != nil {
errorChan <- err
return
}
}


// Background task, must call waitg.Done() once at end
func pruneTaskGetReachableObjects(gitscanner *lfs.GitScanner, outObjectSet *tools.StringSet, errorChan chan error, waitg *sync.WaitGroup, sem *semaphore.Weighted) {
defer waitg.Done()
Expand Down
4 changes: 2 additions & 2 deletions commands/command_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,12 +171,12 @@ func blobInfo(s *lfs.PointerScanner, blobSha, name string) (sha, from string, er
}

func scanIndex(ref string) (staged, unstaged []*lfs.DiffIndexEntry, err error) {
uncached, err := lfs.NewDiffIndexScanner(ref, false, true)
uncached, err := lfs.NewDiffIndexScanner(ref, false, true, "")
if err != nil {
return nil, nil, err
}

cached, err := lfs.NewDiffIndexScanner(ref, true, false)
cached, err := lfs.NewDiffIndexScanner(ref, true, false, "")
if err != nil {
return nil, nil, err
}
Expand Down
44 changes: 37 additions & 7 deletions git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ func CatFile() (*subprocess.BufferedCmd, error) {
return gitNoLFSBuffered("cat-file", "--batch-check")
}

func DiffIndex(ref string, cached bool, refresh bool) (*bufio.Scanner, error) {
func DiffIndex(ref string, cached bool, refresh bool, workingDir string) (*bufio.Scanner, error) {
if refresh {
_, err := gitSimple("update-index", "-q", "--refresh")
if err != nil {
Expand All @@ -243,6 +243,10 @@ func DiffIndex(ref string, cached bool, refresh bool) (*bufio.Scanner, error) {
}
args = append(args, ref)

if workingDir != "" {
args = append([]string{"-C", workingDir}, args...)
}

cmd, err := gitBuffered(args...)
if err != nil {
return nil, err
Expand Down Expand Up @@ -856,18 +860,24 @@ func GitCommonDir() (string, error) {
return tools.CanonicalizePath(path, false)
}

// GetAllWorkTreeHEADs returns the refs that all worktrees are using as HEADs
// A git worktree (ref + path)
type Worktree struct {
Ref Ref
Dir string
}

// GetAllWorkTrees returns the refs that all worktrees are using as HEADs plus the worktree's path.
// This returns all worktrees plus the master working copy, and works even if
// working dir is actually in a worktree right now
// Pass in the git storage dir (parent of 'objects') to work from
func GetAllWorkTreeHEADs(storageDir string) ([]*Ref, error) {
func GetAllWorkTrees(storageDir string) ([]*Worktree, error) {
worktreesdir := filepath.Join(storageDir, "worktrees")
dirf, err := os.Open(worktreesdir)
if err != nil && !os.IsNotExist(err) {
return nil, err
}

var worktrees []*Ref
var worktrees []*Worktree
if err == nil {
// There are some worktrees
defer dirf.Close()
Expand All @@ -886,7 +896,16 @@ func GetAllWorkTreeHEADs(storageDir string) ([]*Ref, error) {
tracerx.Printf("Error reading %v for worktree, skipping: %v", headfile, err)
continue
}
worktrees = append(worktrees, ref)

// Read the gitdir file to get the location of the git repo
dirfile := filepath.Join(worktreesdir, dirfi.Name(), "gitdir")
dir, err := parseDirFile(dirfile)
if err != nil {
tracerx.Printf("Error reading %v for worktree, skipping: %v", dirfile, err)
continue
}

worktrees = append(worktrees, &Worktree{*ref, filepath.Dir(dir)})
}
}
}
Expand All @@ -898,11 +917,13 @@ func GetAllWorkTreeHEADs(storageDir string) ([]*Ref, error) {
headfile := filepath.Join(storageDir, "HEAD")
ref, err := parseRefFile(headfile)
if err == nil {
worktrees = append(worktrees, ref)
dir, err := RootDir()
if err == nil {
worktrees = append(worktrees, &Worktree{*ref, dir})
}
} else if !os.IsNotExist(err) { // ok if not exists, probably bare repo
tracerx.Printf("Error reading %v for main checkout, skipping: %v", headfile, err)
}

return worktrees, nil
}

Expand All @@ -919,6 +940,15 @@ func parseRefFile(filename string) (*Ref, error) {
return ResolveRef(contents)
}

func parseDirFile(filename string) (string, error) {
bytes, err := os.ReadFile(filename)
if err != nil {
return "", err
}
contents := strings.TrimSpace(string(bytes))
return contents, nil
}

// IsBare returns whether or not a repository is bare. It requires that the
// current working directory is a repository.
//
Expand Down
40 changes: 26 additions & 14 deletions git/git_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (

. "github.com/git-lfs/git-lfs/v3/git"
test "github.com/git-lfs/git-lfs/v3/t/cmd/util"
"github.com/git-lfs/git-lfs/v3/tools"
"github.com/stretchr/testify/assert"
)

Expand Down Expand Up @@ -344,6 +345,8 @@ func TestWorkTrees(t *testing.T) {
repo.Cleanup()
}()

repoDir, _ := tools.CanonicalizePath(repo.Path, true)

// test commits; we'll just modify the same file each time since we're
// only interested in branches & dates
inputs := []*test.CommitInput{
Expand Down Expand Up @@ -383,29 +386,38 @@ func TestWorkTrees(t *testing.T) {
test.RunGitCommand(t, true, "worktree", "add", "branch2_wt", "branch2")
test.RunGitCommand(t, true, "worktree", "add", "branch4_wt", "branch4")

refs, err := GetAllWorkTreeHEADs(filepath.Join(repo.Path, ".git"))
worktrees, err := GetAllWorkTrees(filepath.Join(repo.Path, ".git"))
assert.Equal(t, nil, err)
expectedRefs := []*Ref{
expectedWorktrees := []*Worktree{
{
Name: "master",
Type: RefTypeLocalBranch,
Sha: outputs[0].Sha,
Ref: Ref{
Name: "master",
Type: RefTypeLocalBranch,
Sha: outputs[0].Sha,
},
Dir: repoDir,
},
{
Name: "branch2",
Type: RefTypeLocalBranch,
Sha: outputs[1].Sha,
Ref: Ref{
Name: "branch2",
Type: RefTypeLocalBranch,
Sha: outputs[1].Sha,
},
Dir: filepath.Join(repoDir, "branch2_wt"),
},
{
Name: "branch4",
Type: RefTypeLocalBranch,
Sha: outputs[3].Sha,
Ref: Ref{
Name: "branch4",
Type: RefTypeLocalBranch,
Sha: outputs[3].Sha,
},
Dir: filepath.Join(repoDir, "branch4_wt"),
},
}
// Need to sort for consistent comparison
sort.Sort(test.RefsByName(expectedRefs))
sort.Sort(test.RefsByName(refs))
assert.Equal(t, expectedRefs, refs, "Refs should be correct")
sort.Sort(test.WorktreesByName(expectedWorktrees))
sort.Sort(test.WorktreesByName(worktrees))
assert.Equal(t, expectedWorktrees, worktrees, "Worktrees should be correct")
}

func TestVersionCompare(t *testing.T) {
Expand Down
8 changes: 6 additions & 2 deletions lfs/diff_index_scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,15 @@ type DiffIndexScanner struct {
// operation would be undesirable due to the possibility of corruption. It can
// also be disabled where another operation will have refreshed the index.
//
// If "workingDir" is set, the DiffIndexScanner will be run in the given
// directory. Otherwise, the DiffIndexScanner will be run in the current
// working directory.
//
// If any error was encountered in starting the command or closing its `stdin`,
// that error will be returned immediately. Otherwise, a `*DiffIndexScanner`
// will be returned with a `nil` error.
func NewDiffIndexScanner(ref string, cached bool, refresh bool) (*DiffIndexScanner, error) {
scanner, err := git.DiffIndex(ref, cached, refresh)
func NewDiffIndexScanner(ref string, cached bool, refresh bool, workingDir string) (*DiffIndexScanner, error) {
scanner, err := git.DiffIndex(ref, cached, refresh, workingDir)
if err != nil {
return nil, err
}
Expand Down
4 changes: 2 additions & 2 deletions lfs/gitscanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -266,14 +266,14 @@ func (s *GitScanner) ScanPreviousVersions(ref string, since time.Time, cb GitSca
}

// ScanIndex scans the git index for modified LFS objects.
func (s *GitScanner) ScanIndex(ref string, cb GitScannerFoundPointer) error {
func (s *GitScanner) ScanIndex(ref string, workingDir string, cb GitScannerFoundPointer) error {
callback, err := firstGitScannerCallback(cb, s.foundPointer)
if err != nil {
return err
}

start := time.Now()
err = scanIndex(callback, ref, s.Filter, s.cfg.GitEnv(), s.cfg.OSEnv())
err = scanIndex(callback, ref, workingDir, s.Filter, s.cfg.GitEnv(), s.cfg.OSEnv())
tracerx.PerformanceSince("ScanIndex", start)

return err
Expand Down
10 changes: 5 additions & 5 deletions lfs/gitscanner_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,19 @@ import (
//
// Ref is the ref at which to scan, which may be "HEAD" if there is at least one
// commit.
func scanIndex(cb GitScannerFoundPointer, ref string, f *filepathfilter.Filter, gitEnv, osEnv config.Environment) error {
func scanIndex(cb GitScannerFoundPointer, ref string, workingDir string, f *filepathfilter.Filter, gitEnv, osEnv config.Environment) error {
indexMap := &indexFileMap{
nameMap: make(map[string][]*indexFile),
nameShaPairs: make(map[string]bool),
mutex: &sync.Mutex{},
}

revs, err := revListIndex(ref, false, indexMap)
revs, err := revListIndex(ref, false, indexMap, workingDir)
if err != nil {
return err
}

cachedRevs, err := revListIndex(ref, true, indexMap)
cachedRevs, err := revListIndex(ref, true, indexMap, workingDir)
if err != nil {
return err
}
Expand Down Expand Up @@ -109,8 +109,8 @@ func scanIndex(cb GitScannerFoundPointer, ref string, f *filepathfilter.Filter,
// revListIndex uses git diff-index to return the list of object sha1s
// for in the indexf. It returns a channel from which sha1 strings can be read.
// The namMap will be filled indexFile pointers mapping sha1s to indexFiles.
func revListIndex(atRef string, cache bool, indexMap *indexFileMap) (*StringChannelWrapper, error) {
scanner, err := NewDiffIndexScanner(atRef, cache, false)
func revListIndex(atRef string, cache bool, indexMap *indexFileMap, workingDir string) (*StringChannelWrapper, error) {
scanner, err := NewDiffIndexScanner(atRef, cache, false, workingDir)
if err != nil {
return nil, err
}
Expand Down
7 changes: 7 additions & 0 deletions t/cmd/util/testutils.go
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,13 @@ func (a RefsByName) Len() int { return len(a) }
func (a RefsByName) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a RefsByName) Less(i, j int) bool { return a[i].Name < a[j].Name }

// WorktreesByName implements sort.Interface for []*git.Worktree based on dir
type WorktreesByName []*git.Worktree

func (a WorktreesByName) Len() int { return len(a) }
func (a WorktreesByName) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a WorktreesByName) Less(i, j int) bool { return a[i].Dir < a[j].Dir }

// WrappedPointersByOid implements sort.Interface for []*lfs.WrappedPointer based on oid
type WrappedPointersByOid []*lfs.WrappedPointer

Expand Down
Loading

0 comments on commit 55a7f00

Please sign in to comment.