Skip to content

Commit

Permalink
commands,lfs: drop GitScanner Close() method
Browse files Browse the repository at this point in the history
The GitScanner structure and its methods were introduced in PR git-lfs#1670,
and in commit bdbca39 of that PR
the structure's Close() method was introduced.  Unlike other similar
structures whose Close() methods should be called to release underlying
resources such as channels or I/O streams, the (*GitScanner).Close()
method serves only to output an optional performance timing trace metric.

This Close() method is not called consistently; for instance, it is never
called by the migrateExportCommand() function of the "git lfs migrate"
command, and will be skipped by the checkoutCommand() function of the
"git lfs checkout" command if an error is returned by the
(*GitScanner).ScanTree() method.

The utility of the performance timing metric is also undercut by the
fact that some commands perform other tasks before and after calling
the specific (*GitScanner).Scan*() method they invoke.  And in the
particular case of the "git lfs prune" command, multiple goroutines
are started, each of which runs a different Scan*() method simultaneously
with the others, so the final timing metric does not account for
their different execution times, just the overall final timing.

We can improve the value of the timing metric while also simplifying
the calling convention for the GitScanner structure's methods by
removing the Close() method, and tracing the performance of each
Scan*() method individually.

Removing the Close() method clarifies that no underlying resources
must be released for the GitScanner structure, and so callers need
not try to register a deferred call to the method.  This parallels
some other conventional Go structures, such as the Scanner structure
of the "bufio" package.

As well, running a "git lfs prune" command with the GIT_TRACE_PERFORMANCE=1
environment variable set now results in more detailed and useful output,
for example:

  12:36:51.221526 performance ScanStashed: 0.013632533 s
  12:36:51.224494 performance ScanUnpushed: 0.016570280 s
  12:36:51.240670 performance ScanTree: 0.017171717 s
  • Loading branch information
chrisd8088 committed Jun 7, 2023
1 parent 2dd8934 commit a0986c7
Show file tree
Hide file tree
Showing 11 changed files with 73 additions and 45 deletions.
1 change: 0 additions & 1 deletion commands/command_checkout.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ func checkoutCommand(cmd *cobra.Command, args []string) {
if err := chgitscanner.ScanTree(ref.Sha, nil); err != nil {
ExitWithError(err)
}
chgitscanner.Close()

meter.Start()
for _, p := range pointers {
Expand Down
1 change: 0 additions & 1 deletion commands/command_dedup.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ func dedupCommand(cmd *cobra.Command, args []string) {
atomic.AddInt64(&dedupStats.totalProcessedSize, p.Size)
}
})
defer gitScanner.Close()

if err := gitScanner.ScanTree("HEAD", nil); err != nil {
ExitWithError(err)
Expand Down
5 changes: 0 additions & 5 deletions commands/command_fetch.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,6 @@ func pointersToFetchForRef(ref string, filter *filepathfilter.Filter) ([]*lfs.Wr
return nil, err
}

tempgitscanner.Close()
return pointers, multiErr
}

Expand Down Expand Up @@ -180,7 +179,6 @@ func pointersToFetchForRefs(refs []string) ([]*lfs.WrappedPointer, error) {
return nil, err
}

tempgitscanner.Close()
return pointers, multiErr
}

Expand Down Expand Up @@ -212,7 +210,6 @@ func fetchPreviousVersions(ref string, since time.Time, filter *filepathfilter.F
ExitWithError(err)
}

tempgitscanner.Close()
return fetchAndReportToChan(pointers, filter, nil)
}

Expand Down Expand Up @@ -317,8 +314,6 @@ func scanAll() []*lfs.WrappedPointer {
Panic(err, tr.Tr.Get("Could not scan for Git LFS files"))
}

tempgitscanner.Close()

if multiErr != nil {
Panic(multiErr, tr.Tr.Get("Could not scan for Git LFS files"))
}
Expand Down
2 changes: 0 additions & 2 deletions commands/command_fsck.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,6 @@ func doFsckObjects(include, exclude string, useIndex bool) []string {
}
}

gitscanner.Close()
return corruptOids
}

Expand Down Expand Up @@ -209,7 +208,6 @@ func doFsckPointers(include, exclude string) []corruptPointer {
}
}

gitscanner.Close()
return corruptPointers
}

Expand Down
1 change: 0 additions & 1 deletion commands/command_ls_files.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,6 @@ func lsFilesCommand(cmd *cobra.Command, args []string) {

seen[p.Name] = struct{}{}
})
defer gitscanner.Close()

includeArg, excludeArg := getIncludeExcludeArgs(cmd)
gitscanner.Filter = buildFilepathFilter(cfg, includeArg, excludeArg, false)
Expand Down
3 changes: 1 addition & 2 deletions commands/command_prune.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,7 @@ func prune(fetchPruneConfig lfs.FetchPruneConfig, verifyRemote, dryRun, verbose
progresswait.Add(1)
go pruneTaskDisplayProgress(progressChan, &progresswait, logger)

taskwait.Wait() // wait for subtasks
gitscanner.Close()
taskwait.Wait() // wait for subtasks
close(retainChan) // triggers retain collector to end now all tasks have
retainwait.Wait() // make sure all retained objects added

Expand Down
1 change: 0 additions & 1 deletion commands/command_pull.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ func pull(filter *filepathfilter.Filter) {
}

meter.Start()
gitscanner.Close()
q.Wait()
wg.Wait()
tracerx.PerformanceSince("process queue", processQueue)
Expand Down
1 change: 0 additions & 1 deletion commands/command_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,6 @@ func statusScanRefRange(ref *git.Ref) {

Print("\t%s (%s)", p.Name, p.Oid)
})
defer gitscanner.Close()

Print("%s\n", tr.Tr.Get("Objects to be pushed to %s:", remoteRef.Name))
if err := gitscanner.ScanRefRange(ref.Sha, remoteRef.Sha, nil); err != nil {
Expand Down
5 changes: 1 addition & 4 deletions commands/uploader.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,7 @@ import (

func uploadForRefUpdates(ctx *uploadContext, updates []*git.RefUpdate, pushAll bool) error {
gitscanner := ctx.buildGitScanner()
defer func() {
gitscanner.Close()
ctx.ReportErrors()
}()
defer ctx.ReportErrors()

verifyLocksForUpdates(ctx.lockVerifier, updates)
exclude := make([]string, 0, len(updates))
Expand Down
97 changes: 71 additions & 26 deletions lfs/gitscanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@ type GitScanner struct {
remote string
skippedRefs []string

closed bool
started time.Time
cfg *config.Configuration
}

Expand All @@ -43,18 +41,7 @@ type GitScannerSet interface {
// NewGitScanner initializes a *GitScanner for a Git repository in the current
// working directory.
func NewGitScanner(cfg *config.Configuration, cb GitScannerFoundPointer) *GitScanner {
return &GitScanner{started: time.Now(), FoundPointer: cb, cfg: cfg}
}

// Close stops exits once all processing has stopped, and all resources are
// tracked and cleaned up.
func (s *GitScanner) Close() {
if s.closed {
return
}

s.closed = true
tracerx.PerformanceSince("scan", s.started)
return &GitScanner{FoundPointer: cb, cfg: cfg}
}

// RemoteForPush sets up this *GitScanner to scan for objects to push to the
Expand All @@ -81,7 +68,11 @@ func (s *GitScanner) ScanMultiRangeToRemote(include string, exclude []string, cb
return errors.New(tr.Tr.Get("unable to scan starting at %q: no remote set", include))
}

return scanRefsToChanSingleIncludeMultiExclude(s, callback, include, exclude, s.cfg.GitEnv(), s.cfg.OSEnv(), s.opts(ScanRangeToRemoteMode))
start := time.Now()
err = scanRefsToChanSingleIncludeMultiExclude(s, callback, include, exclude, s.cfg.GitEnv(), s.cfg.OSEnv(), s.opts(ScanRangeToRemoteMode))
tracerx.PerformanceSince("ScanMultiRangeToRemote", start)

return err
}

// ScanRefs scans through all unique objects reachable from the "include" refs
Expand All @@ -95,7 +86,12 @@ func (s *GitScanner) ScanRefs(include, exclude []string, cb GitScannerFoundPoint

opts := s.opts(ScanRefsMode)
opts.SkipDeletedBlobs = false
return scanRefsToChan(s, callback, include, exclude, s.cfg.GitEnv(), s.cfg.OSEnv(), opts)

start := time.Now()
err = scanRefsToChan(s, callback, include, exclude, s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
tracerx.PerformanceSince("ScanRefs", start)

return err
}

// ScanRefRange scans through all unique objects reachable from the "include"
Expand All @@ -109,7 +105,12 @@ func (s *GitScanner) ScanRefRange(include, exclude string, cb GitScannerFoundPoi

opts := s.opts(ScanRefsMode)
opts.SkipDeletedBlobs = false
return scanRefsToChanSingleIncludeExclude(s, callback, include, exclude, s.cfg.GitEnv(), s.cfg.OSEnv(), opts)

start := time.Now()
err = scanRefsToChanSingleIncludeExclude(s, callback, include, exclude, s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
tracerx.PerformanceSince("ScanRefRange", start)

return err
}

// ScanRefRangeByTree scans through all objects reachable from the "include"
Expand All @@ -125,7 +126,12 @@ func (s *GitScanner) ScanRefRangeByTree(include, exclude string, cb GitScannerFo
opts := s.opts(ScanRefsMode)
opts.SkipDeletedBlobs = false
opts.CommitsOnly = true
return scanRefsByTree(s, callback, []string{include}, []string{exclude}, s.cfg.GitEnv(), s.cfg.OSEnv(), opts)

start := time.Now()
err = scanRefsByTree(s, callback, []string{include}, []string{exclude}, s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
tracerx.PerformanceSince("ScanRefRangeByTree", start)

return err
}

// ScanRefWithDeleted scans through all unique objects in the given ref,
Expand All @@ -144,7 +150,12 @@ func (s *GitScanner) ScanRef(ref string, cb GitScannerFoundPointer) error {

opts := s.opts(ScanRefsMode)
opts.SkipDeletedBlobs = true
return scanRefsToChanSingleIncludeExclude(s, callback, ref, "", s.cfg.GitEnv(), s.cfg.OSEnv(), opts)

start := time.Now()
err = scanRefsToChanSingleIncludeExclude(s, callback, ref, "", s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
tracerx.PerformanceSince("ScanRef", start)

return err
}

// ScanRefByTree scans through all objects in the current ref, excluding
Expand All @@ -159,7 +170,12 @@ func (s *GitScanner) ScanRefByTree(ref string, cb GitScannerFoundPointer) error
opts := s.opts(ScanRefsMode)
opts.SkipDeletedBlobs = true
opts.CommitsOnly = true
return scanRefsByTree(s, callback, []string{ref}, []string{}, s.cfg.GitEnv(), s.cfg.OSEnv(), opts)

start := time.Now()
err = scanRefsByTree(s, callback, []string{ref}, []string{}, s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
tracerx.PerformanceSince("ScanRefByTree", start)

return err
}

// ScanAll scans through all unique objects in the repository, including
Expand All @@ -172,7 +188,12 @@ func (s *GitScanner) ScanAll(cb GitScannerFoundPointer) error {

opts := s.opts(ScanAllMode)
opts.SkipDeletedBlobs = false
return scanRefsToChanSingleIncludeExclude(s, callback, "", "", s.cfg.GitEnv(), s.cfg.OSEnv(), opts)

start := time.Now()
err = scanRefsToChanSingleIncludeExclude(s, callback, "", "", s.cfg.GitEnv(), s.cfg.OSEnv(), opts)
tracerx.PerformanceSince("ScanAll", start)

return err
}

// ScanTree takes a ref and returns WrappedPointer objects in the tree at that
Expand All @@ -183,7 +204,12 @@ func (s *GitScanner) ScanTree(ref string, cb GitScannerFoundPointer) error {
if err != nil {
return err
}
return runScanTree(callback, ref, s.Filter, s.cfg.GitEnv(), s.cfg.OSEnv())

start := time.Now()
err = runScanTree(callback, ref, s.Filter, s.cfg.GitEnv(), s.cfg.OSEnv())
tracerx.PerformanceSince("ScanTree", start)

return err
}

// ScanUnpushed scans history for all LFS pointers which have been added but not
Expand All @@ -193,7 +219,12 @@ func (s *GitScanner) ScanUnpushed(remote string, cb GitScannerFoundPointer) erro
if err != nil {
return err
}
return scanUnpushed(callback, remote)

start := time.Now()
err = scanUnpushed(callback, remote)
tracerx.PerformanceSince("ScanUnpushed", start)

return err
}

// ScanStashed scans for all LFS pointers referenced solely by a stash
Expand All @@ -203,7 +234,11 @@ func (s *GitScanner) ScanStashed(cb GitScannerFoundPointer) error {
return err
}

return scanStashed(callback)
start := time.Now()
err = scanStashed(callback)
tracerx.PerformanceSince("ScanStashed", start)

return err
}

// ScanPreviousVersions scans changes reachable from ref (commit) back to since.
Expand All @@ -215,7 +250,12 @@ func (s *GitScanner) ScanPreviousVersions(ref string, since time.Time, cb GitSca
if err != nil {
return err
}
return logPreviousSHAs(callback, ref, s.Filter, since)

start := time.Now()
err = logPreviousSHAs(callback, ref, s.Filter, since)
tracerx.PerformanceSince("ScanPreviousVersions", start)

return err
}

// ScanIndex scans the git index for modified LFS objects.
Expand All @@ -224,7 +264,12 @@ func (s *GitScanner) ScanIndex(ref string, cb GitScannerFoundPointer) error {
if err != nil {
return err
}
return scanIndex(callback, ref, s.Filter, s.cfg.GitEnv(), s.cfg.OSEnv())

start := time.Now()
err = scanIndex(callback, ref, s.Filter, s.cfg.GitEnv(), s.cfg.OSEnv())
tracerx.PerformanceSince("ScanIndex", start)

return err
}

func (s *GitScanner) opts(mode ScanningMode) *ScanRefsOptions {
Expand Down
1 change: 0 additions & 1 deletion lfs/scanner_git_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ func scanUnpushed(remoteName string) ([]*WrappedPointer, error) {
return nil, err
}

gitscanner.Close()
return pointers, multiErr
}

Expand Down

0 comments on commit a0986c7

Please sign in to comment.