Skip to content

Commit

Permalink
feat: add support for shallow cloning repos (#363)
Browse files Browse the repository at this point in the history
Signed-off-by: Mmadu Manasseh <[email protected]>
  • Loading branch information
MeNsaaH authored Feb 25, 2025
1 parent 7a476ba commit ed4056c
Show file tree
Hide file tree
Showing 10 changed files with 117 additions and 17 deletions.
8 changes: 4 additions & 4 deletions cmd/locations.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import (

func processLocations(ctx context.Context, ctr container.Container, locations []string) error {
for index, location := range locations {
if newLocation, err := maybeCloneGitUrl(ctx, ctr.RepoManager, ctr.Config.RepoRefreshInterval, location, ctr.VcsClient.Username()); err != nil {
if newLocation, err := maybeCloneGitUrl(ctx, ctr.RepoManager, ctr.Config.RepoRefreshInterval, location, ctr.VcsClient.Username(), ctr.Config.RepoShallowClone); err != nil {
return errors.Wrapf(err, "failed to clone %q", location)
} else if newLocation != "" {
locations[index] = newLocation
Expand All @@ -31,12 +31,12 @@ func processLocations(ctx context.Context, ctr container.Container, locations []
}

type cloner interface {
Clone(ctx context.Context, cloneUrl, branchName string) (*git.Repo, error)
Clone(ctx context.Context, cloneUrl, branchName string, shallow bool) (*git.Repo, error)
}

var ErrCannotUseQueryWithFilePath = errors.New("relative and absolute file paths cannot have query parameters")

func maybeCloneGitUrl(ctx context.Context, repoManager cloner, repoRefreshDuration time.Duration, location, vcsUsername string) (string, error) {
func maybeCloneGitUrl(ctx context.Context, repoManager cloner, repoRefreshDuration time.Duration, location, vcsUsername string, shallow bool) (string, error) {
result := strings.SplitN(location, "?", 2)
if !isGitURL(result[0]) {
if len(result) > 1 {
Expand All @@ -51,7 +51,7 @@ func maybeCloneGitUrl(ctx context.Context, repoManager cloner, repoRefreshDurati
}
cloneUrl := repoUrl.CloneURL(vcsUsername)

repo, err := repoManager.Clone(ctx, cloneUrl, query.Get("branch"))
repo, err := repoManager.Clone(ctx, cloneUrl, query.Get("branch"), shallow)
if err != nil {
return "", errors.Wrap(err, "failed to clone")
}
Expand Down
10 changes: 5 additions & 5 deletions cmd/locations_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ type fakeCloner struct {
err error
}

func (f *fakeCloner) Clone(_ context.Context, cloneUrl, branchName string) (*git.Repo, error) {
func (f *fakeCloner) Clone(_ context.Context, cloneUrl, branchName string, shallow bool) (*git.Repo, error) {
f.cloneUrl = cloneUrl
f.branchName = branchName
return f.result, f.err
Expand All @@ -43,7 +43,7 @@ func TestMaybeCloneGitUrl_NonGitUrl(t *testing.T) {
tc := tc
t.Run(tc.name, func(t *testing.T) {
fc := &fakeCloner{result: nil, err: nil}
actual, err := maybeCloneGitUrl(ctx, fc, time.Duration(0), tc.input, testUsername)
actual, err := maybeCloneGitUrl(ctx, fc, time.Duration(0), tc.input, testUsername, false)
require.NoError(t, err)
assert.Equal(t, "", fc.branchName)
assert.Equal(t, "", fc.cloneUrl)
Expand Down Expand Up @@ -137,7 +137,7 @@ func TestMaybeCloneGitUrl_HappyPath(t *testing.T) {
tc := tc
t.Run(tc.name, func(t *testing.T) {
fc := &fakeCloner{result: &git.Repo{Directory: testRoot}, err: nil}
actual, err := maybeCloneGitUrl(ctx, fc, time.Duration(0), tc.input, testUsername)
actual, err := maybeCloneGitUrl(ctx, fc, time.Duration(0), tc.input, testUsername, false)
require.NoError(t, err)
assert.Equal(t, tc.expected.branch, fc.branchName)
assert.Equal(t, tc.expected.cloneUrl, fc.cloneUrl)
Expand Down Expand Up @@ -165,7 +165,7 @@ func TestMaybeCloneGitUrl_URLError(t *testing.T) {
tc := tc
t.Run(tc.name, func(t *testing.T) {
fc := &fakeCloner{result: &git.Repo{Directory: testRoot}, err: nil}
result, err := maybeCloneGitUrl(ctx, fc, time.Duration(0), tc.input, testUsername)
result, err := maybeCloneGitUrl(ctx, fc, time.Duration(0), tc.input, testUsername, false)
require.ErrorContains(t, err, tc.expected)
require.Equal(t, "", result)
})
Expand Down Expand Up @@ -193,7 +193,7 @@ func TestMaybeCloneGitUrl_CloneError(t *testing.T) {
defer cancel()

fc := &fakeCloner{result: &git.Repo{Directory: testRoot}, err: tc.cloneError}
result, err := maybeCloneGitUrl(ctx, fc, time.Duration(0), tc.input, testUsername)
result, err := maybeCloneGitUrl(ctx, fc, time.Duration(0), tc.input, testUsername, false)
require.ErrorContains(t, err, tc.expected)
require.Equal(t, "", result)
})
Expand Down
3 changes: 3 additions & 0 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,9 @@ func init() {
newStringOpts().
withDefault("kubechecks again"))
stringSliceFlag(flags, "additional-apps-namespaces", "Additional namespaces other than the ArgoCDNamespace to monitor for applications.")
boolFlag(flags, "repo-shallow-clone", "Enable shallow cloning for all git repos.",
newBoolOpts().
withDefault(false))
stringFlag(flags, "identifier", "Identifier for the kubechecks instance. Used to differentiate between multiple kubechecks instances.",
newStringOpts().
withDefault(""))
Expand Down
1 change: 1 addition & 0 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ The full list of supported environment variables is described below:
|`KUBECHECKS_POLICIES_LOCATION`|Sets rego policy locations to be used for every check request. Can be common path inside the repos being checked or git urls in either git or http(s) format.|`[./policies]`|
|`KUBECHECKS_REPLAN_COMMENT_MSG`|comment message which re-triggers kubechecks on PR.|`kubechecks again`|
|`KUBECHECKS_REPO_REFRESH_INTERVAL`|Interval between static repo refreshes (for schemas and policies).|`5m`|
|`KUBECHECKS_REPO_SHALLOW_CLONE`|Enable shallow cloning for all git repos.|`false`|
|`KUBECHECKS_SCHEMAS_LOCATION`|Sets schema locations to be used for every check request. Can be a common path on the host or git urls in either git or http(s) format.|`[]`|
|`KUBECHECKS_SHOW_DEBUG_INFO`|Set to true to print debug info to the footer of MR comments.|`false`|
|`KUBECHECKS_TIDY_OUTDATED_COMMENTS_MODE`|Sets the mode to use when tidying outdated comments. One of hide, delete.|`hide`|
Expand Down
4 changes: 3 additions & 1 deletion localdev/kubechecks/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,18 @@ configMap:
#
# KUBECHECKS_LABEL_FILTER: "test" # On your PR/MR, prefix this with "kubechecks:"
# KUBECHECKS_SCHEMAS_LOCATION: https://github.com/zapier/kubecheck-schemas.git
KUBECHECKS_REPO_REFRESH_INTERVAL: 30s
KUBECHECKS_TIDY_OUTDATED_COMMENTS_MODE: "delete"
KUBECHECKS_ENABLE_CONFTEST: "false"
KUBECHECKS_REPO_SHALLOW_CLONE: "true"
KUBECHECKS_IDENTIFIER: "test"

deployment:
annotations:
reloader.stakater.com/auto: "true"

image:
pullPolicy: Never
pullPolicy: IfNotPresent
name: "kubechecks"
tag: ""

Expand Down
1 change: 1 addition & 0 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ type ServerConfig struct {
MonitorAllApplications bool `mapstructure:"monitor-all-applications"`
OpenAIAPIToken string `mapstructure:"openai-api-token"`
RepoRefreshInterval time.Duration `mapstructure:"repo-refresh-interval"`
RepoShallowClone bool `mapstructure:"repo-shallow-clone"`
SchemasLocations []string `mapstructure:"schemas-location"`
ShowDebugInfo bool `mapstructure:"show-debug-info"`
TidyOutdatedCommentsMode string `mapstructure:"tidy-outdated-comments-mode"`
Expand Down
4 changes: 2 additions & 2 deletions pkg/events/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ type CheckEvent struct {
}

type repoManager interface {
Clone(ctx context.Context, cloneURL, branchName string) (*git.Repo, error)
Clone(ctx context.Context, cloneURL, branchName string, shallow bool) (*git.Repo, error)
}

func generateMatcher(ce *CheckEvent, repo *git.Repo) error {
Expand Down Expand Up @@ -192,7 +192,7 @@ func (ce *CheckEvent) getRepo(ctx context.Context, cloneURL, branchName string)
return repo, nil
}

repo, err = ce.repoManager.Clone(ctx, cloneURL, branchName)
repo, err = ce.repoManager.Clone(ctx, cloneURL, branchName, ce.ctr.Config.RepoShallowClone)
if err != nil {
return nil, errors.Wrap(err, "failed to clone repo")
}
Expand Down
5 changes: 4 additions & 1 deletion pkg/git/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,11 @@ func NewRepoManager(cfg config.ServerConfig) *RepoManager {
return &RepoManager{cfg: cfg}
}

func (rm *RepoManager) Clone(ctx context.Context, cloneUrl, branchName string) (*Repo, error) {
func (rm *RepoManager) Clone(ctx context.Context, cloneUrl, branchName string, shallow bool) (*Repo, error) {
repo := New(rm.cfg, cloneUrl, branchName)
if shallow {
repo.Shallow = true
}

if err := repo.Clone(ctx); err != nil {
return nil, errors.Wrap(err, "failed to clone repository")
Expand Down
97 changes: 93 additions & 4 deletions pkg/git/repo.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ type Repo struct {
BranchName string
Config config.ServerConfig
CloneURL string
Shallow bool

// exposed state
Directory string
Expand All @@ -46,11 +47,17 @@ func New(cfg config.ServerConfig, cloneUrl, branchName string) *Repo {
}

func (r *Repo) Clone(ctx context.Context) error {
if r.Shallow {
return r.shallowClone(ctx)
}

var err error

r.Directory, err = os.MkdirTemp("/tmp", "kubechecks-repo-")
if err != nil {
return errors.Wrap(err, "failed to make temp dir")
if r.Directory == "" {
r.Directory, err = os.MkdirTemp("/tmp", "kubechecks-repo-")
if err != nil {
return errors.Wrap(err, "failed to make temp dir")
}
}

log.Info().
Expand Down Expand Up @@ -85,6 +92,63 @@ func (r *Repo) Clone(ctx context.Context) error {
return nil
}

// shallowClone clones r.CloneURL into r.Directory with a history depth of one
// commit and, unless r.BranchName is "HEAD", fetches and checks out
// r.BranchName at the same depth.
//
// If r.Directory is empty, a new temporary directory is created under /tmp and
// stored on the receiver before cloning. A failing git command is logged
// together with its combined output and its error is returned as-is.
func (r *Repo) shallowClone(ctx context.Context) error {
	// Only allocate a working directory when the caller has not provided one.
	if r.Directory == "" {
		tempDir, mkErr := os.MkdirTemp("/tmp", "kubechecks-repo-")
		if mkErr != nil {
			return errors.Wrap(mkErr, "failed to make temp dir")
		}
		r.Directory = tempDir
	}

	log.Info().
		Str("temp-dir", r.Directory).
		Str("clone-url", r.CloneURL).
		Str("branch", r.BranchName).
		Msg("cloning git repo")

	// Trace the whole clone/fetch/checkout sequence as a single span.
	_, span := tracer.Start(ctx, "ShallowCloneRepo")
	defer span.End()

	// runGit builds a git command from the given arguments and returns its
	// combined stdout/stderr along with the execution error, if any.
	runGit := func(gitArgs ...string) ([]byte, error) {
		return r.execGitCommand(gitArgs...).CombinedOutput()
	}

	// Clone with a history depth of one commit.
	if output, cloneErr := runGit("clone", r.CloneURL, r.Directory, "--depth", "1"); cloneErr != nil {
		log.Error().Err(cloneErr).Msgf("unable to clone repository, %s", output)
		return cloneErr
	}

	if r.BranchName != "HEAD" {
		// Fetch the requested ref at depth one, then check it out.
		// NOTE(review): `git clone --depth` implies --single-branch, so only the
		// default branch gets a remote-tracking ref; confirm that checking out a
		// non-default branch name resolves after this fetch.
		if output, fetchErr := runGit("fetch", "origin", r.BranchName, "--depth", "1"); fetchErr != nil {
			log.Error().Err(fetchErr).Msgf("unable to fetch %s repository, %s", r.BranchName, output)
			return fetchErr
		}

		if output, checkoutErr := runGit("checkout", r.BranchName); checkoutErr != nil {
			log.Error().Err(checkoutErr).Msgf("unable to checkout branch %s repository, %s", r.BranchName, output)
			return checkoutErr
		}
	}

	// At trace level, dump the checked-out tree; a failed walk is not fatal.
	if log.Trace().Enabled() {
		if walkErr := filepath.WalkDir(r.Directory, printFile); walkErr != nil {
			log.Warn().Err(walkErr).Msg("failed to walk directory")
		}
	}

	log.Info().Msg("repo has been cloned")
	return nil
}

func printFile(s string, d fs.DirEntry, err error) error {
if err != nil {
return err
Expand Down Expand Up @@ -118,8 +182,24 @@ func (r *Repo) MergeIntoTarget(ctx context.Context, ref string) error {
attribute.String("sha", ref),
))
defer span.End()
merge_command := []string{"merge", ref}
// For shallow clones, we need to pull the ref into the repo
if r.Shallow {
ref = strings.TrimPrefix(ref, "origin/")
cmd := r.execGitCommand("fetch", "origin", fmt.Sprintf("%s:%s", ref, ref), "--depth", "1")
out, err := cmd.CombinedOutput()
if err != nil {
telemetry.SetError(span, err, "fetch origin ref")
log.Error().Err(err).Msgf("unable to fetch ref %s, %s", ref, out)
return err
}
// When merging shallow clones, we need to allow unrelated histories
// and use the "theirs" strategy option (-X theirs) to avoid conflicts.
// The downside is that the result may not be entirely accurate: conflicting changes in the target branch may be overwritten.
merge_command = []string{"merge", ref, "--allow-unrelated-histories", "-X", "theirs"}
}

cmd := r.execGitCommand("merge", ref)
cmd := r.execGitCommand(merge_command...)
out, err := cmd.CombinedOutput()
if err != nil {
telemetry.SetError(span, err, "merge commit into branch")
Expand All @@ -131,6 +211,15 @@ func (r *Repo) MergeIntoTarget(ctx context.Context, ref string) error {
}

func (r *Repo) Update(ctx context.Context) error {
// Since we're shallow cloning, to update we need to wipe the directory and re-clone
if r.Shallow {
r.Wipe()
err := os.Mkdir(r.Directory, 0700)
if err != nil {
return errors.Wrap(err, "failed to create repo directory")
}
return r.Clone(ctx)
}
cmd := r.execGitCommand("pull")
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stdout
Expand Down
1 change: 1 addition & 0 deletions pkg/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ func Pointer[T interface{}](item T) *T {
}

func WipeDir(dir string) {
log.Debug().Str("path", dir).Msg("wiping path")
if err := os.RemoveAll(dir); err != nil {
log.Error().
Err(err).
Expand Down

0 comments on commit ed4056c

Please sign in to comment.