Skip to content

Commit

Permalink
repo maintenance for windows
Browse files Browse the repository at this point in the history
Signed-off-by: Lyndon-Li <[email protected]>
  • Loading branch information
Lyndon-Li committed Jan 17, 2025
1 parent 5b1738a commit bdf495f
Show file tree
Hide file tree
Showing 5 changed files with 295 additions and 110 deletions.
49 changes: 28 additions & 21 deletions pkg/cmd/cli/repomantenance/maintenance.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/vmware-tanzu/velero/pkg/util/logging"

repokey "github.com/vmware-tanzu/velero/pkg/repository/keys"
"github.com/vmware-tanzu/velero/pkg/repository/maintenance"
repomanager "github.com/vmware-tanzu/velero/pkg/repository/manager"
)

Expand Down Expand Up @@ -78,17 +79,7 @@ func (o *Options) Run(f velerocli.Factory) {
}()

if pruneError != nil {
logger.WithError(pruneError).Error("An error occurred when running repo prune")
terminationLogFile, err := os.Create("/dev/termination-log")
if err != nil {
logger.WithError(err).Error("Failed to create termination log file")
return
}
defer terminationLogFile.Close()

if _, errWrite := terminationLogFile.WriteString(fmt.Sprintf("An error occurred: %v", err)); errWrite != nil {
logger.WithError(errWrite).Error("Failed to write error to termination log file")
}
os.Stdout.WriteString(fmt.Sprintf("%s%v", maintenance.TerminationLogIndicator, pruneError))
}
}

Expand Down Expand Up @@ -163,22 +154,38 @@ func (o *Options) runRepoPrune(f velerocli.Factory, namespace string, logger log
return err
}

manager, err := initRepoManager(namespace, cli, kubeClient, logger)
if err != nil {
return err
var repo *velerov1api.BackupRepository
retry := 10
for {
repo, err = repository.GetBackupRepository(context.Background(), cli, namespace,
repository.BackupRepositoryKey{
VolumeNamespace: o.RepoName,
BackupLocation: o.BackupStorageLocation,
RepositoryType: o.RepoType,
}, true)
if err == nil {
break
}

retry--
if retry == 0 {
break
}

logger.WithError(err).Warn("Failed to retrieve backup repo, need retry")

time.Sleep(time.Second)
}

// backupRepository
repo, err := repository.GetBackupRepository(context.Background(), cli, namespace,
repository.BackupRepositoryKey{
VolumeNamespace: o.RepoName,
BackupLocation: o.BackupStorageLocation,
RepositoryType: o.RepoType,
}, true)
if err != nil {
return errors.Wrap(err, "failed to get backup repository")
}

manager, err := initRepoManager(namespace, cli, kubeClient, logger)
if err != nil {
return err
}

err = manager.PruneRepo(repo)
if err != nil {
return errors.Wrap(err, "failed to prune repo")
Expand Down
77 changes: 47 additions & 30 deletions pkg/repository/maintenance/maintenance.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"fmt"
"math"
"sort"
"strings"
"time"

"github.com/pkg/errors"
Expand All @@ -35,6 +36,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"

velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
"github.com/vmware-tanzu/velero/pkg/util"
"github.com/vmware-tanzu/velero/pkg/util/kube"

appsv1 "k8s.io/api/apps/v1"
Expand All @@ -47,6 +49,7 @@ import (
const (
RepositoryNameLabel = "velero.io/repo-name"
GlobalKeyForRepoMaintenanceJobCM = "global"
TerminationLogIndicator = "Repo maintenance error: "
)

type JobConfigs struct {
Expand Down Expand Up @@ -147,24 +150,37 @@ func getResultFromJob(cli client.Client, job *batchv1.Job) (string, error) {
}

if len(podList.Items) == 0 {
return "", fmt.Errorf("no pod found for job %s", job.Name)
return "", errors.Errorf("no pod found for job %s", job.Name)
}

// we only have one maintenance pod for the job
pod := podList.Items[0]

statuses := pod.Status.ContainerStatuses
if len(statuses) == 0 {
return "", fmt.Errorf("no container statuses found for job %s", job.Name)
return "", errors.Errorf("no container statuses found for job %s", job.Name)
}

// we only have one maintenance container
terminated := statuses[0].State.Terminated
if terminated == nil {
return "", fmt.Errorf("container for job %s is not terminated", job.Name)
return "", errors.Errorf("container for job %s is not terminated", job.Name)
}

return terminated.Message, nil
if terminated.Message == "" {
return "", nil
}

idx := strings.Index(terminated.Message, TerminationLogIndicator)
if idx == -1 {
return "", errors.New("error to locate repo maintenance error indicator from termination message")
}

if idx+len(TerminationLogIndicator) >= len(terminated.Message) {
return "", errors.New("nothing after repo maintenance error indicator in termination message")
}

return terminated.Message[idx+len(TerminationLogIndicator):], nil
}

// getJobConfig is called to get the Maintenance Job Config for the
Expand Down Expand Up @@ -434,6 +450,16 @@ func buildJob(cli client.Client, ctx context.Context, repo *velerov1api.BackupRe
return nil, errors.Wrap(err, "failed to parse resource requirements for maintenance job")
}

podLabels := map[string]string{
RepositoryNameLabel: repo.Name,
}

for _, k := range util.ThirdPartyLabels {
if v := veleroutil.GetVeleroServerLabelValue(deployment, k); v != "" {
podLabels[k] = v
}
}

// Set arguments
args := []string{"repo-maintenance"}
args = append(args, fmt.Sprintf("--repo-name=%s", repo.Spec.VolumeNamespace))
Expand All @@ -455,10 +481,8 @@ func buildJob(cli client.Client, ctx context.Context, repo *velerov1api.BackupRe
BackoffLimit: new(int32), // Never retry
Template: v1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Name: "velero-repo-maintenance-pod",
Labels: map[string]string{
RepositoryNameLabel: repo.Name,
},
Name: "velero-repo-maintenance-pod",
Labels: podLabels,
},
Spec: v1.PodSpec{
Containers: []v1.Container{
Expand All @@ -468,17 +492,26 @@ func buildJob(cli client.Client, ctx context.Context, repo *velerov1api.BackupRe
Command: []string{
"/velero",
},
Args: args,
ImagePullPolicy: v1.PullIfNotPresent,
Env: envVars,
EnvFrom: envFromSources,
VolumeMounts: volumeMounts,
Resources: resources,
Args: args,
ImagePullPolicy: v1.PullIfNotPresent,
Env: envVars,
EnvFrom: envFromSources,
VolumeMounts: volumeMounts,
Resources: resources,
TerminationMessagePolicy: v1.TerminationMessageFallbackToLogsOnError,
},
},
RestartPolicy: v1.RestartPolicyNever,
Volumes: volumes,
ServiceAccountName: serviceAccount,
Tolerations: []v1.Toleration{
{
Key: "os",
Operator: "Equal",
Effect: "NoSchedule",
Value: "windows",
},
},
},
},
},
Expand All @@ -489,22 +522,6 @@ func buildJob(cli client.Client, ctx context.Context, repo *velerov1api.BackupRe
job.Spec.Template.Spec.Affinity = affinity
}

if tolerations := veleroutil.GetTolerationsFromVeleroServer(deployment); tolerations != nil {
job.Spec.Template.Spec.Tolerations = tolerations
}

if nodeSelector := veleroutil.GetNodeSelectorFromVeleroServer(deployment); nodeSelector != nil {
job.Spec.Template.Spec.NodeSelector = nodeSelector
}

if labels := veleroutil.GetVeleroServerLables(deployment); len(labels) > 0 {
job.Spec.Template.Labels = labels
}

if annotations := veleroutil.GetVeleroServerAnnotations(deployment); len(annotations) > 0 {
job.Spec.Template.Annotations = annotations
}

return job, nil
}

Expand Down
Loading

0 comments on commit bdf495f

Please sign in to comment.