Implement CRB cleanup #576

Open · wants to merge 17 commits into `main`
15 changes: 15 additions & 0 deletions hack/runtime-migrator/cmd/crb-cleanup/Makefile
@@ -0,0 +1,15 @@
ENVTEST_K8S_VERSION = 1.31.0
ENVTEST_VERSION ?= release-0.19

LOCALBIN ?= $(shell pwd)/bin
$(LOCALBIN):
	mkdir -p $(LOCALBIN)

ENVTEST ?= $(LOCALBIN)/setup-envtest
.PHONY: envtest
envtest: $(ENVTEST) ## Download setup-envtest locally if necessary.
$(ENVTEST): $(LOCALBIN)
	test -s $(LOCALBIN)/setup-envtest || GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-runtime/tools/setup-envtest@$(ENVTEST_VERSION)

test: envtest ## Run tests.
	KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test ./... -coverprofile cover.out -v
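The Makefile provisions envtest binaries, but the suite bootstrap in this diff doesn't show how they're consumed. A minimal sketch of an envtest setup with controller-runtime, assuming the standard `BeforeSuite`/`AfterSuite` pattern (not part of this PR):

```go
package main_test

import (
	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"
	"k8s.io/client-go/kubernetes"
	"sigs.k8s.io/controller-runtime/pkg/envtest"
)

var (
	testEnv *envtest.Environment
	client  *kubernetes.Clientset
)

var _ = BeforeSuite(func() {
	// Start a local kube-apiserver/etcd pair; the binaries are discovered
	// via the KUBEBUILDER_ASSETS path exported by the Makefile's test target.
	testEnv = &envtest.Environment{}
	cfg, err := testEnv.Start()
	Expect(err).NotTo(HaveOccurred())

	client, err = kubernetes.NewForConfig(cfg)
	Expect(err).NotTo(HaveOccurred())
})

var _ = AfterSuite(func() {
	Expect(testEnv.Stop()).To(Succeed())
})
```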
62 changes: 62 additions & 0 deletions hack/runtime-migrator/cmd/crb-cleanup/README.md
@@ -0,0 +1,62 @@
# CRB Cleanup script

This script cleans up the provisioner's ClusterRoleBindings (CRBs) after migration.
It looks for provisioner and KIM CRBs, compares them,
and, if every provisioner CRB has a KIM equivalent, removes the provisioner ones.

The cleanup script is run locally, with a kubeconfig pointing to the target cluster.

## Configuration

| Flag                | Description                                                  | Default                                                                                              |
| ------------------- | ------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------- |
| `-kubeconfig`       | Path to the kubeconfig file                                  | in-cluster config                                                                                    |
| `-provisionerLabel` | Label selector for provisioner CRBs                          | `kyma-project.io/deprecation=to-be-removed-soon,reconciler.kyma-project.io/managed-by=provisioner`   |
| `-kimLabel`         | Label selector for KIM CRBs                                  | `reconciler.kyma-project.io/managed-by=infrastructure-manager`                                       |
| `-output`           | Output dir for created logs                                  | _empty_ (acts like `./`)                                                                             |
| `-dry-run`          | Don't perform any destructive actions                        | `true`                                                                                               |
| `-verbose`          | Print detailed logs                                          | `false`                                                                                              |
| `-force`            | Delete provisioner CRBs even if they have no KIM equivalent  | `false`                                                                                              |

> [!NOTE]
> If `-output` doesn't end with `/`, the file names are prefixed with its last path segment.
> E.g. `-output=./dev/log/cluster_a-` creates files like `./dev/log/cluster_a-missing.json`, `./dev/log/cluster_a-removed.json`, etc.

> [!WARNING]
> Without `-dry-run=false`, the script won't delete anything, even with the `-force` flag.

## Usage

To run the cleanup script, execute:

```bash
go run ./cmd/crb-cleanup -output=./dev/logs/my-cluster/ -kubeconfig=./dev/kubeconfig -dry-run=false
```

If any provisioner CRBs lack a KIM equivalent, the script prints them and exits with a zero status code.
The missing CRBs can be inspected as JSON in `./dev/logs/my-cluster/missing.json`. No CRBs are removed.

After inspecting the missing CRBs, you can re-run the script with the `-force` flag to delete the provisioner CRBs anyway.

If no CRBs are missing, the script removes the provisioner CRBs.
The removed CRBs can be inspected as JSON in `./dev/logs/my-cluster/removed.json`.

If any errors occurred during deletion (e.g. a permission error), the CRBs that failed to delete are listed in `./dev/logs/my-cluster/failures.json`.

All of the log files are created either way.

### Prefixing logs in `kcp taskrun`

Create a script:

```bash
#!/bin/bash
crb-cleanup -output=./logs/${RUNTIME_NAME}_ -dry-run=true
```

When run for a runtime named `some_runtime`, the script creates files like `./logs/some_runtime_missing.json`.

### `-dry-run` mode

When running the script without the `-dry-run=false` flag, the CRBs that _would_ be removed are listed as JSON in `./dev/logs/my-cluster/removed.json`.
No destructive actions are performed.
89 changes: 89 additions & 0 deletions hack/runtime-migrator/cmd/crb-cleanup/cleaner.go
@@ -0,0 +1,89 @@
package main

import (
"context"
"log/slog"

v1 "k8s.io/api/rbac/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

type KubeDeleter interface {
Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error
}

type Compared struct {
provisioner []v1.ClusterRoleBinding
kim []v1.ClusterRoleBinding
missing []v1.ClusterRoleBinding
additional []v1.ClusterRoleBinding
}

type Cleaner interface {
Clean(context.Context, []v1.ClusterRoleBinding) []Failure
}

type CRBCleaner struct {
client KubeDeleter
}

// Failure pairs a CRB with the error that prevented its deletion.
// NOTE: a plain error marshals to JSON as {}, so failures.json may need
// Err to be serialized via Err.Error() to be readable.
type Failure struct {
	CRB v1.ClusterRoleBinding `json:"crb"`
	Err error                 `json:"error"`
}

// Clean deletes the given CRBs and returns the ones that failed, paired with their errors
func (c CRBCleaner) Clean(ctx context.Context, crbs []v1.ClusterRoleBinding) []Failure {
failures := make([]Failure, 0)

for _, crb := range crbs {
slog.Debug("Removing CRB", "crb", crb.Name)
err := c.client.Delete(ctx, crb.Name, metav1.DeleteOptions{})
if err != nil {
slog.Error("Error removing CRB", "crb", crb.Name)
failures = append(failures, Failure{
CRB: crb,
Err: err,
})
}
}

return failures
}

// Compare returns the provisioner CRBs with no KIM counterpart (missing),
// the KIM CRBs with no provisioner counterpart (additional), and both original lists
func Compare(ctx context.Context, provisioner []v1.ClusterRoleBinding, kim []v1.ClusterRoleBinding) Compared {
missing, additional := difference(provisioner, kim, CRBEquals)

return Compared{
provisioner: provisioner,
kim: kim,
missing: missing,
additional: additional,
}
}
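Neither `difference` nor `CRBEquals` is included in this diff. A minimal sketch of what `Compare` appears to assume: a two-way set difference under a custom equality, here guessed to match on role reference and subjects rather than names (the real predicate may differ):

```go
package main

import v1 "k8s.io/api/rbac/v1"

// difference returns the elements of a without a counterpart in b (missing)
// and the elements of b without a counterpart in a (additional).
// Quadratic, which is fine for the small CRB counts involved.
func difference(a, b []v1.ClusterRoleBinding, eq func(x, y v1.ClusterRoleBinding) bool) (missing, additional []v1.ClusterRoleBinding) {
	contains := func(list []v1.ClusterRoleBinding, item v1.ClusterRoleBinding) bool {
		for _, other := range list {
			if eq(item, other) {
				return true
			}
		}
		return false
	}
	for _, crb := range a {
		if !contains(b, crb) {
			missing = append(missing, crb)
		}
	}
	for _, crb := range b {
		if !contains(a, crb) {
			additional = append(additional, crb)
		}
	}
	return missing, additional
}

// CRBEquals (hypothetical): KIM recreates bindings under new names, so
// equality is assumed to mean "same role granted to the same subjects".
func CRBEquals(a, b v1.ClusterRoleBinding) bool {
	if a.RoleRef != b.RoleRef || len(a.Subjects) != len(b.Subjects) {
		return false
	}
	for i := range a.Subjects {
		if a.Subjects[i] != b.Subjects[i] {
			return false
		}
	}
	return true
}
```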

func NewCRBCleaner(client KubeDeleter) Cleaner {
return CRBCleaner{
client: client,
}
}

type DryCleaner struct {
filer Filer
}

// Clean doesn't delete anything; it only records what would be removed
func (p DryCleaner) Clean(_ context.Context, crbs []v1.ClusterRoleBinding) []Failure {
	slog.Debug("Dry run: would remove CRBs", "crbs", crbs)
	err := p.filer.Removed(crbs)
	if err != nil {
		slog.Error("Error saving removed CRBs", "error", err, "crbs", CRBNames(crbs))
	}
	return nil
}

func NewDryCleaner(filer Filer) Cleaner {
return DryCleaner{
filer: filer,
}
}
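The `Filer` used by `DryCleaner` (and by `main.go` below) is also not part of this diff. A minimal sketch, assuming `NewJSONFiler` treats the `-output` value as a plain filename prefix (the README suggests the real implementation creates all files up front, while this sketch writes on demand):

```go
package main

import (
	"encoding/json"
	"os"

	v1 "k8s.io/api/rbac/v1"
)

// Filer persists comparison and cleanup results.
type Filer interface {
	Missing([]v1.ClusterRoleBinding) error
	Removed([]v1.ClusterRoleBinding) error
	Failures([]Failure) error
}

// JSONFiler writes each result set to <prefix><name>,
// e.g. "./dev/logs/my-cluster/" + "missing.json".
type JSONFiler struct {
	prefix string
}

func (f JSONFiler) write(name string, v any) error {
	data, err := json.Marshal(v)
	if err != nil {
		return err
	}
	return os.WriteFile(f.prefix+name, data, 0o644)
}

func (f JSONFiler) Missing(crbs []v1.ClusterRoleBinding) error { return f.write("missing.json", crbs) }
func (f JSONFiler) Removed(crbs []v1.ClusterRoleBinding) error { return f.write("removed.json", crbs) }
func (f JSONFiler) Failures(fails []Failure) error             { return f.write("failures.json", fails) }

func NewJSONFiler(prefix string) Filer {
	return JSONFiler{prefix: prefix}
}
```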
51 changes: 51 additions & 0 deletions hack/runtime-migrator/cmd/crb-cleanup/config.go
@@ -0,0 +1,51 @@
package main

import (
"flag"
"log/slog"
"os"
"reflect"
"strings"
)

type Config struct {
Kubeconfig string
DryRun bool
Verbose bool
Force bool
ProvisionerLabel string
KimLabel string
Output string
}

func ParseConfig() Config {
cfg := Config{}
flag.StringVar(&cfg.Kubeconfig, "kubeconfig", "", "Kubeconfig file path")
flag.StringVar(&cfg.ProvisionerLabel, "provisionerLabel", LabelSelectorProvisioner, "Label marking provisioner's CRBs")
flag.StringVar(&cfg.KimLabel, "kimLabel", LabelSelectorKim, "Label marking kim's CRBs")
flag.StringVar(&cfg.Output, "output", "", "Output folder for created files. If it doesn't end with `/`, the last segment becomes a file name prefix (e.g. `./foo/` for a plain folder)")
flag.BoolVar(&cfg.DryRun, "dry-run", true, "Don't remove CRBs, write what would be removed to ./removed.json")
flag.BoolVar(&cfg.Verbose, "verbose", false, "Increase the log level to debug (default: info)")
flag.BoolVar(&cfg.Force, "force", false, "Force remove CRBs without checking migration status")
Contributor:

> I'm not convinced to have this force option. IMO it's risky.

Contributor (author):

> It was specifically listed in requirements by @tobiscr, IIRC.

flag.Parse()
if cfg.Kubeconfig == "" {
if k, ok := os.LookupEnv("KUBECONFIG"); ok {
cfg.Kubeconfig = k
}
}
slog.Info("Parsed config", Spread(cfg)...)
return cfg
}

// Spread returns the struct's fields as a flat list: ["field_name", field_value, ...].
// It panics if val is not a struct.
func Spread(val interface{}) []interface{} {
v := reflect.ValueOf(val)
t := v.Type()

var res []interface{}
for i := 0; i < v.NumField(); i++ {
res = append(res, strings.ToLower(t.Field(i).Name), v.Field(i).Interface())
}

return res
}
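For illustration, how `Spread` feeds `slog` (a hypothetical helper in the same package; keys are the lowercased field names in declaration order):

```go
package main

import "log/slog"

// logConfigExample is illustrative only.
func logConfigExample() {
	cfg := Config{Kubeconfig: "./dev/kubeconfig", DryRun: true}
	// Logs roughly:
	//   msg="Parsed config" kubeconfig=./dev/kubeconfig dryrun=true
	//   verbose=false force=false provisionerlabel= kimlabel= output=
	slog.Info("Parsed config", Spread(cfg)...)
}
```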
13 changes: 13 additions & 0 deletions hack/runtime-migrator/cmd/crb-cleanup/crb_cleanup_suite_test.go
@@ -0,0 +1,13 @@
package main_test

import (
"testing"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)

func TestCrbCleanup(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "CrbCleanup Suite")
}
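The suite file only registers the Ginkgo runner. A hypothetical spec for `CRBCleaner`, written as an internal test (`package main`) so it can use the types directly, with a fake `KubeDeleter`:

```go
package main

import (
	"context"
	"errors"

	. "github.com/onsi/ginkgo/v2"
	. "github.com/onsi/gomega"
	v1 "k8s.io/api/rbac/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// fakeDeleter fails for one CRB name and succeeds for all others.
type fakeDeleter struct{ failOn string }

func (f fakeDeleter) Delete(_ context.Context, name string, _ metav1.DeleteOptions) error {
	if name == f.failOn {
		return errors.New("forbidden")
	}
	return nil
}

var _ = Describe("CRBCleaner", func() {
	It("collects failures instead of aborting", func() {
		crbs := []v1.ClusterRoleBinding{
			{ObjectMeta: metav1.ObjectMeta{Name: "ok"}},
			{ObjectMeta: metav1.ObjectMeta{Name: "broken"}},
		}
		failures := NewCRBCleaner(fakeDeleter{failOn: "broken"}).Clean(context.Background(), crbs)
		Expect(failures).To(HaveLen(1))
		Expect(failures[0].CRB.Name).To(Equal("broken"))
	})
})
```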
51 changes: 51 additions & 0 deletions hack/runtime-migrator/cmd/crb-cleanup/fetcher.go
@@ -0,0 +1,51 @@
package main

import (
"context"

v1 "k8s.io/api/rbac/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

type Fetcher interface {
FetchKim(context.Context) ([]v1.ClusterRoleBinding, error)
FetchProvisioner(context.Context) ([]v1.ClusterRoleBinding, error)
}

type KubeLister interface {
List(ctx context.Context, opts metav1.ListOptions) (*v1.ClusterRoleBindingList, error)
}

type CRBFetcher struct {
labelKim string
labelProvisioner string
client KubeLister
}

func (f CRBFetcher) fetch(ctx context.Context, label string) ([]v1.ClusterRoleBinding, error) {
list, err := f.client.List(ctx, metav1.ListOptions{
LabelSelector: label,
})

if err != nil {
return nil, err
}

return list.Items, nil
}

func (f CRBFetcher) FetchKim(ctx context.Context) ([]v1.ClusterRoleBinding, error) {
return f.fetch(ctx, f.labelKim)
}

func (f CRBFetcher) FetchProvisioner(ctx context.Context) ([]v1.ClusterRoleBinding, error) {
return f.fetch(ctx, f.labelProvisioner)
}

func NewCRBFetcher(client KubeLister, labelProvisioner, labelKim string) Fetcher {
return CRBFetcher{
labelKim: labelKim,
labelProvisioner: labelProvisioner,
client: client,
}
}
83 changes: 83 additions & 0 deletions hack/runtime-migrator/cmd/crb-cleanup/main.go
@@ -0,0 +1,83 @@
package main
Contributor:

> I think the tool should be more kcp taskrun friendly.
> There are two use cases:
>
> 1. I just want to see what will be removed, and where the problems that require manual intervention are.
> 2. I want to do the actual removal and learn what needs manual intervention.
>
> Exemplary run:
>
>     kcp taskrun --target plan=aws -- go run ./cmd/crb-cleanup -output=./run-aws/ -dry-run=true
>
> The above call invokes crb-cleanup for all AWS clusters.
>
> Problems:
>
> - The `run-aws` folder contains only three files (`failures.json`, `missing.json`, `removed.json`); what I would expect is many files (a set per runtime).
> - The tool prints "New CRBs not found in old CRBs" when CRBs for a particular subject exist for KIM but didn't exist for Provisioner. That information is redundant: we don't want to be notified about new CRBs added by the user, only when a counterpart is missing and manual intervention is required.
> - I'm now convinced the tool should not use the words "old" and "new". It should be more specific (old = Provisioner, new = KIM).
> - Output JSON files are created before we know whether there is anything to write; I need to open each file to see if it actually contains anything.
>
> What would make it more friendly:
>
> - Output should be stored for each runtime separately; there are two options:
>   - files are prefixed with the shoot name and stored in the output folder, or
>   - there is one folder per shoot name where all the files are stored.
> - I'm wondering whether we should exit with an error when it is detected that some Provisioner CRBs have no KIM counterpart.
> - Printing a "completed" message; it would make it easier to grep the output and see which runtimes are sound.
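A sketch of the exit-code idea from the comment above (hypothetical, not part of this PR; the helper name and exit code are illustrative):

```go
package main

import (
	"log/slog"
	"os"

	v1 "k8s.io/api/rbac/v1"
)

// failOnMissing exits non-zero when provisioner CRBs have no KIM
// counterpart and -force is unset, so failing runtimes are easy to
// spot in kcp taskrun output.
func failOnMissing(missing []v1.ClusterRoleBinding, force bool) {
	if len(missing) == 0 || force {
		return
	}
	names := make([]string, 0, len(missing))
	for _, crb := range missing {
		names = append(names, crb.Name)
	}
	slog.Error("Provisioner CRBs without KIM counterpart; manual intervention required", "crbs", names)
	os.Exit(2)
}
```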


import (
"context"
"log/slog"
"os"
)

const LabelSelectorProvisioner = "kyma-project.io/deprecation=to-be-removed-soon,reconciler.kyma-project.io/managed-by=provisioner"
const LabelSelectorKim = "reconciler.kyma-project.io/managed-by=infrastructure-manager"

func main() {
cfg := ParseConfig()

if cfg.Verbose {
slog.SetLogLoggerLevel(slog.LevelDebug)
} else {
slog.SetLogLoggerLevel(slog.LevelInfo)
}

kubectl := setupKubectl(cfg.Kubeconfig)
client := kubectl.RbacV1().ClusterRoleBindings()
fetcher := NewCRBFetcher(client, cfg.ProvisionerLabel, cfg.KimLabel)

filer := NewJSONFiler(cfg.Output)

var cleaner Cleaner
if cfg.DryRun {
slog.Info("Running in dry-run mode")
cleaner = NewDryCleaner(filer)
} else {
cleaner = NewCRBCleaner(client)
}

failures, err := ProcessCRBs(fetcher, cleaner, filer, cfg)
if err != nil {
slog.Error("Error processing CRBs", "error", err)
os.Exit(1)
}
err = filer.Failures(failures)
if err != nil {
slog.Error("Error marshaling list of failures", "error", err, "failures", failures)
os.Exit(1)
}
slog.Info("Completed without errors")
}

// ProcessCRBs fetches provisioner's and KIM's CRBs, compares them, and cleans up the provisioner's ones.
// It returns an error when fetching fails.
// If some provisioner CRBs have no KIM counterpart, it does nothing unless the force flag is set.
// Removal errors are returned as a list of failures.
func ProcessCRBs(fetcher Fetcher, cleaner Cleaner, filer Filer, cfg Config) ([]Failure, error) {
ctx := context.Background()
provisionerCRBs, err := fetcher.FetchProvisioner(ctx)
if err != nil {
slog.Error("Error fetching provisioner CRBs", "error", err)
return nil, err
}

kimCRBs, err := fetcher.FetchKim(ctx)
if err != nil {
slog.Error("Error fetching kim CRBs", "error", err)
return nil, err
}

compared := Compare(ctx, provisionerCRBs, kimCRBs)

if len(compared.missing) != 0 {
slog.Warn("Provisioner CRBs not found in kim CRBs", CRBNames(compared.missing))
if filer != nil {
err := filer.Missing(compared.missing)
if err != nil {
slog.Error("Error saving unmatched CRBs", "error", err)
}
}
if !cfg.Force {
slog.Info("Use -force to remove provisioner CRBs without match")
return nil, nil
}
}

return cleaner.Clean(ctx, provisionerCRBs), nil
}
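`setupKubectl` is referenced above but not included in this diff. A minimal sketch, assuming it builds a clientset from the `-kubeconfig` path and falls back to in-cluster config when the path is empty (matching the flag's documented default):

```go
package main

import (
	"log/slog"
	"os"

	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
	"k8s.io/client-go/tools/clientcmd"
)

// setupKubectl returns a clientset built from the given kubeconfig path,
// or from in-cluster config when the path is empty.
func setupKubectl(kubeconfig string) *kubernetes.Clientset {
	var cfg *rest.Config
	var err error
	if kubeconfig == "" {
		cfg, err = rest.InClusterConfig()
	} else {
		cfg, err = clientcmd.BuildConfigFromFlags("", kubeconfig)
	}
	if err != nil {
		slog.Error("Error building kubeconfig", "error", err)
		os.Exit(1)
	}
	client, err := kubernetes.NewForConfig(cfg)
	if err != nil {
		slog.Error("Error creating kubernetes client", "error", err)
		os.Exit(1)
	}
	return client
}
```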