Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow the discovery of Servers which are in the powered On state. #109

Merged
merged 7 commits into from
Aug 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ jobs:
- name: golangci-lint
uses: golangci/golangci-lint-action@v6
with:
version: v1.59
version: v1.60
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,14 @@ RUN --mount=type=cache,target=/root/.cache/go-build \

# Use distroless as minimal base image to package the manager binary
# Refer to https://github.com/GoogleContainerTools/distroless for more details
FROM gcr.io/distroless/static:nonroot as manager
FROM gcr.io/distroless/static:nonroot AS manager
WORKDIR /
COPY --from=builder /workspace/manager .
USER 65532:65532

ENTRYPOINT ["/manager"]

FROM gcr.io/distroless/static:nonroot as probe
FROM gcr.io/distroless/static:nonroot AS probe
WORKDIR /
COPY --from=builder /workspace/metalprobe .
USER 65532:65532
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ GEN_CRD_API_REFERENCE_DOCS ?= $(LOCALBIN)/gen-crd-api-reference-docs-$(GEN_CRD_A
KUSTOMIZE_VERSION ?= v5.3.0
CONTROLLER_TOOLS_VERSION ?= v0.15.0
ENVTEST_VERSION ?= latest
GOLANGCI_LINT_VERSION ?= v1.59.1
GOLANGCI_LINT_VERSION ?= v1.60.1
GOIMPORTS_VERSION ?= v0.22.0
GEN_CRD_API_REFERENCE_DOCS_VERSION ?= v0.3.0

Expand Down
38 changes: 12 additions & 26 deletions bmc/redfish_local.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,41 +31,27 @@ func NewRedfishLocalBMCClient(
}

func (r RedfishLocalBMC) PowerOn(systemUUID string) error {
service := r.client.GetService()
systems, err := service.Systems()
system, err := r.getSystemByUUID(systemUUID)
if err != nil {
return fmt.Errorf("failed to get systems: %w", err)
return fmt.Errorf("failed to get system: %w", err)
}

for _, system := range systems {
if system.UUID == systemUUID {
system.PowerState = redfish.OnPowerState
systemURI := fmt.Sprintf("/redfish/v1/Systems/%s", system.ID)
if err := system.Patch(systemURI, system); err != nil {
return fmt.Errorf("failed to set power state %s for system %s: %w", redfish.OnPowerState, systemUUID, err)
}
break
}
system.PowerState = redfish.OnPowerState
systemURI := fmt.Sprintf("/redfish/v1/Systems/%s", system.ID)
if err := system.Patch(systemURI, system); err != nil {
return fmt.Errorf("failed to set power state %s for system %s: %w", redfish.OnPowerState, systemUUID, err)
}
return nil
}

func (r RedfishLocalBMC) PowerOff(systemUUID string) error {
service := r.client.GetService()
systems, err := service.Systems()
system, err := r.getSystemByUUID(systemUUID)
if err != nil {
return fmt.Errorf("failed to get systems: %w", err)
return fmt.Errorf("failed to get system: %w", err)
}

for _, system := range systems {
if system.UUID == systemUUID {
system.PowerState = redfish.OffPowerState
systemURI := fmt.Sprintf("/redfish/v1/Systems/%s", system.ID)
if err := system.Patch(systemURI, system); err != nil {
return fmt.Errorf("failed to set power state %s for system %s: %w", redfish.OffPowerState, systemUUID, err)
}
break
}
system.PowerState = redfish.OffPowerState
systemURI := fmt.Sprintf("/redfish/v1/Systems/%s", system.ID)
if err := system.Patch(systemURI, system); err != nil {
return fmt.Errorf("failed to set power state %s for system %s: %w", redfish.OffPowerState, systemUUID, err)
}
return nil
}
4 changes: 4 additions & 0 deletions cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ func main() {
var registryURL string
var requeueInterval time.Duration
var webhookPort int
var enforceFirstBoot bool

flag.DurationVar(&requeueInterval, "requeue-interval", 10*time.Second, "Reconciler requeue interval.")
flag.StringVar(&registryURL, "registry-url", "", "The URL of the registry.")
Expand All @@ -70,6 +71,8 @@ func main() {
flag.StringVar(&macPrefixesFile, "mac-prefixes-file", "", "Location of the MAC prefixes file.")
flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
flag.BoolVar(&enforceFirstBoot, "enforce-first-boot", false,
"Enforce the first boot probing of a Server even if it is powered on in the Initial state.")
flag.IntVar(&webhookPort, "webhook-port", 9443, "The port to use for webhook server.")
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
"Enable leader election for controller manager. "+
Expand Down Expand Up @@ -198,6 +201,7 @@ func main() {
ProbeOSImage: probeOSImage,
RegistryURL: registryURL,
RequeueInterval: requeueInterval,
EnforceFirstBoot: enforceFirstBoot,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "Server")
os.Exit(1)
Expand Down
24 changes: 24 additions & 0 deletions internal/controller/bmc_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,30 @@ var _ = Describe("BMC Controller", func() {
}
Expect(k8sClient.Create(ctx, endpoint)).To(Succeed())
DeferCleanup(k8sClient.Delete, endpoint)

By("Ensuring that the BMC will be removed")
bmc := &metalv1alpha1.BMC{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("bmc-%s", endpoint.Name),
},
}
DeferCleanup(k8sClient.Delete, bmc)

By("Ensuring that the BMCSecret will be removed")
bmcSecret := &metalv1alpha1.BMCSecret{
ObjectMeta: metav1.ObjectMeta{
Name: bmc.Name,
},
}
DeferCleanup(k8sClient.Delete, bmcSecret)

By("Ensuring that the Server resource will be removed")
server := &metalv1alpha1.Server{
ObjectMeta: metav1.ObjectMeta{
Name: GetServerNameFromBMCandIndex(0, bmc),
},
}
DeferCleanup(k8sClient.Delete, server)
})

It("Should successfully reconcile the a BMC resource", func(ctx SpecContext) {
Expand Down
2 changes: 1 addition & 1 deletion internal/controller/endpoint_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ func (r *EndpointReconciler) reconcile(ctx context.Context, log logr.Logger, end
if err := r.applyBMC(ctx, log, endpoint, bmcSecret, m); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to apply BMC object: %w", err)
}
log.V(1).Info("Applied local test BMC object for endpoint")
log.V(1).Info("Applied BMC object for Endpoint")
}
// TODO: other types like Switches can be handled here later
}
Expand Down
1 change: 1 addition & 0 deletions internal/controller/endpoint_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ var _ = Describe("Endpoints Controller", func() {
"username": []byte(base64.StdEncoding.EncodeToString([]byte("foo"))),
"password": []byte(base64.StdEncoding.EncodeToString([]byte("bar"))),
}))))
DeferCleanup(k8sClient.Delete, bmcSecret)

By("By ensuring that the BMC object has been created")
bmc := &metalv1alpha1.BMC{
Expand Down
136 changes: 100 additions & 36 deletions internal/controller/server_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ type ServerReconciler struct {
RegistryURL string
ProbeOSImage string
RequeueInterval time.Duration
EnforceFirstBoot bool
}

//+kubebuilder:rbac:groups=metal.ironcore.dev,resources=bmcs,verbs=get;list;watch
Expand Down Expand Up @@ -152,12 +153,12 @@ func (r *ServerReconciler) reconcile(ctx context.Context, log logr.Logger, serve
if err := r.applyBiosSettings(ctx, log, server); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to update server bios settings: %w", err)
}
log.V(1).Info("Updated Server bios settings")
log.V(1).Info("Updated Server BIOS settings")

if err := r.applyBootOrder(ctx, log, server); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to update server bios boot order: %w", err)
}
log.V(1).Info("Updated Server bios boot order")
log.V(1).Info("Updated Server BIOS boot order")

requeue, err := r.ensureServerStateTransition(ctx, log, server)
if requeue && err == nil {
Expand Down Expand Up @@ -215,6 +216,16 @@ func (r *ServerReconciler) ensureServerStateTransition(ctx context.Context, log
}

func (r *ServerReconciler) handleInitialState(ctx context.Context, log logr.Logger, server *metalv1alpha1.Server) (bool, error) {
if requeue, err := r.ensureInitialConditions(ctx, log, server); err != nil || requeue {
return requeue, err
}
log.V(1).Info("Initial conditions for Server met")

if err := r.ensureServerPowerState(ctx, log, server); err != nil {
return false, fmt.Errorf("failed to ensure server power state: %w", err)
}
log.V(1).Info("Ensured power state for Server")

if err := r.applyBootConfigurationAndIgnitionForDiscovery(ctx, log, server); err != nil {
return false, fmt.Errorf("failed to apply server boot configuration: %w", err)
}
Expand All @@ -232,6 +243,12 @@ func (r *ServerReconciler) handleInitialState(ctx context.Context, log logr.Logg
}

func (r *ServerReconciler) handleDiscoveryState(ctx context.Context, log logr.Logger, server *metalv1alpha1.Server) (bool, error) {
if ready, err := r.serverBootConfigurationIsReady(ctx, server); err != nil || !ready {
log.V(1).Info("Server boot configuration is not ready. Retrying ...")
return true, err
}
log.V(1).Info("Server boot configuration is ready")

serverBase := server.DeepCopy()
server.Spec.Power = metalv1alpha1.PowerOn
if err := r.Patch(ctx, server, client.MergeFrom(serverBase)); err != nil {
Expand All @@ -244,12 +261,6 @@ func (r *ServerReconciler) handleDiscoveryState(ctx context.Context, log logr.Lo
}
log.V(1).Info("Server state set to power on")

if ready, err := r.serverBootConfigurationIsReady(ctx, server); err != nil || !ready {
log.V(1).Info("Server boot configuration is not ready. Retrying ...")
return true, err
}
log.V(1).Info("Server boot configuration is ready")

ready, err := r.extractServerDetailsFromRegistry(ctx, log, server)
if !ready && err == nil {
log.V(1).Info("Server agent did not post info to registry")
Expand Down Expand Up @@ -480,6 +491,47 @@ func (r *ServerReconciler) generateDefaultIgnitionDataForServer(flags string) ([
return ignitionData, nil
}

// ensureInitialConditions brings the desired power setting of a Server into a
// known starting point before discovery continues.
//
// Two cases are handled, in this order:
//   - Spec.Power is unset while the observed power state is off: the desired
//     power is pinned to PowerOff.
//   - The Server is in the Initial state, is observed as powered on, and
//     EnforceFirstBoot is enabled: the desired power is set to PowerOff.
//
// It returns true when the Server object was patched and the caller should
// requeue; a non-nil error is returned when patching failed.
func (r *ServerReconciler) ensureInitialConditions(ctx context.Context, log logr.Logger, server *metalv1alpha1.Server) (bool, error) {
	powerUnset := server.Spec.Power == ""
	if powerUnset && server.Status.PowerState == metalv1alpha1.ServerOffPowerState {
		switch patched, err := r.setAndPatchServerPowerState(ctx, log, server, metalv1alpha1.PowerOff); {
		case err != nil:
			return false, fmt.Errorf("failed to set server power state: %w", err)
		case patched:
			return true, nil
		}
	}

	// The status fields are read only after the first patch attempt, exactly
	// as the checks are ordered above.
	if !r.EnforceFirstBoot ||
		server.Status.State != metalv1alpha1.ServerStateInitial ||
		server.Status.PowerState != metalv1alpha1.ServerOnPowerState {
		return false, nil
	}

	log.V(1).Info("Server in initial state is powered on. Ensure that it is powered off.")
	patched, err := r.setAndPatchServerPowerState(ctx, log, server, metalv1alpha1.PowerOff)
	if err != nil {
		return false, fmt.Errorf("failed to set server power state: %w", err)
	}
	return patched, nil
}

// setAndPatchServerPowerState sets Spec.Power of the given Server to powerState
// via controllerutil.CreateOrPatch.
//
// It returns true when CreateOrPatch reports the object as updated, which
// callers use as a signal to requeue. It returns false when nothing was
// updated, and a wrapped error when the patch call failed.
func (r *ServerReconciler) setAndPatchServerPowerState(ctx context.Context, log logr.Logger, server *metalv1alpha1.Server, powerState metalv1alpha1.Power) (bool, error) {
	op, err := controllerutil.CreateOrPatch(ctx, r.Client, server, func() error {
		server.Spec.Power = powerState
		return nil
	})
	if err != nil {
		return false, fmt.Errorf("failed to patch Server: %w", err)
	}
	if op != controllerutil.OperationResultUpdated {
		return false, nil
	}
	// Log the state that was actually applied; the target is a parameter and
	// is not necessarily "off".
	log.V(1).Info("Server power state updated", "power", powerState)
	return true, nil
}

func (r *ServerReconciler) serverBootConfigurationIsReady(ctx context.Context, server *metalv1alpha1.Server) (bool, error) {
if server.Spec.BootConfigurationRef == nil {
return false, nil
Expand All @@ -502,7 +554,11 @@ func (r *ServerReconciler) pxeBootServer(ctx context.Context, log logr.Logger, s
}

bmcClient, err := GetBMCClientForServer(ctx, r.Client, server, r.Insecure)
defer bmcClient.Logout()
defer func() {
if bmcClient != nil {
bmcClient.Logout()
}
}()

if err != nil {
return fmt.Errorf("failed to get BMC client: %w", err)
Expand All @@ -520,6 +576,10 @@ func (r *ServerReconciler) extractServerDetailsFromRegistry(ctx context.Context,
return false, nil
}

if resp == nil {
return false, fmt.Errorf("failed to find server information in registry")
}

if err != nil {
return false, fmt.Errorf("failed to fetch server details: %w", err)
}
Expand Down Expand Up @@ -585,17 +645,21 @@ func (r *ServerReconciler) ensureServerPowerState(ctx context.Context, log logr.
}

bmcClient, err := GetBMCClientForServer(ctx, r.Client, server, r.Insecure)
defer bmcClient.Logout()
defer func() {
if bmcClient != nil {
bmcClient.Logout()
}
}()
if err != nil {
return fmt.Errorf("failed to get BMC client: %w", err)
}

if powerOp == powerOpOn {
switch powerOp {
case powerOpOn:
if err := bmcClient.PowerOn(server.Spec.UUID); err != nil {
return fmt.Errorf("failed to power on server: %w", err)
}
}
if powerOp == powerOpOff {
case powerOpOff:
if err := bmcClient.PowerOff(server.Spec.UUID); err != nil {
return fmt.Errorf("failed to power off server: %w", err)
}
Expand Down Expand Up @@ -663,28 +727,6 @@ func (r *ServerReconciler) invalidateRegistryEntryForServer(log logr.Logger, ser
return nil
}

// SetupWithManager registers the ServerReconciler with the given Manager.
// The controller reconciles Server objects and additionally watches
// ServerBootConfiguration objects, which are mapped to reconcile requests by
// enqueueServerByServerBootConfiguration.
func (r *ServerReconciler) SetupWithManager(mgr ctrl.Manager) error {
	bldr := ctrl.NewControllerManagedBy(mgr).For(&metalv1alpha1.Server{})
	bldr = bldr.Watches(
		&metalv1alpha1.ServerBootConfiguration{},
		r.enqueueServerByServerBootConfiguration(),
	)
	return bldr.Complete(r)
}

// enqueueServerByServerBootConfiguration returns an event handler that maps a
// ServerBootConfiguration to a single reconcile request named after
// Spec.ServerRef.Name.
//
// Objects that are not a *ServerBootConfiguration produce no requests instead
// of panicking on the type assertion.
func (r *ServerReconciler) enqueueServerByServerBootConfiguration() handler.EventHandler {
	return handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []ctrl.Request {
		config, ok := obj.(*metalv1alpha1.ServerBootConfiguration)
		if !ok {
			return nil
		}
		return []ctrl.Request{
			{
				NamespacedName: types.NamespacedName{Name: config.Spec.ServerRef.Name},
			},
		}
	})
}

func (r *ServerReconciler) applyBootOrder(ctx context.Context, log logr.Logger, server *metalv1alpha1.Server) error {
if server.Spec.BMCRef == nil && server.Spec.BMC == nil {
log.V(1).Info("Server has no BMC connection configured")
Expand Down Expand Up @@ -764,8 +806,30 @@ func (r *ServerReconciler) applyBiosSettings(ctx context.Context, log logr.Logge
}
}
if !versionMatch {
log.V(1).Info("none of the Bios versions match")
log.V(1).Info("None of the Bios versions match")
return nil
}
return nil
}

// SetupWithManager registers the ServerReconciler with the given Manager.
// The controller reconciles Server objects and additionally watches
// ServerBootConfiguration objects, which are mapped to reconcile requests by
// enqueueServerByServerBootConfiguration.
func (r *ServerReconciler) SetupWithManager(mgr ctrl.Manager) error {
	bldr := ctrl.NewControllerManagedBy(mgr).For(&metalv1alpha1.Server{})
	bldr = bldr.Watches(
		&metalv1alpha1.ServerBootConfiguration{},
		r.enqueueServerByServerBootConfiguration(),
	)
	return bldr.Complete(r)
}

// enqueueServerByServerBootConfiguration returns an event handler that maps a
// ServerBootConfiguration to a single reconcile request named after
// Spec.ServerRef.Name.
//
// Objects that are not a *ServerBootConfiguration produce no requests instead
// of panicking on the type assertion.
func (r *ServerReconciler) enqueueServerByServerBootConfiguration() handler.EventHandler {
	return handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []ctrl.Request {
		config, ok := obj.(*metalv1alpha1.ServerBootConfiguration)
		if !ok {
			return nil
		}
		return []ctrl.Request{
			{
				NamespacedName: types.NamespacedName{Name: config.Spec.ServerRef.Name},
			},
		}
	})
}
Loading