From a8775cc25ec50f5042a7566198ad1df7173ee454 Mon Sep 17 00:00:00 2001 From: Andreas Fritzler Date: Thu, 22 Aug 2024 10:46:40 +0200 Subject: [PATCH 1/4] Add mkdocs based project documentation Initial version and tooling setup for the project documentation. --- .github/workflows/publish-docs.yml | 23 +++++++++++ Makefile | 12 ++++++ docs/Dockerfile | 14 +++++++ docs/README.md | 11 ++++++ docs/requirements.txt | 1 + mkdocs.yml | 63 ++++++++++++++++++++++++++++++ 6 files changed, 124 insertions(+) create mode 100644 .github/workflows/publish-docs.yml create mode 100644 docs/Dockerfile create mode 100644 docs/README.md create mode 100644 docs/requirements.txt create mode 100644 mkdocs.yml diff --git a/.github/workflows/publish-docs.yml b/.github/workflows/publish-docs.yml new file mode 100644 index 0000000..39bee09 --- /dev/null +++ b/.github/workflows/publish-docs.yml @@ -0,0 +1,23 @@ +name: Publish docs via GitHub Pages +on: + push: + branches: [ main ] + +jobs: + build: + name: Deploy docs + runs-on: ubuntu-latest + steps: + - name: Checkout main + uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: 'pypy3.9' + - uses: actions/cache@v4 + with: + key: ${{ github.ref }} + path: .cache + - name: Deploy docs + uses: afritzler/mkdocs-gh-pages-action@main + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/Makefile b/Makefile index 1627584..a70506b 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,9 @@ IMG ?= controller:latest # ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary. 
ENVTEST_K8S_VERSION = 1.31.0 +# Docker image name for the mkdocs based local development setup +IMAGE=ironcore-dev/metal-operator-docs + # Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set) ifeq (,$(shell go env GOBIN)) GOBIN=$(shell go env GOPATH)/bin @@ -102,6 +105,15 @@ lint: golangci-lint ## Run golangci-lint linter & yamllint lint-fix: golangci-lint ## Run golangci-lint linter and perform fixes $(GOLANGCI_LINT) run --fix +.PHONY: start-docs +start-docs: ## Start the local mkdocs based development environment. + docker build -t $(IMAGE) -f docs/Dockerfile . --load + docker run -p 8000:8000 -v `pwd`/:/docs $(IMAGE) + +.PHONY: clean-docs +clean-docs: ## Remove all local mkdocs Docker images (cleanup). + docker container prune --force --filter "label=project=metal_operator" + ##@ Build .PHONY: docs diff --git a/docs/Dockerfile b/docs/Dockerfile new file mode 100644 index 0000000..e143e2f --- /dev/null +++ b/docs/Dockerfile @@ -0,0 +1,14 @@ +FROM squidfunk/mkdocs-material:latest + +LABEL project=metal_operator + +WORKDIR /docs + +COPY docs/requirements.txt requirements.txt +RUN pip install --no-cache-dir -r requirements.txt + +EXPOSE 8000 + +# Start development server by default +ENTRYPOINT ["mkdocs"] +CMD ["serve", "--dev-addr=0.0.0.0:8000"] diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..180e7af --- /dev/null +++ b/docs/README.md @@ -0,0 +1,11 @@ +# metal-operator documentation + +This folder contains the documentation of the `metal-operator` project. 
+ +## Local dev setup + +You can run the documentation via: + +```shell +make start-docs +``` diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..5d0c7af --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1 @@ +# your mkdocs plugins go here diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..8a31da7 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,63 @@ +site_name: IronCore / Metal Operator +repo_url: https://github.com/ironcore-dev/metal-operator/ +edit_uri: edit/main/docs/ +theme: + icon: + repo: fontawesome/brands/github + features: + - navigation.instant + - navigation.tracking + - navigation.expand + - navigation.indexes + - navigation.top + name: material + logo: https://raw.githubusercontent.com/ironcore-dev/ironcore/main/docs/assets/logo.svg + favicon: https://raw.githubusercontent.com/ironcore-dev/ironcore/main/docs/assets/favicon/favicon.ico + palette: + - media: "(prefers-color-scheme: light)" + scheme: default + primary: white + toggle: + icon: material/weather-night + name: Switch to dark mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + primary: black + toggle: + icon: material/weather-sunny + name: Switch to light mode + font: + text: 'Work Sans' + +plugins: +- search + +markdown_extensions: +- pymdownx.highlight +- pymdownx.superfences: + custom_fences: + - name: mermaid + class: mermaid + format: !!python/name:pymdownx.superfences.fence_code_format +- pymdownx.snippets +- codehilite +- admonition +- abbr +- toc: + permalink: true + +nav: +- Home: README.md +- Architecture: + - Overview: README.md +- Usage: + - Discover Servers: README.md + - Provision Servers: README.md +- Developer Guide: + - Local Setup: dev_setup.md +- API Reference: api-reference/api.md + +extra: + social: + - icon: fontawesome/brands/github + link: https://github.com/ironcore-dev/metal-operator From 611be8a34221c29b5a4b10139bd8daa0944e986d Mon Sep 17 00:00:00 2001 From: Andreas Fritzler Date: Tue, 29 Oct 
2024 17:05:46 +0100 Subject: [PATCH 2/4] Describe concepts and overall architecture --- Makefile | 8 +- docs/README.md | 54 +++++++-- docs/architecture.md | 103 ++++++++++++++++ docs/concepts/bmcs.md | 74 ++++++++++++ docs/concepts/bmcsecrets.md | 41 +++++++ docs/concepts/endpoints.md | 91 ++++++++++++++ docs/concepts/serverbootconfigurations.md | 45 +++++++ docs/concepts/serverclaims.md | 59 ++++++++++ docs/concepts/servers.md | 137 ++++++++++++++++++++++ docs/development/dev_docs.md | 15 +++ docs/{ => development}/dev_setup.md | 0 hack/api-reference/config.json | 2 +- mkdocs.yml | 18 +-- 13 files changed, 628 insertions(+), 19 deletions(-) create mode 100644 docs/architecture.md create mode 100644 docs/concepts/bmcs.md create mode 100644 docs/concepts/bmcsecrets.md create mode 100644 docs/concepts/endpoints.md create mode 100644 docs/concepts/serverbootconfigurations.md create mode 100644 docs/concepts/serverclaims.md create mode 100644 docs/concepts/servers.md create mode 100644 docs/development/dev_docs.md rename docs/{ => development}/dev_setup.md (100%) diff --git a/Makefile b/Makefile index a70506b..d76a630 100644 --- a/Makefile +++ b/Makefile @@ -105,13 +105,13 @@ lint: golangci-lint ## Run golangci-lint linter & yamllint lint-fix: golangci-lint ## Run golangci-lint linter and perform fixes $(GOLANGCI_LINT) run --fix -.PHONY: start-docs -start-docs: ## Start the local mkdocs based development environment. +.PHONY: startdocs +startdocs: ## Start the local mkdocs based development environment. docker build -t $(IMAGE) -f docs/Dockerfile . --load docker run -p 8000:8000 -v `pwd`/:/docs $(IMAGE) -.PHONY: clean-docs -clean-docs: ## Remove all local mkdocs Docker images (cleanup). +.PHONY: cleandocs +cleandocs: ## Remove all local mkdocs Docker images (cleanup). 
docker container prune --force --filter "label=project=metal_operator" ##@ Build diff --git a/docs/README.md b/docs/README.md index 180e7af..85d5b60 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,11 +1,51 @@ -# metal-operator documentation +# Metal-Operator Documentation -This folder contains the documentation of the `metal-operator` project. +**Welcome to the Metal-Operator Documentation!** -## Local dev setup +The `metal-operator` is a Kubernetes-native operator, part of the IronCore open-source project, designed for robust bare metal infrastructure management. By leveraging Baseboard Management Controllers (BMCs) and the Redfish API, `metal-operator` enables streamlined and automated server discovery, provisioning, and lifecycle management. Using the Kubernetes Controller pattern, `metal-operator` provides a CRD-based operational model that standardizes bare metal management across different hardware environments. Integration with vendor-specific tooling is also possible for enhanced functionality when needed. -You can run the documentation via: +--- -```shell -make start-docs -``` +## Key Features + +### 1. **Discover and Onboard Bare Metal Servers** +- Automatically detect and register bare metal servers through BMCs and the Redfish API. +- Efficiently gather hardware specs, network configurations, and initial health checks directly from BMC interfaces. + +### 2. **Provision Software on Bare Metal Servers** +- Deploy and configure software on registered servers using BMC interactions and standardized provisioning workflows. +- Support for dynamic software configuration and Redfish API-based management for consistent, vendor-neutral provisioning. + +### 3. **Manage Server Reservations** +- Reserve specific bare metal resources based on workload needs. +- Prevent resource conflicts by managing reservations via Kubernetes-native CRDs, ensuring that workloads align with available hardware resources. + +### 4. 
**Perform Day 2 Operations** +- Utilize the Redfish API to manage BIOS, firmware, and driver updates. +- Automate ongoing maintenance tasks and operational workflows to maintain infrastructure resilience and uptime. + +### 5. **Decommission and Maintain Faulty Servers** +- Decommission servers via BMC controls for clean removal from active pools. +- Schedule and perform maintenance tasks with BMC data to optimize uptime and maintain hardware reliability. + +--- + +## How It Works + +The `metal-operator` relies on **BMCs and the Redfish API** to handle bare metal server management tasks. Through a CRD-based operational model, `metal-operator` provides Kubernetes-native management of bare metal infrastructure, enabling consistent, vendor-neutral interactions. + +### Core Components +- **Custom Resources (CRs)**: Extend Kubernetes to manage server configurations, reservations, and operational workflows. +- **Controllers**: Automate lifecycle management through Redfish-enabled interactions, from provisioning to decommissioning. +- **Reconcilers**: Ensure the desired state matches the actual state by continuously monitoring hardware via BMC integrations. + +### Architecture Overview + +1. **Discovery**: Register new bare metal servers through BMCs and Redfish API, creating CRDs for streamlined management. +2. **Provisioning**: Apply software images and configurations using Redfish API, based on templates or custom configurations. +3. **Operations**: Execute BIOS, firmware updates, and other maintenance tasks through standardized workflows. +4. **Decommissioning**: Safely remove or maintain servers using Redfish and BMC controls, marking them for reuse or retirement as needed. + +--- + +The `metal-operator` is a core component of the IronCore project, designed to simplify and automate bare metal management across various hardware environments using BMC and Redfish API integrations. Expect continuous updates to expand capabilities and enhance usability. 
diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000..7cca07d --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,103 @@ +# Metal-Operator Architectural Description + +The **metal-operator** is a Kubernetes operator designed to manage bare metal servers within a Kubernetes environment. It automates the provisioning, configuration, and lifecycle management of physical servers by integrating them into Kubernetes using Custom Resource Definitions (CRDs) and controllers. The architecture promotes modularity, scalability, and flexibility, enabling seamless integration with various boot mechanisms and provisioning tools. + +## Architectural Diagram + +```mermaid +flowchart LR + subgraph Out-of-Band Network + EndpointReconciler + end + EndpointReconciler -- Discovers --> EndpointCRD + EndpointCRD -- Uses --> MACPrefixDatabase + EndpointReconciler -- Creates --> BMC & BMCSecret + + BMCReconciler -- Manages --> BMC + BMCReconciler -- Uses --> BMCSecret + BMCReconciler -- Discovers Servers --> ServerCRD + + ServerReconciler -- Manages --> ServerCRD + ServerReconciler -- Uses --> metalprobe + ServerReconciler -- Waits for --> ServerBootConfiguration + + ServerClaimReconciler -- Manages --> ServerClaimCRD + ServerClaimCRD -- References --> ServerCRD + ServerClaimReconciler -- Creates --> ServerBootConfiguration + + BootOperator -- Watches --> ServerBootConfiguration + BootOperator -- Prepares --> BootEnvironment + BootOperator -- Updates --> ServerBootConfiguration + + ServerReconciler -- Powers On --> ServerCRD + + classDef operator fill:#f9f,stroke:#333,stroke-width:2px; + classDef crd fill:#9f9,stroke:#333,stroke-width:2px; + classDef external fill:#ff9,stroke:#333,stroke-width:2px; + + class EndpointReconciler,BMCReconciler,ServerReconciler,ServerClaimReconciler operator; + class EndpointCRD,BMC,ServerCRD,ServerClaimCRD,ServerBootConfiguration crd; + class BootOperator external; +``` + +## Key Components + +### 1. 
Custom Resource Definitions (CRDs) + +- [**Endpoint**](concepts/endpoints.md): Represents devices on the out-of-band management network, identified by MAC and IP addresses. +- [**BMC**](concepts/bmcs.md): Models Baseboard Management Controllers (BMCs), allowing interaction with server hardware. +- [**BMCSecret**](concepts/bmcsecrets.md): Securely stores credentials required to access BMCs. +- [**Server**](concepts/servers.md): Represents physical servers, managing their state, power, and configurations. +- [**ServerClaim**](concepts/serverclaims.md): Allows users to reserve servers by specifying desired configurations and boot images. +- [**ServerBootConfiguration**](concepts/serverbootconfigurations.md): Signals the need to prepare the boot environment for a server. + +### 2. Controllers + +- **EndpointReconciler**: Discovers devices on the out-of-band network by processing `Endpoint` resources. It uses a **MAC Prefix Database** to identify device types, vendors, protocols, and default credentials. When a BMC is detected, it creates corresponding `BMC` and `BMCSecret` resources. + +- **BMCReconciler**: Manages `BMC` resources by connecting to BMC devices using credentials from `BMCSecret`. It retrieves hardware information, updates the BMC status, and detects managed servers, creating `Server` resources for them. + +- **ServerReconciler**: Manages `Server` resources and their lifecycle states. During the **Discovery** phase, it interacts with BMCs and uses the **metalprobe** agent to collect in-band hardware information, updating the server's status. It handles power management, BIOS configurations, and transitions servers through various states (e.g., Initial, Discovery, Available, Reserved). + +- **ServerClaimReconciler**: Handles `ServerClaim` resources, allowing users to reserve servers. Upon creation of a `ServerClaim`, it allocates an available server, transitions it to the **Reserved** state, and creates a `ServerBootConfiguration`. 
When the claim is deleted, it releases the server, transitioning it to the **Cleanup** state for sanitization. + +- **Boot Operator (External Component)**: Monitors `ServerBootConfiguration` resources to prepare the boot environment (e.g., configuring DHCP, PXE servers). Once the boot environment is ready, it updates the `ServerBootConfiguration` status to **Ready**. + +## Workflow Summary + +1. **Discovery and Initialization**: + - The **EndpointReconciler** discovers devices on the out-of-band network, creating `Endpoint` resources. + - BMCs are identified using the MAC Prefix Database, leading to the creation of `BMC` and `BMCSecret` resources. + - The **BMCReconciler** connects to BMCs, gathers hardware details, and creates `Server` resources for each managed server. + +2. **Server Discovery Phase**: + - The **ServerReconciler** enters the **Discovery** phase, interacting with BMCs and booting servers using a predefined ignition. + - The **metalprobe** agent runs on the servers, collecting detailed hardware information (e.g., network interfaces, storage devices) and reporting back to update the `Server` status. + +3. **Server Availability**: + - Once discovery is complete, servers transition to the **Available** state, ready to be claimed. + +4. **Server Reservation and Boot Configuration**: + - Users create `ServerClaim` resources to reserve servers, specifying desired OS images and ignition configurations. + - The **ServerClaimReconciler** allocates servers, transitions them to the **Reserved** state, and creates `ServerBootConfiguration` resources. + +5. **Boot Environment Preparation**: + - External components (e.g., **boot-operator**) watch for `ServerBootConfiguration` resources and prepare the boot environment accordingly. + - Once the environment is ready, they update the `ServerBootConfiguration` status to **Ready**. + +6. **Server Power-On and Usage**: + - The **ServerReconciler** detects the ready status and powers on the server. 
+ - The server boots using the specified image and ignition configuration. + +7. **Cleanup and Maintenance**: + - When a `ServerClaim` is deleted, the server transitions to the **Cleanup** state. + - The **ServerReconciler** performs sanitization tasks (e.g., wiping disks, resetting configurations) before returning the server to the **Available** state. + - Servers can enter the **Maintenance** state for updates or repairs. + +## Architectural Benefits + +- **Modularity**: Separation of concerns allows for flexible integration with various boot mechanisms and provisioning tools (e.g., OpenStack Ironic, custom solutions). +- **Scalability**: Automates the management of large numbers of servers through Kubernetes CRDs and controllers. +- **Extensibility**: Supports customization through additional CRDs and operators, enabling adaptation to specific infrastructure needs. +- **Security**: Manages sensitive information like BMC credentials using Kubernetes Secrets and enforces access control via RBAC policies. +- **Automation**: Streamlines hardware provisioning, configuration, and lifecycle management, reducing manual intervention and potential errors. diff --git a/docs/concepts/bmcs.md b/docs/concepts/bmcs.md new file mode 100644 index 0000000..5796cd4 --- /dev/null +++ b/docs/concepts/bmcs.md @@ -0,0 +1,74 @@ +# BMCs + +The BMC Custom Resource Definition (CRD) represents a Baseboard Management Controller. +It is designed to manage and monitor the state of BMC devices and the systems (servers) they control. The primary +purpose of the BMC resource is to reconcile the BMC state and detect all systems it manages by creating the +corresponding [`Server`](servers.md) resources. 
+ +## Example BMC Resource + +Using `endpointRef`: + +```yaml +apiVersion: metal.ironcore.dev/v1alpha1 +kind: BMC +metadata: + name: my-bmc +spec: + endpointRef: + name: my-bmc-endpoint + bmcSecretRef: + name: my-bmc-secret + protocol: + name: Redfish + port: 8000 + consoleProtocol: + name: SSH + port: 22 +``` + +Using inline `endpoint`: + +```yaml +apiVersion: metal.ironcore.dev/v1alpha1 +kind: BMC +metadata: + name: my-bmc-inline +spec: + endpoint: + macAddress: "00:1A:2B:3C:4D:5E" + ip: "192.168.100.10" + bmcSecretRef: + name: my-bmc-secret + protocol: + name: Redfish + port: 8000 + consoleProtocol: + name: SSH + port: 22 +``` + +## Usage + +The BMC CRD is essential for managing and monitoring BMC devices. It is used to: + +- **Reconcile BMC State**: Continuously monitor the BMC's status and update its state. +- **Detect Managed Systems**: Identify all systems (servers) managed by the BMC and create corresponding [`Server`](servers.md) resources. +- **Automate Hardware Management**: Enable automated power control, firmware updates, and health monitoring of physical servers through the BMC. + +## Reconciliation Process + +The `BMCReconciler` is a controller that processes BMC resources to: + +1. **Access BMC Device**: Uses the `endpointRef` or `endpoint`, along with `bmcSecretRef`, to establish a connection +with the BMC using the specified `protocol`. + +2. **Retrieve BMC Information**: Gathers details such as manufacturer, model, serial number, firmware version, and +power state. + +3. **Update BMCStatus**: Populates the `status` field of the BMC resource with the retrieved information. + +4. **Detect Managed Systems**: Identifies all systems (servers) that the BMC manages. + +5. **Create Server Resources**: For each detected system, the `BMCReconciler` creates a corresponding [`Server`](servers.md) +resource to represent the physical server. 
diff --git a/docs/concepts/bmcsecrets.md b/docs/concepts/bmcsecrets.md new file mode 100644 index 0000000..cdf09ba --- /dev/null +++ b/docs/concepts/bmcsecrets.md @@ -0,0 +1,41 @@ +# BMCSecrets + +The `BMCSecret` Custom Resource Definition (CRD) is a Kubernetes resource used to store sensitive credentials required +to access a Baseboard Management Controller (BMC). This resource holds the `username` and `password` needed for +authentication with the BMC devices. The `BMCSecret` is utilized by the `BMCReconciler` to construct clients that +interact with BMCs. + +## Example BMCSecret Resource + +An example of how to define a `BMCSecret` resource: + +```yaml +apiVersion: metal.ironcore.dev/v1alpha1 +kind: BMCSecret +metadata: + name: my-bmc-secret +stringData: + username: admin + password: supersecretpassword +type: Opaque +``` + +## Usage + +The `BMCSecret` resource is essential for securely managing credentials required to access BMC devices. It is used by +the `BMCReconciler` to: + +- **Construct BMC Clients**: Utilize the credentials to authenticate and establish connections with BMC devices. +- **Automate Hardware Management**: Enable automated operations such as power control, firmware updates, and +health monitoring by authenticating with the BMC. + +## Credential Sources + +- **Endpoint-Based Discovery**: When BMCs are discovered through an [`Endpoint`](endpoints.md) resource and a MAC Prefix Database, +the credentials (`username` and `password`) are derived automatically based on the MAC address prefixes. +- **Manual Configuration**: Users can manually create BMCSecret resources with the required credentials to interact with specific BMCs. + +## Reconciliation Process + +The `BMCReconciler` uses the `bmcSecretRef` field in the BMC resource's specification to reference the corresponding +`BMCSecret`. It retrieves the credentials from the BMCSecret to authenticate with the BMC device. 
diff --git a/docs/concepts/endpoints.md b/docs/concepts/endpoints.md new file mode 100644 index 0000000..7a63486 --- /dev/null +++ b/docs/concepts/endpoints.md @@ -0,0 +1,91 @@ +# Endpoints + +The Endpoint Custom Resource Definition (CRD) is a Kubernetes resource used to represent and identify devices or +entities within an out-of-band (OOB) network. It serves as a means to catalog and manage devices such as Baseboard +Management Controllers (BMCs) by capturing their unique identifiers, specifically the MAC address and IP address. +The `EndpointReconciler` leverages this information to determine the nature of the device, its vendor, and any initial +credentials required for further interactions. + +## Example Endpoint Resource + +An example of how to define an Endpoint resource: + +```yaml +apiVersion: metal.ironcore.dev/v1alpha1 +kind: Endpoint +metadata: + name: device-12345 +spec: + macAddress: "00:1A:2B:3C:4D:5E" + ip: "192.168.100.10" +``` + +## MAC Prefix Database and EndpointReconciler Configuration + +The `EndpointReconciler` can be configured with a MAC Prefix Database to determine the characteristics of devices based +on their MAC addresses. This database maps MAC address prefixes to device information such as the manufacturer, +protocol, port, type, default credentials, and console settings. + +### Configuration + +The MAC Prefix Database is typically configured using a YAML file, which is passed to the `metal-operator` using the +`--mac-prefixes-file` flag. This file contains mappings of MAC address prefixes to device specifications. + +Example YAML Configuration: + +```yaml +macPrefixes: + - macPrefix: "23" + manufacturer: "Foo" + protocol: "Redfish" + port: 8000 + type: "bmc" + defaultCredentials: + - username: "foo" + password: "bar" + console: + type: "ssh" + port: 22 +``` + +**Key Fields**: + +- macPrefix (`string`): The prefix of the MAC address used to identify the device manufacturer or type. 
+- manufacturer (`string`): The name of the device manufacturer. +- protocol (`string`): The communication protocol used (e.g., `Redfish`). +- port (`int`): The network port used for communication. +- type (`string`): The type of device (e.g., `bmc`). +- defaultCredentials (`list`): A list of default credentials for accessing the device. + - username (`string`): The default username. + - password (`string`): The default password. +- console (`dict`): Console access configuration. + - type (string): The console protocol (e.g., ssh). + - port (int): The port used for console access. + +### Using `--mac-prefixes-file` Flag + +The `metal-operator` accepts the `--mac-prefixes-file` flag to specify the path to the MAC Prefix Database YAML file: + +```shell +metal-operator --mac-prefixes-file /path/to/mac_prefixes.yaml +``` + +## Reconciliation Process + +1. **MAC Address Matching**: When the `EndpointReconciler` processes an `Endpoint` resource, it extracts the +`macAddress` from the `spec`. + +2. **Prefix Lookup**: It compares the MAC address prefix against the entries in the MAC Prefix Database. + +3. **Device Identification**: If a matching prefix is found, the device is identified with the associated manufacturer, +type, and protocol. + +4. **Credential Assignment**: The default credentials specified in the database are used for initial authentication with +the device. + +5. **BMC and BMCSecret Creation**: When the `EndpointReconciler` detects that the device is a Baseboard Management +Controller (`type: "bmc"`), it automatically creates a [`BMC`](bmcs.md) and a [`BMCSecret`](bmcsecrets.md) +object using the data from the MAC Prefix Database. These objects are used to manage and authenticate with the BMC device. + +6. **Configuration Application**: Additional settings such as console access and communication ports are applied based +on the database entries. 
diff --git a/docs/concepts/serverbootconfigurations.md b/docs/concepts/serverbootconfigurations.md new file mode 100644 index 0000000..8ab071c --- /dev/null +++ b/docs/concepts/serverbootconfigurations.md @@ -0,0 +1,45 @@ +# ServerBootConfigurations + +The `ServerBootConfiguration` Custom Resource Definition (CRD) is a Kubernetes resource used to signal the need to +initiate a boot process for a bare metal server. It serves as an indicator for external components responsible for +configuring network boot environments, such as PXE or HTTPBoot servers. The `ServerBootConfiguration` resource allows +the `metal-operator` to delegate the boot preparation process to third-party operators like the +[`boot-operator`](https://github.com/ironcore-dev/boot-operator) or tools like OpenStack Ironic. + +## Example ServerBootConfiguration Resource + +```yaml +apiVersion: metal.ironcore.dev/v1alpha1 +kind: ServerBootConfiguration +metadata: + name: my-server-boot-config + namespace: default +spec: + serverRef: + name: my-server + image: my-osimage:latest + ignitionSecretRef: + name: my-ignition-secret +``` + +## Integration with Third-Party Components + +The actual preparation of the boot environment is performed by external components, which may include: +- boot-operator: A custom operator that handles boot environment preparation as part of the IronCore project. +- OpenStack Ironic: A service for managing and provisioning bare metal servers. + +These components watch for `ServerBootConfiguration` resources and perform the necessary actions to set up the boot +environment according to the specifications provided. + +## Why Externalize Boot Preparation to a Third Party? + +**Separation of Concerns**: By abstracting the boot preparation into a separate resource, the `metal-operator` +remains agnostic to the specifics of the boot process, allowing for flexibility in different deployment scenarios. 
+ +**Custom Implementations**: Users can implement their own components to handle the `ServerBootConfiguration`, enabling +integration with various provisioning systems or custom workflows. + +## Reconciliation Process + +The `ServerReconciler` checks the `ServerBootConfiguration` status before powering on the server. Servers are not +powered on until the boot environment is confirmed to be `ready`. diff --git a/docs/concepts/serverclaims.md b/docs/concepts/serverclaims.md new file mode 100644 index 0000000..628d2a3 --- /dev/null +++ b/docs/concepts/serverclaims.md @@ -0,0 +1,59 @@ +# ServerClaims + +The `ServerClaim` Custom Resource Definition (CRD) is a Kubernetes resource used to claim ownership of a bare metal +[`Server`](servers.md) resource that is in the `Available` state. It allows users to specify the desired +operating system image and ignition configuration for booting the server. The `ServerClaimReconciler` handles the +allocation of servers to claims and manages the lifecycle of the claim and the server. + +## Example ServerClaim Resource + +Claiming a Specific Server with Ignition Configuration: + +```yaml +apiVersion: metal.ironcore.dev/v1alpha1 +kind: ServerClaim +metadata: + name: my-server-claim + namespace: default +spec: + power: "On" + serverRef: + name: "my-server" + image: "my-osimage:latest" + ignitionSecretRef: + name: "my-ignition-secret" +``` + +Claiming a Server Using a Selector: + +```yaml +apiVersion: metal.ironcore.dev/v1alpha1 +kind: ServerClaim +metadata: + name: selector-server-claim + namespace: default +spec: + power: "On" + serverSelector: + matchLabels: + hardwareType: gpu-node + location: datacenter-1 + image: my-osimage:latest + ignitionSecretRef: + name: my-ignition-secret +``` + +## Reconciliation Process + +- [`ServerBootConfiguration`](serverbootconfigurations.md): + - The `ServerClaimReconciler` creates a [`ServerBootConfiguration`](serverbootconfigurations.md) resource under the hood. 
+ - This resource specifies how the server should be booted, including the image and ignition configuration. + +- **State Transitions**: + - Available → Reserved: When a server is successfully claimed. + - Reserved → Cleanup: When the `ServerClaim` is deleted. + - Cleanup → Available: After cleanup tasks are completed. + +- **Cleanup Process**: + - Ensures that servers are sanitized before being made available again. + - Tasks may include wiping disks, resetting BIOS settings, and clearing configurations. diff --git a/docs/concepts/servers.md b/docs/concepts/servers.md new file mode 100644 index 0000000..9b73f44 --- /dev/null +++ b/docs/concepts/servers.md @@ -0,0 +1,137 @@ +# Servers + +The `Server` Custom Resource Definition (CRD) represents a bare metal server. It manages the state and lifecycle of +physical servers, enabling automated hardware management tasks such as power control, BIOS configuration, and +firmware updates. Interaction with a `Server` resource is facilitated through its associated Baseboard Management +Controller (BMC), either by referencing a [`BMC`](bmcs.md) resource or by providing direct BMC configuration. + +## Example Server Resource + +```yaml +apiVersion: metal.ironcore.dev/v1alpha1 +kind: Server +metadata: + name: my-server +spec: + uuid: "123e4567-e89b-12d3-a456-426614174000" + power: "Off" + bmcRef: + name: my-bmc + bootOrder: + - name: PXE + priority: 1 + device: Network + BIOS: + - version: "1.0.3" + settings: + BootMode: UEFI + Virtualization: Enabled +``` + +## Usage + +The `Server` CRD is central to managing bare metal servers. It allows for: + +- **Power Management**: Powering servers on and off. +- **BIOS Configuration**: Changing BIOS settings and performing BIOS updates. +- **Lifecycle Management**: Handling the server's lifecycle through various states. +- **Hardware Discovery**: Gathering hardware information via BMC and in-band agents. + +## Lifecycle and States + +A server undergoes the following phases: + +1. 
**Initial**: The server object is created; hardware details are not yet known. + +2. **Discovery**: + - The `ServerReconciler` interacts with the BMC to retrieve hardware details. + - An initial boot is performed using a predefined ignition configuration. + - An agent called [`metalprobe`](https://github.com/ironcore-dev/metal-operator/tree/main/cmd/metalprobe) runs on the server to collect additional data (e.g., network interfaces, disks). + - The collected data is reported back to the `metal-operator` and added to the `ServerStatus`. + +3. **Available**: The server has completed discovery and is ready for use. + +4. **Reserved**: + - A [`ServerClaim`](serverclaims.md) resource is created to claim the server. + - The server transitions to the `Reserved` state. + - The server is allocated for a specific use or user. + +5. **Cleanup**: + - When the [`ServerClaim`](serverclaims.md) is removed, the server enters the Cleanup state. + - Sanitization processes are performed (e.g., wiping disks, resetting BIOS settings). + +6. **Maintenance**: + - Servers in the `Available` state can transition to `Maintenance`. + - Maintenance tasks such as BIOS updates or hardware repairs are performed. + +7. **Error**: + - The server has encountered an error. + - Requires intervention to resolve issues before it can return to `Available`. 
+ +The state diagram below represents the various server states and their transitions: + +```mermaid +stateDiagram-v2 + [*] --> Initial + Initial --> Discovery : Server object created + Discovery --> Available : Discovery complete + Available --> Reserved : ServerClaim created + Reserved --> Cleanup : ServerClaim removed + Cleanup --> Available : Cleanup complete + Available --> Maintenance : Maintenance initiated + Maintenance --> Available : Maintenance complete + Available --> Error : Error detected + Reserved --> Error : Error detected + Discovery --> Error : Error detected + Cleanup --> Error : Error detected + Maintenance --> Error : Error detected + Error --> Maintenance : Enter maintenance to fix error + Error --> Available : Error resolved +``` + +## Interaction with BMC + +Interaction with a server is done through its BMC: + +Via Reference: Reference a [`BMC`](bmcs.md) resource using `bmcRef`. + +```yaml +apiVersion: metal.ironcore.dev/v1alpha1 +kind: Server +metadata: + name: server-with-bmc-ref +spec: + uuid: "123e4567-e89b-12d3-a456-426614174000" + power: "On" + bmcRef: + name: my-bmc + bootOrder: + - name: PXE + priority: 1 + device: Network + BIOS: + - version: "1.0.3" + settings: + BootMode: UEFI + HyperThreading: Enabled +``` + +Inline Configuration: Use the `bmc` field to provide direct BMC access details. 
+ +```yaml +apiVersion: metal.ironcore.dev/v1alpha1 +kind: BMC +metadata: + name: my-bmc +spec: + endpointRef: + name: my-bmc-endpoint + bmcSecretRef: + name: my-bmc-secret + protocol: + name: Redfish + port: 8000 + consoleProtocol: + name: SSH + port: 22 +``` diff --git a/docs/development/dev_docs.md b/docs/development/dev_docs.md new file mode 100644 index 0000000..efe347e --- /dev/null +++ b/docs/development/dev_docs.md @@ -0,0 +1,15 @@ +# metal-operator documentation + +## Local dev setup + +You can run the documentation via: + +```shell +make startdocs +``` + +You can remove the `mkdocs` container image by running: + +```shell +make cleandocs +``` diff --git a/docs/dev_setup.md b/docs/development/dev_setup.md similarity index 100% rename from docs/dev_setup.md rename to docs/development/dev_setup.md diff --git a/hack/api-reference/config.json b/hack/api-reference/config.json index 65d4cf1..80e79d8 100644 --- a/hack/api-reference/config.json +++ b/hack/api-reference/config.json @@ -21,7 +21,7 @@ }, { "typeMatchPrefix": "^k8s\\.io/(api|apimachinery/pkg/apis)/", - "docsURLTemplate": "https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#{{lower .TypeIdentifier}}-{{arrIndex .PackageSegments -1}}-{{arrIndex .PackageSegments -2}}" + "docsURLTemplate": "https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.31/#{{lower .TypeIdentifier}}-{{arrIndex .PackageSegments -1}}-{{arrIndex .PackageSegments -2}}" } ], "typeDisplayNamePrefixOverrides": { diff --git a/mkdocs.yml b/mkdocs.yml index 8a31da7..e2e819a 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -48,13 +48,17 @@ markdown_extensions: nav: - Home: README.md -- Architecture: - - Overview: README.md -- Usage: - - Discover Servers: README.md - - Provision Servers: README.md -- Developer Guide: - - Local Setup: dev_setup.md +- Architecture: architecture.md +- Concepts: + - Endpoints: concepts/endpoints.md + - BMCs: concepts/bmcs.md + - BMCSecrets: concepts/bmcsecrets.md + - Servers: concepts/servers.md + - 
ServerBootConfigurations: concepts/serverbootconfigurations.md + - ServerClaims: concepts/serverclaims.md +- Development Guide: + - Local Setup: development/dev_setup.md + - Documentation: development/dev_docs.md - API Reference: api-reference/api.md extra: From eae002290a1e5ce36224389671c4decfed1caff6 Mon Sep 17 00:00:00 2001 From: Andreas Fritzler Date: Tue, 29 Oct 2024 17:08:09 +0100 Subject: [PATCH 3/4] Fix reuse compliance check --- .reuse/dep5 | 1 + 1 file changed, 1 insertion(+) diff --git a/.reuse/dep5 b/.reuse/dep5 index a08812f..e085d4a 100644 --- a/.reuse/dep5 +++ b/.reuse/dep5 @@ -17,6 +17,7 @@ Files: PROJECT config/* go.mod + mkdocs.yml go.sum hack/* Copyright: 2024 SAP SE or an SAP affiliate company and IronCore contributors From f12936719e25b14f169e755e87464e5f5ead4de8 Mon Sep 17 00:00:00 2001 From: Andreas Fritzler Date: Tue, 29 Oct 2024 23:26:58 +0100 Subject: [PATCH 4/4] Update API reference docs --- docs/api-reference/api.md | 60 +++++++++---------- internal/controller/server_controller_test.go | 2 +- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/docs/api-reference/api.md b/docs/api-reference/api.md index 818befe..490018c 100644 --- a/docs/api-reference/api.md +++ b/docs/api-reference/api.md @@ -67,7 +67,7 @@ map[string]string metadata
- + Kubernetes meta/v1.ObjectMeta @@ -94,7 +94,7 @@ BMCSpec endpointRef
- + Kubernetes core/v1.LocalObjectReference @@ -108,7 +108,7 @@ This reference is typically used to locate the BMC endpoint within the cluster.< bmcSecretRef
- + Kubernetes core/v1.LocalObjectReference @@ -208,7 +208,7 @@ string bmcSecretRef
- + Kubernetes core/v1.LocalObjectReference @@ -272,7 +272,7 @@ temporary state can be very short.

metadata
- + Kubernetes meta/v1.ObjectMeta @@ -334,7 +334,7 @@ The stringData field is never output when reading from the API.

type
- + Kubernetes core/v1.SecretType @@ -367,7 +367,7 @@ More info: + Kubernetes core/v1.LocalObjectReference @@ -381,7 +381,7 @@ This reference is typically used to locate the BMC endpoint within the cluster.< bmcSecretRef
- + Kubernetes core/v1.LocalObjectReference @@ -572,7 +572,7 @@ BMCPowerState conditions
- + []Kubernetes meta/v1.Condition @@ -722,7 +722,7 @@ This port is used by the specified console protocol to establish connections.

metadata
- + Kubernetes meta/v1.ObjectMeta @@ -1110,7 +1110,7 @@ This port is used by the specified protocol to establish connections.

metadata
- + Kubernetes meta/v1.ObjectMeta @@ -1174,7 +1174,7 @@ IndicatorLED serverClaimRef
- + Kubernetes core/v1.ObjectReference @@ -1188,7 +1188,7 @@ This field is optional and can be omitted if no claim is associated with this se bmcRef
- + Kubernetes core/v1.LocalObjectReference @@ -1216,7 +1216,7 @@ This field is optional and can be omitted if no BMC access is specified.

bootConfigurationRef
- + Kubernetes core/v1.ObjectReference @@ -1287,7 +1287,7 @@ ServerStatus metadata
- + Kubernetes meta/v1.ObjectMeta @@ -1314,7 +1314,7 @@ ServerBootConfigurationSpec serverRef
- + Kubernetes core/v1.LocalObjectReference @@ -1339,7 +1339,7 @@ This field is optional and can be omitted if not specified.

ignitionSecretRef
- + Kubernetes core/v1.LocalObjectReference @@ -1386,7 +1386,7 @@ ServerBootConfigurationStatus serverRef
- + Kubernetes core/v1.LocalObjectReference @@ -1411,7 +1411,7 @@ This field is optional and can be omitted if not specified.

ignitionSecretRef
- + Kubernetes core/v1.LocalObjectReference @@ -1497,7 +1497,7 @@ ServerBootConfigurationState metadata
- + Kubernetes meta/v1.ObjectMeta @@ -1537,7 +1537,7 @@ Power serverRef
- + Kubernetes core/v1.LocalObjectReference @@ -1551,7 +1551,7 @@ This field is optional and can be omitted if the server is to be selected using serverSelector
- + Kubernetes meta/v1.LabelSelector @@ -1565,7 +1565,7 @@ This field is optional and can be omitted if a specific server is referenced usi ignitionSecretRef
- + Kubernetes core/v1.LocalObjectReference @@ -1636,7 +1636,7 @@ Power serverRef
- + Kubernetes core/v1.LocalObjectReference @@ -1650,7 +1650,7 @@ This field is optional and can be omitted if the server is to be selected using serverSelector
- + Kubernetes meta/v1.LabelSelector @@ -1664,7 +1664,7 @@ This field is optional and can be omitted if a specific server is referenced usi ignitionSecretRef
- + Kubernetes core/v1.LocalObjectReference @@ -1810,7 +1810,7 @@ IndicatorLED serverClaimRef
- + Kubernetes core/v1.ObjectReference @@ -1824,7 +1824,7 @@ This field is optional and can be omitted if no claim is associated with this se bmcRef
- + Kubernetes core/v1.LocalObjectReference @@ -1852,7 +1852,7 @@ This field is optional and can be omitted if no BMC access is specified.

bootConfigurationRef
- + Kubernetes core/v1.ObjectReference @@ -2064,7 +2064,7 @@ BIOSSettings conditions
- + []Kubernetes meta/v1.Condition diff --git a/internal/controller/server_controller_test.go b/internal/controller/server_controller_test.go index 522d6c2..e6748ec 100644 --- a/internal/controller/server_controller_test.go +++ b/internal/controller/server_controller_test.go @@ -273,7 +273,7 @@ var _ = Describe("Server Controller", func() { )) By("Ensuring that the server is set back to initial due to the discovery check timing out") - Eventually(Object(server), "500ms").Should(SatisfyAll( + Eventually(Object(server), "1000ms").Should(SatisfyAll( HaveField("Status.State", metalv1alpha1.ServerStateInitial), ))