From 0aa431c7e1aba4631b67fed58bee271855aa1d4b Mon Sep 17 00:00:00 2001
From: parth-gr
Date: Thu, 7 Mar 2024 18:03:03 +0530
Subject: [PATCH] external: add ci and design document

Signed-off-by: parth-gr
---
 .github/workflows/canary-integration-test.yml |  35 +++++-
 Documentation/CRDs/Cluster/.pages             |   2 +-
 .../CRDs/Cluster/ceph-cluster-crd.md          |   4 +-
 .../CRDs/Cluster/external-cluster/.pages      |   3 +
 .../external-cluster.md                       |  18 ++-
 .../topology-for-external-mode.md             | 118 ++++++++++++++++++
 Documentation/Getting-Started/glossary.md     |   2 +-
 ROADMAP.md                                    |   1 +
 .../create-external-cluster-resources.py      |  24 ++--
 9 files changed, 179 insertions(+), 28 deletions(-)
 create mode 100644 Documentation/CRDs/Cluster/external-cluster/.pages
 rename Documentation/CRDs/Cluster/{ => external-cluster}/external-cluster.md (93%)
 create mode 100644 Documentation/CRDs/Cluster/external-cluster/topology-for-external-mode.md

diff --git a/.github/workflows/canary-integration-test.yml b/.github/workflows/canary-integration-test.yml
index 54e6aeef3d44..6874c444079d 100644
--- a/.github/workflows/canary-integration-test.yml
+++ b/.github/workflows/canary-integration-test.yml
@@ -4,7 +4,7 @@ on:
   workflow_call:
     inputs:
       ceph_images:
-        description: 'JSON list of Ceph images for creating Ceph cluster'
+        description: "JSON list of Ceph images for creating Ceph cluster"
         default: '["quay.io/ceph/ceph:v18"]'
         type: string
 
@@ -229,6 +229,39 @@ jobs:
             echo "script failed because wrong realm was passed"
           fi
 
+      - name: test topology flags
+        run: |
+          toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}')
+          # create 3 replica-1 pools
+          sed -i 's/replicapool/replica1a/' deploy/examples/pool-test.yaml
+          kubectl create -f deploy/examples/pool-test.yaml
+          sed -i 's/replica1a/replica1b/' deploy/examples/pool-test.yaml
+          kubectl create -f deploy/examples/pool-test.yaml
+          sed -i 's/replica1b/replica1c/' deploy/examples/pool-test.yaml
+          kubectl create -f deploy/examples/pool-test.yaml
+          # bring back the original file
+          sed -i 's/replica1c/replicapool/' deploy/examples/pool-test.yaml
+
+          # check and wait for the pools to get ready
+          kubectl wait --for='jsonpath={.status.phase}=Ready' Cephblockpool/replica1a -nrook-ceph
+          kubectl wait --for='jsonpath={.status.phase}=Ready' Cephblockpool/replica1b -nrook-ceph
+          kubectl wait --for='jsonpath={.status.phase}=Ready' Cephblockpool/replica1c -nrook-ceph
+
+          # pass correct flags
+          kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --topology-pools replica1a,replica1b,replica1c --topology-failure-domain-label hostname --topology-failure-domain-values minikube,minikube-m02,minikube-m03
+          # pass pools that do not exist
+          if output=$(kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --topology-pools ab,cd,ef --topology-failure-domain-label hostname --topology-failure-domain-values minikube,minikube-m02,minikube-m03); then
+            echo "script run completed with stderr error after passing the wrong pools: $output"
+          else
+            echo "script failed because the wrong pools do not exist"
+          fi
+          # don't pass all topology flags
+          if output=$(kubectl -n rook-ceph exec $toolbox -- python3 /etc/ceph/create-external-cluster-resources.py --rbd-data-pool-name replicapool --topology-pools replica1a,replica1b,replica1c --topology-failure-domain-values minikube,minikube-m02,minikube-m03); then
+            echo "script run completed with stderr error after 
passing the wrong flags: $output" + else + echo "script failed because topology-failure-domain-label is missing" + fi + - name: test enable v2 mon port run: | toolbox=$(kubectl get pod -l app=rook-ceph-tools -n rook-ceph -o jsonpath='{.items[*].metadata.name}') diff --git a/Documentation/CRDs/Cluster/.pages b/Documentation/CRDs/Cluster/.pages index 001ac3924b7b..524ac4b14ea3 100644 --- a/Documentation/CRDs/Cluster/.pages +++ b/Documentation/CRDs/Cluster/.pages @@ -4,5 +4,5 @@ nav: - host-cluster.md - pvc-cluster.md - stretch-cluster.md - - external-cluster.md + - external-cluster - ... diff --git a/Documentation/CRDs/Cluster/ceph-cluster-crd.md b/Documentation/CRDs/Cluster/ceph-cluster-crd.md index c6efd41fb3e1..15c8a72d7d49 100755 --- a/Documentation/CRDs/Cluster/ceph-cluster-crd.md +++ b/Documentation/CRDs/Cluster/ceph-cluster-crd.md @@ -8,7 +8,7 @@ There are primarily four different modes in which to create your cluster. 1. [Host Storage Cluster](host-cluster.md): Consume storage from host paths and raw devices 2. [PVC Storage Cluster](pvc-cluster.md): Dynamically provision storage underneath Rook by specifying the storage class Rook should use to consume storage (via PVCs) 3. [Stretched Storage Cluster](stretch-cluster.md): Distribute Ceph mons across three zones, while storage (OSDs) is only configured in two zones -4. [External Ceph Cluster](external-cluster.md): Connect your K8s applications to an external Ceph cluster +4. [External Ceph Cluster](external-cluster/external-cluster.md): Connect your K8s applications to an external Ceph cluster See the separate topics for a description and examples of each of these scenarios. @@ -24,7 +24,7 @@ Settings can be specified at the global level to apply to the cluster as a whole ### Cluster Settings * `external`: - * `enable`: if `true`, the cluster will not be managed by Rook but via an external entity. This mode is intended to connect to an existing cluster. In this case, Rook will only consume the external cluster. However, Rook will be able to deploy various daemons in Kubernetes such as object gateways, mds and nfs if an image is provided and will refuse otherwise. If this setting is enabled **all** the other options will be ignored except `cephVersion.image` and `dataDirHostPath`. See [external cluster configuration](external-cluster.md). If `cephVersion.image` is left blank, Rook will refuse the creation of extra CRs like object, file and nfs. + * `enable`: if `true`, the cluster will not be managed by Rook but via an external entity. This mode is intended to connect to an existing cluster. In this case, Rook will only consume the external cluster. However, Rook will be able to deploy various daemons in Kubernetes such as object gateways, mds and nfs if an image is provided and will refuse otherwise. If this setting is enabled **all** the other options will be ignored except `cephVersion.image` and `dataDirHostPath`. See [external cluster configuration](external-cluster/external-cluster.md). If `cephVersion.image` is left blank, Rook will refuse the creation of extra CRs like object, file and nfs. * `cephVersion`: The version information for launching the ceph daemons. * `image`: The image used for running the ceph daemons. For example, `quay.io/ceph/ceph:v18.2.1`. For more details read the [container images section](#ceph-container-images). For the latest ceph images, see the [Ceph DockerHub](https://hub.docker.com/r/ceph/ceph/tags/). 
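For context on the `external` settings referenced above, a minimal external-mode `CephCluster` spec looks roughly like the following sketch. The name, namespace, and image tag are illustrative; the example manifests shipped with Rook under `deploy/examples` are the authoritative reference.

```yaml
apiVersion: ceph.rook.io/v1
kind: CephCluster
metadata:
  # illustrative name/namespace for an external cluster connection
  name: rook-ceph-external
  namespace: rook-ceph-external
spec:
  external:
    # consume an existing Ceph cluster instead of deploying one
    enable: true
  # where Rook persists its local configuration on the host
  dataDirHostPath: /var/lib/rook
  # optional: only set an image if Rook should also deploy daemons
  # such as object gateways, mds, or nfs against the external cluster
  cephVersion:
    image: quay.io/ceph/ceph:v18.2.1
```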
diff --git a/Documentation/CRDs/Cluster/external-cluster/.pages b/Documentation/CRDs/Cluster/external-cluster/.pages new file mode 100644 index 000000000000..5a3a6ca9e41c --- /dev/null +++ b/Documentation/CRDs/Cluster/external-cluster/.pages @@ -0,0 +1,3 @@ +nav: + - external-cluster.md + - topology-for-external-mode.md diff --git a/Documentation/CRDs/Cluster/external-cluster.md b/Documentation/CRDs/Cluster/external-cluster/external-cluster.md similarity index 93% rename from Documentation/CRDs/Cluster/external-cluster.md rename to Documentation/CRDs/Cluster/external-cluster/external-cluster.md index 417ff4400c31..bc0db2bb05e1 100644 --- a/Documentation/CRDs/Cluster/external-cluster.md +++ b/Documentation/CRDs/Cluster/external-cluster/external-cluster.md @@ -60,9 +60,9 @@ python3 create-external-cluster-resources.py --rbd-data-pool-name -- * `--upgrade`: (optional) Upgrades the cephCSIKeyrings(For example: client.csi-cephfs-provisioner) and client.healthchecker ceph users with new permissions needed for the new cluster version and older permission will still be applied. * `--restricted-auth-permission`: (optional) Restrict cephCSIKeyrings auth permissions to specific pools, and cluster. Mandatory flags that need to be set are `--rbd-data-pool-name`, and `--k8s-cluster-name`. `--cephfs-filesystem-name` flag can also be passed in case of CephFS user restriction, so it can restrict users to particular CephFS filesystem. * `--v2-port-enable`: (optional) Enables the v2 mon port (3300) for mons. -* `--topology-pools`: (optional) comma-separated list of topology-constrained rbd pools -* `--topology-failure-domain-label`: (optional) k8s cluster failure domain label (example: zone,rack,host,etc) for the topology-pools that are matching the ceph domain -* `--topology-failure-domain-values`: (optional) comma-separated list of the k8s cluster failure domain values corresponding to each of the pools in the topology-pools list +* `--topology-pools`: (optional) Comma-separated list of topology-constrained rbd pools +* `--topology-failure-domain-label`: (optional) K8s cluster failure domain label (example: zone, rack, or host) for the topology-pools that match the ceph domain +* `--topology-failure-domain-values`: (optional) Comma-separated list of the k8s cluster failure domain values corresponding to each of the pools in the `topology-pools` list ### Multi-tenancy @@ -90,13 +90,11 @@ python3 create-external-cluster-resources.py --rbd-data-pool-name -- ### Topology Based Provisioning Enable Topology Based Provisioning for RBD pools by passing `--topology-pools`, `--topology-failure-domain-label` and `--topology-failure-domain-values` flags. -A new storageclass will be created by the import script named `ceph-rbd-topology` with `volumeBindingMode: WaitForFirstConsumer` -and will configure topologyConstrainedPools according the input provided. -Later use the storageclass to create a volume in the pool matching the topology of the pod scheduling. +A new storageclass named `ceph-rbd-topology` will be created by the import script with `volumeBindingMode: WaitForFirstConsumer`. +The storageclass is used to create a volume in the pool matching the topology where a pod is scheduled. 
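+
+For example, a sketch of the import command with the topology flags (the pool and zone names are illustrative):
+
+```console
+python3 create-external-cluster-resources.py --rbd-data-pool-name replicapool --topology-pools pool-a,pool-b,pool-c --topology-failure-domain-label zone --topology-failure-domain-values zone-a,zone-b,zone-c
+```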
+
+For more details, see [Topology-Based Provisioning](topology-for-external-mode.md).
 
-```console
-python3 create-external-cluster-resources.py --rbd-data-pool-name pool_name --topology-pools p,q,r --topology-failure-domain-label labelName --topology-failure-domain-values x,y,z --format bash
-```
 
 ### Upgrade Example
 
@@ -248,7 +246,7 @@ Consume the S3 Storage, in two different ways:
    ```
 
 !!! hint
-    For more details see the [Object Store topic](../../Storage-Configuration/Object-Storage-RGW/object-storage.md#connect-to-an-external-object-store)
+    For more details see the [Object Store topic](../../../Storage-Configuration/Object-Storage-RGW/object-storage.md#connect-to-an-external-object-store)
 
 ### Connect to v2 mon port
 
diff --git a/Documentation/CRDs/Cluster/external-cluster/topology-for-external-mode.md b/Documentation/CRDs/Cluster/external-cluster/topology-for-external-mode.md
new file mode 100644
index 000000000000..67fda8817a1a
--- /dev/null
+++ b/Documentation/CRDs/Cluster/external-cluster/topology-for-external-mode.md
@@ -0,0 +1,118 @@
+# Topology-Based Provisioning
+
+## Scenario
+Applications like Kafka run a deployment with multiple instances. Each service instance creates a new claim and is expected to be located in a different zone. Since the application has its own redundant instances, there is no requirement for redundancy at the data layer. A storage class is created that will provision storage from replica-1 Ceph pools, one located in each of the separate zones.
+
+## Configuration Flags
+
+Add the required flags to the `create-external-cluster-resources.py` script:
+
+- `--topology-pools`: (optional) Comma-separated list of topology-constrained rbd pools
+
+- `--topology-failure-domain-label`: (optional) K8s cluster failure domain label (example: zone, rack, or host) for the topology-pools that match the ceph domain
+
+- `--topology-failure-domain-values`: (optional) Comma-separated list of the k8s cluster failure domain values corresponding to each of the pools in the `topology-pools` list
+
+The import script will then create a new storage class named `ceph-rbd-topology`.
+
+## Example Configuration
+
+### Ceph Cluster
+
+Determine the names of the zones (or other failure domains) in the Ceph CRUSH map where each of the pools will have corresponding CRUSH rules.
+
+Create a zone-specific CRUSH rule for each of the pools. For example, create a CRUSH rule for `zone-a` and review it with `ceph osd crush rule dump`:
+
+```
+$ ceph osd crush rule create-replicated rule_host-zone-a-hdd zone-a osd hdd
+$ ceph osd crush rule dump rule_host-zone-a-hdd
+{
+    "rule_id": 5,
+    "rule_name": "rule_host-zone-a-hdd",
+    "type": 1,
+    "steps": [
+        {
+            "op": "take",
+            "item": -10,
+            "item_name": "zone-a~hdd"
+        },
+        {
+            "op": "choose_firstn",
+            "num": 0,
+            "type": "osd"
+        },
+        {
+            "op": "emit"
+        }
+    ]
+}
+```
+
+Create replica-1 pools based on each of the CRUSH rules from the previous step. Each pool must be created with a CRUSH rule to limit the pool to OSDs in a specific zone.
+
+!!! note
+    Disable the ceph warning for replica-1 pools: `ceph config set global mon_allow_pool_size_one true`
+
+Determine the zones in the K8s cluster that correspond to each of the pools in the Ceph cluster. The K8s nodes require labels as defined with the [OSD Topology labels](../ceph-cluster-crd.md#osd-topology). Some environments already have nodes labeled in zones. Set the topology labels on the nodes if not already present.
+
+Set the flags of the external cluster configuration script based on the pools and failure domains:
+ +--topology-pools=pool-a,pool-b,pool-c + +--topology-failure-domain-label=zone + +--topology-failure-domain-values=zone-a,zone-b,zone-c + +Then run the python script to generate the settings which will be imported to the Rook cluster: +``` + python3 create-external-cluster-resources.py --rbd-data-pool-name replicapool --topology-pools pool-a,pool-b,pool-c --topology-failure-domain-label zone --topology-failure-domain-values zone-a,zone-b,zone-c +``` + +Output: +``` +export ROOK_EXTERNAL_FSID=8f01d842-d4b2-11ee-b43c-0050568fb522 +.... +.... +.... +export TOPOLOGY_POOLS=pool-a,pool-b,pool-c +export TOPOLOGY_FAILURE_DOMAIN_LABEL=zone +export TOPOLOGY_FAILURE_DOMAIN_VALUES=zone-a,zone-b,zone-c +``` + +### Kubernetes Cluster + +Check the external cluster is created and connected as per the installation steps. +Review the new storage class: +``` +$ kubectl get sc ceph-rbd-topology -o yaml +allowVolumeExpansion: true +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + creationTimestamp: "2024-03-07T12:10:19Z" + name: ceph-rbd-topology + resourceVersion: "82502" + uid: 68448a14-3a78-42c5-ac29-261b6c3404af +parameters: + ... + ... + pool: replicapool + topologyConstrainedPools: | + [ + {"poolName":"pool-a", + "domainSegments":[ + {"domainLabel":"zone","value":"zone-a"}]}, + {"poolName":"pool-b", + "domainSegments":[ + {"domainLabel":"zone","value":"zone-b"}]}, + {"poolName":"pool-c", + "domainSegments":[ + {"domainLabel":"zone","value":"zone-c"}]}, + ] +provisioner: rook-ceph.rbd.csi.ceph.com +reclaimPolicy: Delete +volumeBindingMode: WaitForFirstConsumer +``` + +#### Create a Topology-Based PVC + +The topology-based storage class is ready to be consumed! Create a PVC from the `ceph-rbd-topology` storage class above, and watch the OSD usage to see how the data is spread only among the topology-based CRUSH buckets. diff --git a/Documentation/Getting-Started/glossary.md b/Documentation/Getting-Started/glossary.md index 5dfd032313d4..492491d3ced7 100644 --- a/Documentation/Getting-Started/glossary.md +++ b/Documentation/Getting-Started/glossary.md @@ -64,7 +64,7 @@ CephRBDMirror CRD is used by Rook to allow creation and updating rbd-mirror daem ### External Storage Cluster -An [external cluster](../CRDs/Cluster/external-cluster.md) is a Ceph configuration that is managed outside of the local K8s cluster. +An [external cluster](../CRDs/Cluster/external-cluster/external-cluster.md) is a Ceph configuration that is managed outside of the local K8s cluster. ### Host Storage Cluster diff --git a/ROADMAP.md b/ROADMAP.md index 377ddcf51a88..749bd0fa8867 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -24,6 +24,7 @@ The following high level features are targeted for Rook v1.14 (April 2024). 
For * Separate CSI image repository and tag for all images in the helm chart [#13585](https://github.com/rook/rook/issues/13585) * Ceph-CSI [v3.11](https://github.com/ceph/ceph-csi/issues?q=is%3Aopen+is%3Aissue+milestone%3Arelease-v3.11.0) * Add build support for Go 1.22 [#13738](https://github.com/rook/rook/pull/13738) +* Add topology based provisioning for external clusters [#13821](https://github.com/rook/rook/pull/13821) ## Kubectl Plugin diff --git a/deploy/examples/create-external-cluster-resources.py b/deploy/examples/create-external-cluster-resources.py index acd265dcd1f0..b4404f745370 100644 --- a/deploy/examples/create-external-cluster-resources.py +++ b/deploy/examples/create-external-cluster-resources.py @@ -484,13 +484,13 @@ def gen_arg_parser(cls, args_to_parse=None): "--topology-failure-domain-label", default="", required=False, - help="k8s cluster failure domain label (example: zone,rack,host,etc) for the topology-pools that are matching the ceph domain", + help="k8s cluster failure domain label (example: zone, rack, or host) for the topology-pools that match the ceph domain", ) output_group.add_argument( "--topology-failure-domain-values", default="", required=False, - help="comma-separated list of the k8s cluster failure domain values corresponding to each of the pools in the topology-pools list", + help="comma-separated list of the k8s cluster failure domain values corresponding to each of the pools in the `topology-pools` list", ) upgrade_group = argP.add_argument_group("upgrade") @@ -1518,7 +1518,7 @@ def validate_rgw_multisite(self, rgw_multisite_config_name, rgw_multisite_config return "-1" return "" - def convert_comma_seprated_to_array(self, value): + def convert_comma_separated_to_array(self, value): return value.split(",") def raise_exception_if_any_topology_flag_is_missing(self): @@ -1663,16 +1663,16 @@ def _gen_output_map(self): and self._arg_parser.topology_failure_domain_values != "" ): self.validate_topology_values( - self.convert_comma_seprated_to_array(self.out_map["TOPOLOGY_POOLS"]), - self.convert_comma_seprated_to_array( + self.convert_comma_separated_to_array(self.out_map["TOPOLOGY_POOLS"]), + self.convert_comma_separated_to_array( self.out_map["TOPOLOGY_FAILURE_DOMAIN_VALUES"] ), ) self.validate_topology_rbd_pools( - self.convert_comma_seprated_to_array(self.out_map["TOPOLOGY_POOLS"]) + self.convert_comma_separated_to_array(self.out_map["TOPOLOGY_POOLS"]) ) self.init_topology_rbd_pools( - self.convert_comma_seprated_to_array(self.out_map["TOPOLOGY_POOLS"]) + self.convert_comma_separated_to_array(self.out_map["TOPOLOGY_POOLS"]) ) else: self.raise_exception_if_any_topology_flag_is_missing() @@ -1928,12 +1928,10 @@ def gen_json_out(self): "topologyFailureDomainLabel": self.out_map[ "TOPOLOGY_FAILURE_DOMAIN_LABEL" ], - "topologyFailureDomainValues": self.convert_comma_seprated_to_array( - self.out_map["TOPOLOGY_FAILURE_DOMAIN_VALUES"] - ), - "topologyPools": self.convert_comma_seprated_to_array( - self.out_map["TOPOLOGY_POOLS"] - ), + "topologyFailureDomainValues": self.out_map[ + "TOPOLOGY_FAILURE_DOMAIN_VALUES" + ], + "topologyPools": self.out_map["TOPOLOGY_POOLS"], "pool": self.out_map["RBD_POOL_NAME"], "csi.storage.k8s.io/provisioner-secret-name": f"rook-{self.out_map['CSI_RBD_PROVISIONER_SECRET_NAME']}", "csi.storage.k8s.io/controller-expand-secret-name": f"rook-{self.out_map['CSI_RBD_PROVISIONER_SECRET_NAME']}",