diff --git a/k8s/docs/01-introduction/README.md b/k8s/docs/01-introduction/README.md index a8c00e06..58cf4956 100644 --- a/k8s/docs/01-introduction/README.md +++ b/k8s/docs/01-introduction/README.md @@ -6,7 +6,8 @@ Understanding the intricate synergy between TigerGraph, TigerGraph Operator, and | TigerGraph Operator version | TigerGraph version | Kubernetes version | |----------|----------|----------| -| 0.0.9 | TigerGraph >= 3.6.0 |1.23, 1.24, 1.25, 1.26, **1.27**| +| 0.1.0 | TigerGraph >= 3.6.0 |1.24, 1.25, 1.26, 1.27, **1.28**| +| 0.0.9 | TigerGraph >= 3.6.0 && TigerGraph <= 3.9.3|1.23, 1.24, 1.25, 1.26, 1.27| | 0.0.7 | TigerGraph >= 3.6.0 && TigerGraph <= 3.9.2|1.22, 1.23, 1.24, 1.25, 1.26| | 0.0.6 | TigerGraph >= 3.6.0 && TigerGraph <= 3.9.1|1.22, 1.23, 1.24, 1.25, 1.26| | 0.0.5 | TigerGraph >= 3.6.0 && TigerGraph <= 3.9.1|1.22, 1.23, 1.24, 1.25, 1.26| @@ -38,6 +39,11 @@ Once your deployment is complete, refer to the following documents for guidance - [Working with InitContainers, Sidecar Containers, and Custom Volumes](../03-deploy/use-custom-containers-by-kubectl-tg.md) - [Resizing Persistent Volumes for TigerGraph](../07-reference/expand-persistent-volume.md) - [Backing Up and Restoring TigerGraph Clusters](../04-manage/backup-and-restore/README.md) +- [Pause and Resume TigerGraph Clusters](../04-manage/pause-and-resume.md) +- [Customize TigerGraph Pods and Containers](../03-deploy/customize-tigergraph-pod.md) +- [Lifecycle of TigerGraph](../03-deploy/lifecycle-of-tigergraph.md) +- [Multiple persistent volumes mounting](../03-deploy/multiple-persistent-volumes-mounting.md) +- [Cluster status of TigerGraph on k8s](../07-reference/cluster-status-of-tigergraph.md) In case issues arise and your cluster requires diagnosis, you have two valuable resources: diff --git a/k8s/docs/02-get-started/get_started.md b/k8s/docs/02-get-started/get_started.md index c533cfd4..229d242c 100644 --- a/k8s/docs/02-get-started/get_started.md +++ b/k8s/docs/02-get-started/get_started.md @@ -11,14 +11,20 @@ - [Step 3: Deploy a TigerGraph Cluster](#step-3-deploy-a-tigergraph-cluster) - [Providing a Private SSH Key Pair for Enhanced Security](#providing-a-private-ssh-key-pair-for-enhanced-security) - [Specify the StorageClass Name](#specify-the-storageclass-name) + - [Specify the additional Storage for mounting multiple PVs(Optional)](#specify-the-additional-storage-for-mounting-multiple-pvsoptional) + - [Customize configurations for the TigerGraph system (Optional)](#customize-configurations-for-the-tigergraph-system-optional) - [Create a TigerGraph Cluster with Specific Options](#create-a-tigergraph-cluster-with-specific-options) - [Step 4: Connect to a TigerGraph Cluster](#step-4-connect-to-a-tigergraph-cluster) - [Connect to a TigerGraph Cluster Pod](#connect-to-a-tigergraph-cluster-pod) - - [Access TigerGraph Suite](#access-tigergraph-suite) - - [Access RESTPP API Service](#access-restpp-api-service) + - [Access TigerGraph Services](#access-tigergraph-services) + - [Verify the API service](#verify-the-api-service) + - [Verify the RESTPP API service](#verify-the-restpp-api-service) + - [Verify the Metrics API service](#verify-the-metrics-api-service) - [Step 5: Operate a TigerGraph Cluster](#step-5-operate-a-tigergraph-cluster) - [Update the Resources (CPU and Memory) of the TigerGraph Cluster](#update-the-resources-cpu-and-memory-of-the-tigergraph-cluster) + - [Update system configurations and license of the TigerGraph cluster](#update-system-configurations-and-license-of-the-tigergraph-cluster) - [Scale 
a TigerGraph Cluster](#scale-a-tigergraph-cluster) + - [Change the HA factor of the TigerGraph cluster](#change-the-ha-factor-of-the-tigergraph-cluster) - [Upgrade a TigerGraph Cluster](#upgrade-a-tigergraph-cluster) - [Step 6: Destroy the TigerGraph Cluster and the Kubernetes Operator](#step-6-destroy-the-tigergraph-cluster-and-the-kubernetes-operator) - [Destroy the TigerGraph Cluster](#destroy-the-tigergraph-cluster) @@ -268,15 +274,15 @@ Starting from Operator version 0.0.4, users are required to provide their privat Create a secret object based on the private SSH key file generated in Step 1. Ensure that the key name of the secret for the private SSH key is private-ssh-key, and the key name for the public SSH key is public-ssh-key. **Do not modify these key names**. - > [!IMPORTANT] - > The namespace of the Secret object must be the same as that of the TigerGraph cluster. +> [!IMPORTANT] +> The namespace of the Secret object must be the same as that of the TigerGraph cluster. ```bash kubectl create secret generic ${YOUR_SSH_KEY_SECRET_NAME} --from-file=private-ssh-key=$HOME/.ssh/tigergraph_rsa --from-file=public-ssh-key=$HOME/.ssh/tigergraph_rsa.pub --namespace ${YOUR_NAMESPACE} ``` - > [!IMPORTANT] - > For Operator versions 0.0.4 and above, when creating a cluster using the `kubectl tg create command`, you must set the `--private-key-secret` option to `${YOUR_SSH_KEY_SECRET_NAME}`. +> [!IMPORTANT] +> For Operator versions 0.0.4 and above, when creating a cluster using the `kubectl tg create command`, you must set the `--private-key-secret` option to `${YOUR_SSH_KEY_SECRET_NAME}`. These steps enhance the security of your cluster by utilizing your private SSH key pair. @@ -294,18 +300,63 @@ standard (default) rancher.io/local-path Delete WaitForFirstConsume Identify the StorageClass name, and when specifying the `--storage-class` option, use `standard` as its value. This ensures that the appropriate StorageClass is assigned during TigerGraph cluster creation, optimizing storage provisioning and management. +### Specify the additional Storage for mounting multiple PVs(Optional) + +You can specify multiple PVs for TigerGraph Pods by specifying the `--additional-storages` option. The value of this option is a YAML file configuration. For example: + +> [!NOTE] +> Other parameters required to create a cluster are omitted here. + +```bash +kubectl tg create --cluster-name ${YOUR_CLUSTER_NAME} --additional-storages additional-storage-tg-logs.yaml +``` + +Example additional storage YAML file: + +```YAML +additionalStorages: + - name: tg-kafka + storageSize: 5Gi + - name: tg-log + storageSize: 5Gi + - name: tg-sidecar + storageClassName: efs-sc + storageSize: 5Gi + accessMode: ReadWriteMany + volumeMode: Filesystem + - name: tg-backup + storageSize: 5Gi + mountPath: /home/tigergraph/backup + accessMode: ReadWriteOnce + volumeMode: Filesystem +``` + +You can also specify the multiple PVs using CR configuration, For more information, see [Multiple persistent volumes mounting](../03-deploy/multiple-persistent-volumes-mounting.md) + +### Customize configurations for the TigerGraph system (Optional) + +You can customize the configurations for the TigerGraph system by specifying the `--tigergraph-config` option. The value of this option should be key-value pairs separated by commas. 
For example: + +```bash + --tigergraph-config "System.Backup.TimeoutSec=900,Controller.BasicConfig.LogConfig.LogFileMaxSizeMB=40" +``` + + The key-value pairs are the same as the configurations that can be set by `gadmin config set` command. For more information, see [Configuration Parameters](https://docs.tigergraph.com/tigergraph-server/current/reference/configuration-parameters). All configurations will be applied to the TigerGraph system when the cluster is initializing. + ### Create a TigerGraph Cluster with Specific Options You can obtain the TigerGraph Docker image versions from [tigergraph-k8s](https://hub.docker.com/r/tigergraph/tigergraph-k8s/tags) -Use the following command to create a new TigerGraph cluster with a free license: +You must also provide your license key with the `--license` command. Contact TigerGraph support for help finding your license key. -- Get and export the free license: +- Export license key as an environment variable ```bash - export LICENSE=$(curl -L "ftp://ftp.graphtiger.com/lic/license3.txt" -o "/tmp/license3.txt" 2>/dev/null && cat /tmp/license3.txt) + export LICENSE= ``` +- Create TigerGraph cluster with kubectl-tg plugin + ```bash kubectl tg create --cluster-name ${YOUR_CLUSTER_NAME} --private-key-secret ${YOUR_SSH_KEY_SECRET_NAME} --size 3 --ha 2 --version 3.9.1 --license ${LICENSE} \ --storage-class standard --storage-size 10G --cpu 2000m --memory 6Gi --namespace ${YOUR_NAMESPACE} @@ -321,7 +372,7 @@ Use the following command to create a new TigerGraph cluster with a free license ## Step 4: Connect to a TigerGraph Cluster -This section explains how to log into a TigerGraph cluster pod and access the `RESTPP` and `GUI` services. +This section explains how to log into a TigerGraph cluster pod and access the `RESTPP`,`GUI`, and `Metrics` services. ### Connect to a TigerGraph Cluster Pod @@ -331,44 +382,38 @@ To log into a single container within the TigerGraph cluster and execute command kubectl tg connect --cluster-name ${YOUR_CLUSTER_NAME} --namespace ${YOUR_NAMESPACE} ``` -### Access TigerGraph Suite +### Access TigerGraph Services -- Query the external service address: +Query the external service address: ```bash - export GUI_SERVICE_ADDRESS=$(kubectl get svc/${YOUR_CLUSTER_NAME}-gui-external-service --namespace ${YOUR_NAMESPACE} -o=jsonpath='{.status.loadBalancer.ingress[0].ip}') - - echo $GUI_SERVICE_ADDRESS - 172.18.255.201 + export EXTERNAL_SERVICE_ADDRESS=$(kubectl get svc/${YOUR_CLUSTER_NAME}-nginx-external-service --namespace ${YOUR_NAMESPACE} -o=jsonpath='{.status.loadBalancer.ingress[0].ip}') ``` -- Verify the API service: +#### Verify the API service ```bash - curl http://${GUI_SERVICE_ADDRESS}:14240/api/ping + curl http://${EXTERNAL_SERVICE_ADDRESS}:14240/api/ping {"error":false,"message":"pong","results":null} ``` -To access the TigerGraph Suite, open it in your browser using the following URL: http://${GUI_SERVICE_ADDRESS}:14240, replacing `GUI_SERVICE_ADDRESS` with the actual service address. +To access the TigerGraph Suite, open it in your browser using the following URL: http://${EXTERNAL_SERVICE_ADDRESS}:14240, replacing `EXTERNAL_SERVICE_ADDRESS` with the actual service address. 
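+
+If your Kubernetes environment does not assign a LoadBalancer ingress IP (the `jsonpath` query above then returns an empty string), a port-forward can stand in for the external address. This is an illustrative workaround rather than part of the official procedure, and it assumes the nginx external service exposes port 14240, as the curl examples above suggest:
+
+```bash
+# Forward local port 14240 to the cluster's nginx external service
+kubectl port-forward svc/${YOUR_CLUSTER_NAME}-nginx-external-service 14240:14240 --namespace ${YOUR_NAMESPACE}
+# While the port-forward is running, use http://localhost:14240 in place of ${EXTERNAL_SERVICE_ADDRESS}:14240
+```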
-### Access RESTPP API Service
-
-- Query the external service address:
+#### Verify the RESTPP API service
 
   ```bash
-  export RESTPP_SERVICE_ADDRESS=$(kubectl get svc/${YOUR_CLUSTER_NAME}-rest-external-service --namespace ${YOUR_NAMESPACE} -o=jsonpath='{.status.loadBalancer.ingress[0].ip}')
+  curl http://${EXTERNAL_SERVICE_ADDRESS}:14240/restpp/echo
 
-  echo $RESTPP_SERVICE_ADDRESS
-  172.18.255.200
+  {"error":false, "message":"Hello GSQL"}
   ```
 
-- Verify the RESTPP API service:
+#### Verify the Metrics API service
 
   ```bash
-  curl http://${RESTPP_SERVICE_ADDRESS}:9000/echo
+curl http://${EXTERNAL_SERVICE_ADDRESS}/informant/metrics/get/network -d '{"LatestNum":"1"}'
 
-  {"error":false, "message":"Hello GSQL"}
+{"NetworkMetrics":[{"EventMeta":{"Targets":[{"ServiceName":"IFM"}],"EventId":"1ebeaf2a380f4941b371efaaceb3467b","TimestampNS":"1703666521019463773","Source":{"ServiceName":"EXE","Partition":2}},"HostID":"m2","CollectTimeStamps":"1703666521008230613","Network":{"IP":"10.244.0.79","TCPConnectionNum":89,"IncomingBytesNum":"1654215","OutgoingBytesNum":"1466486"}},{"EventMeta":{"Targets":[{"ServiceName":"IFM"}],"EventId":"2c54ed5d6ba14e789db03fd9e023219c","TimestampNS":"1703666521020024563","Source":{"ServiceName":"EXE","Partition":3}},"HostID":"m3","CollectTimeStamps":"1703666521011409133","Network":{"IP":"10.244.0.78","TCPConnectionNum":90,"IncomingBytesNum":"1637413","OutgoingBytesNum":"1726712"}},{"EventMeta":{"Targets":[{"ServiceName":"IFM"}],"EventId":"c3478943ca134530bcd3aa439521c626","TimestampNS":"1703666521019483903","Source":{"ServiceName":"EXE","Partition":1}},"HostID":"m1","CollectTimeStamps":"1703666521009116924","Network":{"IP":"10.244.0.77","TCPConnectionNum":107,"IncomingBytesNum":"1298257","OutgoingBytesNum":"1197920"}}]}
   ```
 
 ## Step 5: Operate a TigerGraph Cluster
 
@@ -381,6 +426,26 @@ Use the following command to update the CPU and memory resources of the TigerGra
 kubectl tg update --cluster-name ${YOUR_CLUSTER_NAME} --cpu 3 --memory 8Gi --cpu-limit 3 --memory-limit 8Gi --namespace ${YOUR_NAMESPACE}
 ```
 
+### Update system configurations and license of the TigerGraph cluster
+
+Use the following command to update the system configurations of the TigerGraph cluster:
+
+```bash
+kubectl tg update --cluster-name ${YOUR_CLUSTER_NAME} --tigergraph-config "System.Backup.TimeoutSec=900,Controller.BasicConfig.LogConfig.LogFileMaxSizeMB=40" --namespace ${YOUR_NAMESPACE}
+```
+
+Use the following command to update the license of the TigerGraph cluster:
+
+```bash
+kubectl tg update --cluster-name ${YOUR_CLUSTER_NAME} --license ${LICENSE} --namespace ${YOUR_NAMESPACE}
+```
+
+If you want to update both the system configurations and the license of the TigerGraph cluster, provide both options together in one command (**recommended**) instead of running two separate commands:
+
+```bash
+kubectl tg update --cluster-name ${YOUR_CLUSTER_NAME} --tigergraph-config "System.Backup.TimeoutSec=900,Controller.BasicConfig.LogConfig.LogFileMaxSizeMB=40" --license ${LICENSE} --namespace ${YOUR_NAMESPACE}
+```
+
 ### Scale a TigerGraph Cluster
 
 > [!WARNING]
@@ -396,6 +461,14 @@ kubectl tg update --cluster-name ${YOUR_CLUSTER_NAME} --size 4 --ha 2 --namespa
 
 The above command scales the cluster to a size of 4 with an HA factor of 2.
 
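+A quick way to follow a scaling operation is to watch the cluster's pods by label. The `tigergraph.com/cluster-pod` label is described in the affinity guide; treat this as an illustrative check rather than an official status command:
+
+```bash
+# Watch the TigerGraph pods of this cluster until the new size is reached
+kubectl get pods -l tigergraph.com/cluster-pod=${YOUR_CLUSTER_NAME} --namespace ${YOUR_NAMESPACE} --watch
+```
+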
+#### Change the HA factor of the TigerGraph cluster
+
+From Operator version 0.1.0, you can change the HA factor of the TigerGraph cluster without changing the cluster size by using the following command:
+
+```bash
+kubectl tg update --cluster-name ${YOUR_CLUSTER_NAME} --ha ${NEW_HA} --namespace ${YOUR_NAMESPACE}
+```
+
 ### Upgrade a TigerGraph Cluster
 
 > [!WARNING]
diff --git a/k8s/docs/03-deploy/affinity-use-cases.md b/k8s/docs/03-deploy/affinity-use-cases.md
index f74f0527..0bb788c4 100644
--- a/k8s/docs/03-deploy/affinity-use-cases.md
+++ b/k8s/docs/03-deploy/affinity-use-cases.md
@@ -1,36 +1,37 @@
+# NodeSelector, Affinity and Toleration Use Cases
-

NodeSelector, Affinity and Toleration Use Cases

- -- [Basic Knowledge](#basic-knowledge) - - [Which labels are TG using](#which-labels-are-tg-using) - - [TigerGraph Cluster Pods](#tigergraph-cluster-pods) +- [NodeSelector, Affinity and Toleration Use Cases\>](#nodeselector-affinity-and-toleration-use-cases) + - [Basic Knowledge](#basic-knowledge) + - [Which labels are TG using](#which-labels-are-tg-using) + - [TigerGraph Cluster Pods](#tigergraph-cluster-pods) - [TigerGraph Job Pods](#tigergraph-job-pods) - - [TigerGraph Backup/Restore Job Pods](#tigergraph-backuprestore-job-pods) -- [NodeSelector](#nodeselector) - - [Example: schedule pods to nodes with disktype=ssd](#example-schedule-pods-to-nodes-with-disktypessd) -- [Affinity](#affinity) - - [NodeAffinity](#nodeaffinity) - - [Preferred Node Affinity](#preferred-node-affinity) - - [Example: Difference between Preferred Affinity and Required Affinity](#example-difference-between-preferred-affinity-and-required-affinity) - - [Weighted Affinity and Logical Operators](#weighted-affinity-and-logical-operators) - - [Combining Rules with Logical Operators](#combining-rules-with-logical-operators) - - [Examples: Combining Multiple Rules with Different Weights](#examples-combining-multiple-rules-with-different-weights) - - [Inter-pod Affinity and Anti-Affinity](#inter-pod-affinity-and-anti-affinity) - - [Example: Avoiding Scheduling TigerGraph Pods on the Same VM Instance](#example-avoiding-scheduling-tigergraph-pods-on-the-same-vm-instance) - - [Scheduling Pods to Different Zones](#scheduling-pods-to-different-zones) -- [Toleration](#toleration) - - [Example: Implementing User Groups with Taints and Tolerations](#example-implementing-user-groups-with-taints-and-tolerations) -- [Notice](#notice) - -Basic Knowledge -=============== + - [TigerGraph Backup/Restore Job Pods](#tigergraph-backuprestore-job-pods) + - [NodeSelector](#nodeselector) + - [Example: schedule pods to nodes with disktype=ssd](#example-schedule-pods-to-nodes-with-disktypessd) + - [Affinity](#affinity) + - [NodeAffinity](#nodeaffinity) + - [Preferred Node Affinity](#preferred-node-affinity) + - [Example: Difference between Preferred Affinity and Required Affinity](#example-difference-between-preferred-affinity-and-required-affinity) + - [Weighted Affinity and Logical Operators](#weighted-affinity-and-logical-operators) + - [Combining Rules with Logical Operators](#combining-rules-with-logical-operators) + - [Examples: Combining Multiple Rules with Different Weights](#examples-combining-multiple-rules-with-different-weights) + - [Inter-pod Affinity and Anti-Affinity](#inter-pod-affinity-and-anti-affinity) + - [Example: Avoiding Scheduling TigerGraph Pods on the Same VM Instance](#example-avoiding-scheduling-tigergraph-pods-on-the-same-vm-instance) + - [Scheduling Pods to Different Zones](#scheduling-pods-to-different-zones) + - [Toleration](#toleration) + - [Example: Implementing User Groups with Taints and Tolerations](#example-implementing-user-groups-with-taints-and-tolerations) + - [Notice](#notice) + +## Basic Knowledge In a Kubernetes cluster, every node is equipped with labels that provide information about the node's attributes and capabilities. Some labels are automatically assigned by Kubernetes itself, while others can be added manually by administrators. These labels play a crucial role in workload distribution, resource allocation, and overall cluster management.(please refer to [Well-Known Labels, Annotations and Taints](https://kubernetes.io/docs/reference/labels-annotations-taints/) ). 
You also have the ability to manually assign labels to nodes in your Kubernetes cluster. To view all labels associated with nodes, you can use the following command: + ```bash kubectl get nodes --show-labels ``` + Here's an example of node labels in a Google Kubernetes Engine (GKE) cluster: ```bash @@ -44,15 +45,14 @@ To manually assign labels to nodes, you can use the kubectl label command. For e ```bash kubectl label nodes NODE_1 NODE_2 LABEL_KEY=LABEL_VALUE ``` -These labels can then be utilized in affinity rules and other scheduling configurations to ensure that pods are placed on the most suitable nodes based on your specific requirements. +These labels can then be utilized in affinity rules and other scheduling configurations to ensure that pods are placed on the most suitable nodes based on your specific requirements. -Which labels are TG using -------------------------- +### Which labels are TG using TigerGraph utilizes specific labels for different purposes in Kubernetes: -### TigerGraph Cluster Pods +#### TigerGraph Cluster Pods | Label | Usage | |----------------------------------------|---------------------------------------------------------------------| @@ -68,7 +68,7 @@ TigerGraph utilizes specific labels for different purposes in Kubernetes: | `tigergraph.com/cluster-name=CLUSTER_NAME` | Indicates which cluster the job is for. | | `tigergraph.com/cluster-job={CLUSTER_NAME}-{JOB_TYPE}-job` | Specifies the type of job and the cluster it's associated with (JOB_TYPE: init, upgrade, expand, shrink-pre, shrink-post). | -### TigerGraph Backup/Restore Job Pods +#### TigerGraph Backup/Restore Job Pods | Label | Usage | |--------------------------------------------------|------------------------------------------------------------------------------| @@ -77,16 +77,13 @@ TigerGraph utilizes specific labels for different purposes in Kubernetes: These labels help identify the purpose and affiliation of various pods within the Kubernetes environment, making it easier to manage and monitor different components of TigerGraph clusters, jobs, backups, and restores. -NodeSelector -============ +## NodeSelector NodeSelector in the TigerGraph Custom Resource (CR) allows you to control the scheduling of pods for the TigerGraph cluster. When you define a NodeSelector, the pods related to the TigerGraph cluster will only be scheduled on nodes that have specific labels matching the NodeSelector criteria. This feature ensures that the TigerGraph cluster pods are placed on nodes that meet your specified requirements.(to know more about NodeSelector: [Assign Pods to Nodes](https://kubernetes.io/docs/tasks/configure-pod-container/assign-pods-nodes/) ) It's important to note that NodeSelector only applies to pods directly associated with the TigerGraph cluster. Other pods running tasks such as init, upgrade, expand, or shrink jobs will not be influenced by the NodeSelector settings. - -Example: schedule pods to nodes with disktype=ssd -------------------------------------------------- +### Example: schedule pods to nodes with disktype=ssd In this example, we will demonstrate how to use the NodeSelector feature to schedule pods to nodes with a specific label, such as disktype=ssd. This example assumes you are using Google Kubernetes Engine (GKE). @@ -112,6 +109,7 @@ kubectl label nodes gke-tg-k8s-gke-1024-default-pool-1e4fbc0f-2p9g \ gke-tg-k8s-gke-1024-default-pool-1e4fbc0f-9t5m \ disktype=ssd ``` + Replace the node names with the actual names of the nodes you want to label as SSD. 
First, we try to create a TG cluster without any rules. Use following CR: @@ -120,34 +118,29 @@ First, we try to create a TG cluster without any rules. Use following CR: apiVersion: graphdb.tigergraph.com/v1alpha1 kind: TigerGraph metadata: - name: test-cluster + name: test-cluster spec: - replicas: 3 - image: docker.io/tigergraph/tigergraph-k8s:3.9.2 - imagePullPolicy: IfNotPresent - privateKeyName: ssh-key-secret - listener: - type: LoadBalancer - resources: - requests: - cpu: 2 - memory: 8Gi - storage: - type: persistent-claim - volumeClaimTemplate: - storageClassName: standard - resources: - requests: - storage: 10G - initTGConfig: - ha: 1 - license: YOUR_LICENSE - version: 3.9.2 - hashBucketInBit: 5 - initJob: - image: docker.io/tigergraph/tigergraph-k8s-init:0.0.7 - imagePullPolicy: IfNotPresent + replicas: 3 + image: docker.io/tigergraph/tigergraph-k8s:3.9.2 + imagePullPolicy: IfNotPresent + privateKeyName: ssh-key-secret + listener: + type: LoadBalancer + resources: + requests: + cpu: 2 + memory: 8Gi + storage: + type: persistent-claim + volumeClaimTemplate: + storageClassName: standard + resources: + requests: + storage: 10G + ha: 1 + license: YOUR_LICENSE ``` + Apply the configuration using `kubectl apply -f .yaml`. Use `kubectl describe pod` to see which node each pod is scheduled to @@ -165,52 +158,47 @@ Node: gke-tg-k8s-gke-1024-default-pool-1e4fbc0f-4z0q/10.128.0.73 Note that the pods are scheduled to three random nodes. - Then we create a cluster with NodeSelector: ```yaml apiVersion: graphdb.tigergraph.com/v1alpha1 kind: TigerGraph metadata: - name: test-nodeselector + name: test-nodeselector spec: - replicas: 3 - image: docker.io/tigergraph/tigergraph-k8s:3.9.2 - imagePullPolicy: IfNotPresent - privateKeyName: ssh-key-secret - listener: - type: LoadBalancer - resources: - requests: - cpu: 2 - memory: 8Gi - storage: - type: persistent-claim - volumeClaimTemplate: - storageClassName: standard - resources: - requests: - storage: 10G - initTGConfig: - ha: 1 - license: YOUR_LICENSE - version: 3.9.2 - hashBucketInBit: 5 - initJob: - image: docker.io/tigergraph/tigergraph-k8s-init:0.0.7 - imagePullPolicy: IfNotPresent - affinityConfiguration: - nodeSelector: - disktype: ssd + replicas: 3 + image: docker.io/tigergraph/tigergraph-k8s:3.9.2 + imagePullPolicy: IfNotPresent + privateKeyName: ssh-key-secret + listener: + type: LoadBalancer + resources: + requests: + cpu: 2 + memory: 8Gi + storage: + type: persistent-claim + volumeClaimTemplate: + storageClassName: standard + resources: + requests: + storage: 10G + + ha: 1 + license: YOUR_LICENSE + affinityConfiguration: + nodeSelector: + disktype: ssd ``` + Apply the configuration using `kubectl apply -f .yaml`. In this configuration, there is an additional field `.spec.affinityConfiguration`, which is used to define NodeSelector. ```yaml - affinityConfiguration: - nodeSelector: - disktype: ssd + affinityConfiguration: + nodeSelector: + disktype: ssd ``` That means the pods can only be scheduled to nodes with label `disktype=ssd`. @@ -230,16 +218,13 @@ Node: gke-tg-k8s-gke-1024-default-pool-1e4fbc0f-4z0q/10.128.0.73 Both `gke-tg-k8s-gke-1024-default-pool-1e4fbc0f-4z0q` and `gke-tg-k8s-gke-1024-default-pool-1e4fbc0f-2p9g` possess the specified label. - -Affinity -======== +## Affinity Please note that affinity settings exclusively impact the pods within the TigerGraph cluster. Any other pods executing init/upgrade/expand/shrink tasks will remain unaffected by these affinity configurations. 
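+
+Before writing any affinity rules, it can help to confirm which nodes actually carry the label you plan to match. A generic check, using the `disktype=ssd` label from these examples:
+
+```bash
+# List only the nodes that carry the disktype=ssd label
+kubectl get nodes -l disktype=ssd
+```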
-NodeAffinity ------------- +### NodeAffinity -Additionally, TigerGraph pods can be strategically allocated to nodes with specific labels through the use of NodeAffinity. To gain a deeper understanding of Node Affinity, you can refer to the official Kubernetes documentation: [Assign Pods to Nodes using Node Affinity](https://kubernetes.io/docs/tasks/configure-pod-container/assign-pods-nodes-using-node-affinity/) +Additionally, TigerGraph pods can be strategically allocated to nodes with specific labels through the use of NodeAffinity. To gain a deeper understanding of Node Affinity, you can refer to the official Kubernetes documentation: [Assign Pods to Nodes using Node Affinity](https://kubernetes.io/docs/tasks/configure-pod-container/assign-pods-nodes-using-node-affinity/) Here is an illustrative example of a CR (Custom Resource) configuration implementing NodeAffinity: @@ -247,63 +232,57 @@ Here is an illustrative example of a CR (Custom Resource) configuration implemen apiVersion: graphdb.tigergraph.com/v1alpha1 kind: TigerGraph metadata: - name: test-nodeaffinity + name: test-nodeaffinity spec: - replicas: 3 - image: docker.io/tigergraph/tigergraph-k8s:3.9.2 - imagePullPolicy: IfNotPresent - privateKeyName: ssh-key-secret - listener: - type: LoadBalancer - resources: - requests: - cpu: 2 - memory: 8Gi - storage: - type: persistent-claim - volumeClaimTemplate: - storageClassName: standard - resources: - requests: - storage: 10G - initTGConfig: - ha: 1 - license: YOUR_LICENSE - version: 3.9.2 - hashBucketInBit: 5 - initJob: - image: docker.io/tigergraph/tigergraph-k8s-init:0.0.7 - imagePullPolicy: IfNotPresent - affinityConfiguration: - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: disktype - operator: In - values: - - ssd + replicas: 3 + image: docker.io/tigergraph/tigergraph-k8s:3.9.2 + imagePullPolicy: IfNotPresent + privateKeyName: ssh-key-secret + listener: + type: LoadBalancer + resources: + requests: + cpu: 2 + memory: 8Gi + storage: + type: persistent-claim + volumeClaimTemplate: + storageClassName: standard + resources: + requests: + storage: 10G + ha: 1 + license: YOUR_LICENSE + affinityConfiguration: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: disktype + operator: In + values: + - ssd ``` + In this example, the nodeAffinity section is utilized within the affinityConfiguration to specify that the pods require nodes with the label disktype=ssd during scheduling, while allowing execution to continue even if the affinity is disregarded. Certainly, let's take a closer look at the `.spec.affinityConfiguration` section: ```yaml affinityConfiguration: - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: disktype - operator: In - values: - - ssd + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: disktype + operator: In + values: + - ssd ``` -Within the affinityConfiguration, the setting `requiredDuringSchedulingIgnoredDuringExecution` is employed. This signifies that it is mandatory for our pods to be scheduled exclusively on nodes possessing the specified label, ensuring a precise node placement throughout both the scheduling and execution phases. - +Within the affinityConfiguration, the setting `requiredDuringSchedulingIgnoredDuringExecution` is employed. 
This signifies that it is mandatory for our pods to be scheduled exclusively on nodes possessing the specified label. Because the rule is `IgnoredDuringExecution`, it is enforced at scheduling time only; pods that are already running are not evicted if the node's labels later change.

You can use the following command to observe the nodes to which the pods are scheduled:

@@ -317,15 +296,15 @@ test-nodeaffinity-2 0/1 Running 0
 
 Notice that both gke-tg-k8s-gke-1024-default-pool-1e4fbc0f-2p9g and gke-tg-k8s-gke-1024-default-pool-1e4fbc0f-4z0q nodes possess the specified label, indicating the successful enforcement of node affinity.
 
-### Preferred Node Affinity
+#### Preferred Node Affinity
 
 For a deeper understanding of preferred node affinity, you can explore the document: [Schedule a Pod using preferred node affinity](https://kubernetes.io/docs/tasks/configure-pod-container/assign-pods-nodes-using-node-affinity/#schedule-a-pod-using-preferred-node-affinity).
 
 It's crucial to differentiate between the `preferredDuringSchedulingIgnoredDuringExecution` and `requiredDuringSchedulingIgnoredDuringExecution` fields. When utilizing `requiredDuringSchedulingIgnoredDuringExecution`, pods will remain **unscheduled** if an insufficient number of nodes adhere to the specified rules. On the other hand, opting for `preferredDuringSchedulingIgnoredDuringExecution` indicates that the Kubernetes scheduler will **attempt** to schedule pods onto nodes aligned with the rules. In cases where no nodes fulfill the criteria, the pods will be scheduled alongside other pods.
 
-#### Example: Difference between Preferred Affinity and Required Affinity
+##### Example: Difference between Preferred Affinity and Required Affinity
 
-To illustrate the contrast between preferred affinity and required affinity, let's consider a scenario where we label only one node and create a TigerGraph cluster with specific resource requirements.
+To illustrate the contrast between preferred affinity and required affinity, let's consider a scenario where we label only one node and create a TigerGraph cluster with specific resource requirements.
```yaml kubectl label nodes gke-tg-k8s-gke-1024-default-pool-1e4fbc0f-2p9g disktype=ssd @@ -337,43 +316,37 @@ We create a TigerGraph cluster with resource requests that would limit one pod p apiVersion: graphdb.tigergraph.com/v1alpha1 kind: TigerGraph metadata: - name: test-nodeaffinity + name: test-nodeaffinity spec: - replicas: 3 - image: docker.io/tigergraph/tigergraph-k8s:3.9.2 - imagePullPolicy: IfNotPresent - privateKeyName: ssh-key-secret - listener: - type: LoadBalancer - resources: - requests: - cpu: 4 - memory: 8Gi - storage: - type: persistent-claim - volumeClaimTemplate: - storageClassName: standard - resources: - requests: - storage: 10G - initTGConfig: - ha: 1 - license: YOUR_LICENSE - version: 3.9.2 - hashBucketInBit: 5 - initJob: - image: docker.io/tigergraph/tigergraph-k8s-init:0.0.7 - imagePullPolicy: IfNotPresent - affinityConfiguration: - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: disktype - operator: In - values: - - ssd + replicas: 3 + image: docker.io/tigergraph/tigergraph-k8s:3.9.2 + imagePullPolicy: IfNotPresent + privateKeyName: ssh-key-secret + listener: + type: LoadBalancer + resources: + requests: + cpu: 4 + memory: 8Gi + storage: + type: persistent-claim + volumeClaimTemplate: + storageClassName: standard + resources: + requests: + storage: 10G + ha: 1 + license: YOUR_LICENSE + affinityConfiguration: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: disktype + operator: In + values: + - ssd ``` Running kubectl get pods --output=wide provides the following output: @@ -384,15 +357,19 @@ test-nodeaffinity-0 1/1 Running 0 test-nodeaffinity-1 0/1 Pending 0 107s test-nodeaffinity-2 0/1 Pending 0 106s ``` + In this output, you can observe that only one pod has been scheduled to the node labeled with `disktype=ssd`. The remaining two pods are pending due to resource constraints, as there is only one node with the required label and that node does not have sufficient available CPU resources to accommodate all pods. You can utilize the following command to gain insights into why `test-nodeaffinity-1` is in a pending state: -``` + +```bash kubectl describe pod test-nodeaffinity-1 ``` + This command will provide detailed information about the pod's status, including any events and messages related to its scheduling and resource allocation. In this specific case, the output will indicate the reason for the pod's pending status, such as insufficient CPU resources and failure to match the pod's node affinity or selector. Here is an example of the type of information you might encounter: + ```yaml Events: Type Reason Age From Message @@ -400,6 +377,7 @@ Events: Normal NotTriggerScaleUp 2m16s cluster-autoscaler pod didn't trigger scale-up: Warning FailedScheduling 101s (x2 over 2m17s) default-scheduler 0/6 nodes are available: 1 Insufficient cpu, 5 node(s) didn't match Pod's node affinity/selector. preemption: 0/6 nodes are available: 1 No preemption victims found for incoming pod, 5 Preemption is not helpful for scheduling. ``` + This output indicates that the pod is pending due to insufficient CPU resources (`Insufficient cpu`) and the fact that the node affinity or selector criteria are not being met by any available nodes (`node(s) didn't match Pod's node affinity/selector`). 
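+
+To see how much CPU a node can actually offer when you read an `Insufficient cpu` event like the one above, a generic check (using one of the node names from this example) is:
+
+```bash
+# Show the node's allocatable resources, which the scheduler compares against pod requests
+kubectl describe node gke-tg-k8s-gke-1024-default-pool-1e4fbc0f-2p9g | grep -A 7 "Allocatable"
+```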
Now we edit the above CR, use `preferredDuringSchedulingIgnoredDuringExecution` instead @@ -407,19 +385,21 @@ Now we edit the above CR, use `preferredDuringSchedulingIgnoredDuringExecution` ```yaml #...... #The same as above one - affinityConfiguration: - affinity: - nodeAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 1 - preference: - matchExpressions: - - key: disktype - operator: In - values: - - ssd + affinityConfiguration: + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: disktype + operator: In + values: + - ssd ``` + Upon checking pod status with `kubectl get pods --output=wide`, you notice the following: + ```bash > kubectl get pods --output=wide NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES @@ -430,12 +410,11 @@ test-nodeaffinity-2 0/1 ContainerCreati In the provided output, only one pod has been successfully scheduled to a node with the specified label (`disktype=ssd`). The other pods were scheduled to nodes without the specific label, which demonstrates the behavior of `preferredDuringSchedulingIgnoredDuringExecution`. This affinity setting attempts to schedule pods according to the defined preferences, but it is not a strict requirement. If nodes meeting the preferences are unavailable, the pods will still be scheduled on other nodes. - -### Weighted Affinity and Logical Operators +#### Weighted Affinity and Logical Operators The `weight` attribute, ranging from 1 to 100, can be assigned to each instance of the `preferredDuringSchedulingIgnoredDuringExecution` affinity type. This weight represents the preference given to a particular affinity rule. When all other scheduling requirements for a Pod are met, the scheduler calculates a score by summing up the weights of satisfied preferred rules. This score contributes to the overall prioritization of nodes, with higher scores leading to higher scheduling priority for the Pod. -### Combining Rules with Logical Operators +#### Combining Rules with Logical Operators The `operator` field allows you to employ logical operators to determine how Kubernetes interprets the affinity rules. Various operators such as `In`, `NotIn`, `Exists`, `DoesNotExist`, `Gt`, and `Lt` can be used. These operators can be combined to craft nuanced rules that guide the scheduling behavior. @@ -445,8 +424,7 @@ In scenarios involving multiple terms associated with `nodeAffinity` types withi For a single term within `nodeSelectorTerms`, if multiple expressions are present in a single `matchExpressions` field, the Pod can only be scheduled onto a node if all the expressions are satisfied (expressions are ANDed). - -#### Examples: Combining Multiple Rules with Different Weights +##### Examples: Combining Multiple Rules with Different Weights In this scenario, we have labeled nodes, with two labeled as `disktype=ssd` and two as `physical-machine=true`. We assign a weight of 1 to the `disktype=ssd` rule and a weight of 50 to the `physical-machine=true` rule. The objective is to demonstrate how to combine these rules effectively. @@ -496,12 +474,12 @@ test-nodeaffinity-2 0/1 Running 0 19s 10.36.3.12 gke-t The pods are preferentially scheduled to nodes with the `physical-machine=true` label, as specified by the rule with a weight of 50. Two out of three pods are successfully scheduled on nodes meeting this rule. Additionally, one pod is scheduled to a node with the label `disktype=ssd`. 
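+
+The weighted affinity stanza itself is not shown in the hunk above. For reference, a minimal sketch of a combined, weighted configuration that is consistent with the description in this example (weight 1 for `disktype=ssd`, weight 50 for `physical-machine=true`) could look like this:
+
+```yaml
+  affinityConfiguration:
+    affinity:
+      nodeAffinity:
+        preferredDuringSchedulingIgnoredDuringExecution:
+          # Lower weight: mild preference for SSD-backed nodes
+          - weight: 1
+            preference:
+              matchExpressions:
+                - key: disktype
+                  operator: In
+                  values:
+                    - ssd
+          # Higher weight: strong preference for physical machines
+          - weight: 50
+            preference:
+              matchExpressions:
+                - key: physical-machine
+                  operator: In
+                  values:
+                    - "true"
+```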
-Inter-pod Affinity and Anti-Affinity -------------------------------------- +### Inter-pod Affinity and Anti-Affinity [Inter-pod affinity and anti-affinity](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#inter-pod-affinity-and-anti-affinity) offer the capability to restrict the nodes on which your Pods are scheduled based on the labels of other Pods that are already running on those nodes. This is in contrast to node affinity, which is based on the labels of the nodes themselves. Similar to node affinity, inter-pod affinity and anti-affinity come in two types: + - `requiredDuringSchedulingIgnoredDuringExecution` - `preferredDuringSchedulingIgnoredDuringExecution` @@ -513,7 +491,7 @@ To define the topology domain (X), a `topologyKey` is used. The `topologyKey` se If you have specific requirements, such as the need for Pods to be distributed across certain domains, thoughtful selection of the appropriate `topologyKey` ensures that the scheduling behavior aligns with your needs. -### Example: Avoiding Scheduling TigerGraph Pods on the Same VM Instance +#### Example: Avoiding Scheduling TigerGraph Pods on the Same VM Instance In this example, we'll explore how to prevent the scheduling of TigerGraph pods on the same virtual machine (VM) instance. Each TigerGraph pod is uniquely labeled with `tigergraph.com/cluster-pod=${CLUSTER_NAME}`, which designates the cluster it belongs to. We will utilize this label to create the scheduling rule. @@ -523,53 +501,46 @@ Consider the following Kubernetes resource definition: apiVersion: graphdb.tigergraph.com/v1alpha1 kind: TigerGraph metadata: - name: test-cluster + name: test-cluster spec: - replicas: 3 - image: docker.io/tigergraph/tigergraph-k8s:3.9.2 - imagePullPolicy: IfNotPresent - privateKeyName: ssh-key-secret - listener: - type: LoadBalancer - resources: - requests: - cpu: 2 - memory: 8Gi - storage: - type: persistent-claim - volumeClaimTemplate: - storageClassName: standard - resources: - requests: - storage: 10G - initTGConfig: - ha: 1 - license: YOUR_LICENSE - version: 3.9.2 - hashBucketInBit: 5 - initJob: - image: docker.io/tginternal/tigergraph-k8s-init:0.0.7 - imagePullPolicy: IfNotPresent - affinityConfiguration: - affinity: - podAntiAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: tigergraph.com/cluster-pod - operator: In - values: - - test-cluster - topologyKey: kubernetes.io/hostname + replicas: 3 + image: docker.io/tigergraph/tigergraph-k8s:3.9.2 + imagePullPolicy: IfNotPresent + privateKeyName: ssh-key-secret + listener: + type: LoadBalancer + resources: + requests: + cpu: 2 + memory: 8Gi + storage: + type: persistent-claim + volumeClaimTemplate: + storageClassName: standard + resources: + requests: + storage: 10G + ha: 1 + license: YOUR_LICENSE + affinityConfiguration: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: tigergraph.com/cluster-pod + operator: In + values: + - test-cluster + topologyKey: kubernetes.io/hostname ``` This configuration enforces the rule that TigerGraph pods should not be scheduled on VM instances that are already hosting other TigerGraph pods belonging to the same cluster (`test-cluster`). However, in cases where there are insufficient nodes available, more than one TigerGraph pod may still be scheduled on the same VM instance. 
By leveraging the `podAntiAffinity` feature with a preferred scheduling strategy, you ensure that TigerGraph pods are spread across different VM instances within the cluster to enhance fault tolerance and resource distribution. - Create TigerGraph with above CR and see which node the pods are scheduled to: ```bash @@ -589,42 +560,36 @@ We can also require them to be scheduled on nodes which does not have pods of an apiVersion: graphdb.tigergraph.com/v1alpha1 kind: TigerGraph metadata: - name: test-cluster + name: test-cluster spec: - replicas: 3 - image: docker.io/tigergraph/tigergraph-k8s:3.9.2 - imagePullPolicy: IfNotPresent - privateKeyName: ssh-key-secret - listener: - type: LoadBalancer - resources: - requests: - cpu: 1 - memory: 8Gi - storage: - type: persistent-claim - volumeClaimTemplate: - storageClassName: standard - resources: - requests: - storage: 10G - initTGConfig: - ha: 1 - license: YOUR_LICENSE - version: 3.9.2 - hashBucketInBit: 5 - initJob: - image: docker.io/tigergraph/tigergraph-k8s-init:0.0.7 - imagePullPolicy: IfNotPresent - affinityConfiguration: - affinity: - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: tigergraph.com/cluster-pod - operator: Exists - topologyKey: kubernetes.io/hostname + replicas: 3 + image: docker.io/tigergraph/tigergraph-k8s:3.9.2 + imagePullPolicy: IfNotPresent + privateKeyName: ssh-key-secret + listener: + type: LoadBalancer + resources: + requests: + cpu: 1 + memory: 8Gi + storage: + type: persistent-claim + volumeClaimTemplate: + storageClassName: standard + resources: + requests: + storage: 10G + ha: 1 + license: YOUR_LICENSE + affinityConfiguration: + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: tigergraph.com/cluster-pod + operator: Exists + topologyKey: kubernetes.io/hostname ``` This will require the scheduler to schedule pods of test-cluster to nodes that is not running any pods belonging to another TG cluster. @@ -643,7 +608,7 @@ test-nodeaffinity-1 1/1 Running test-nodeaffinity-2 1/1 Running 0 85m 10.36.3.12 gke-tg-k8s-gke-1024-default-pool-1e4fbc0f-4z0q ``` -### Scheduling Pods to Different Zones +#### Scheduling Pods to Different Zones We create an **OpenShift Cluster** which has one master node and five worker nodes. @@ -660,10 +625,6 @@ tg-k8s-openshift-1024-5jz2w-worker-d-7xv82 Ready worker 84m v1.23.5+012 qiuyuhan@yuhan-qiu-bot-20220808075602-0:~/product/src/cqrs/k8s-operator$ ``` -Certainly, I've polished the provided text for clarity and readability: - ---- - Observing the node configuration, each node is associated with a label: `topology.kubernetes.io/zone=xxx`. The master node bears the label `topology.kubernetes.io/zone=us-east1-b`, while two worker nodes are marked with `topology.kubernetes.io/zone=us-east1-b`, another two with `topology.kubernetes.io/zone=us-east1-c`, and one worker node with `topology.kubernetes.io/zone=us-east1-d`. 
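+
+To see the zone label of every node at a glance, a generic check (not part of the original walkthrough) is:
+
+```bash
+# Print each node with its topology.kubernetes.io/zone label as an extra column
+kubectl get nodes -L topology.kubernetes.io/zone
+```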
@@ -693,14 +654,8 @@ spec: resources: requests: storage: 10G - initTGConfig: - ha: 1 - license: YOUR_LICENSE - version: 3.9.2 - hashBucketInBit: 5 - initJob: - image: docker.io/tigergraph/tigergraph-k8s-init:0.0.7 - imagePullPolicy: IfNotPresent + ha: 1 + license: YOUR_LICENSE affinityConfiguration: affinity: podAntiAffinity: @@ -725,12 +680,12 @@ test-cluster-2 0/1 ContainerCreating 0 1s To elaborate, `tg-k8s-openshift-1024-5jz2w-worker-d-7xv82` corresponds to `us-east1-d`, `tg-k8s-openshift-1024-5jz2w-worker-b-w96n6` is positioned in `us-east1-b`, and `tg-k8s-openshift-1024-5jz2w-worker-c-456wl` is situated in `us-east1-c`. -Toleration -=========== +## Toleration [Taint and Toleration](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/) You can put multiple taints on the same node and multiple tolerations on the same pod. The way Kubernetes processes multiple taints and tolerations is like a filter: start with all of a node's taints, then ignore the ones for which the pod has a matching toleration; the remaining un-ignored taints have the indicated effects on the pod. In particular, + 1. if there is at least one un-ignored taint with effect NoSchedule then Kubernetes will not schedule the pod onto that node 2. if there is no un-ignored taint with effect NoSchedule but there is at least one un-ignored taint with effect PreferNoSchedule then Kubernetes will try to not schedule the pod onto the node 3. if there is at least one un-ignored taint with effect NoExecute then the pod will be evicted from the node (if it is already running on the node), and will not be scheduled onto the node (if it is not yet running on the node). @@ -741,8 +696,7 @@ Should we furnish tolerations within the TigerGraph Custom Resource (CR), TigerG It's important to note that tolerations exclusively affect pods within the TigerGraph cluster. Other pods engaged in init/upgrade/expand/shrink operations will remain unaffected. -Example: Implementing User Groups with Taints and Tolerations ------------------------------------------------------------ +### Example: Implementing User Groups with Taints and Tolerations A practical application of Taints and Tolerations is the establishment of user groups for the exclusive utilization of designated nodes. @@ -763,33 +717,27 @@ Then create a cluster without toleration: apiVersion: graphdb.tigergraph.com/v1alpha1 kind: TigerGraph metadata: - name: test-cluster + name: test-cluster spec: - replicas: 3 - image: docker.io/tigergraph/tigergraph-k8s:3.9.2 - imagePullPolicy: IfNotPresent - privateKeyName: ssh-key-secret - listener: - type: LoadBalancer - resources: - requests: - cpu: 4 - memory: 8Gi - storage: - type: persistent-claim - volumeClaimTemplate: - storageClassName: standard - resources: - requests: - storage: 10G - initTGConfig: - ha: 1 - license: YOUR_LICENSE - version: 3.9.2 - hashBucketInBit: 5 - initJob: - image: docker.io/tigergraph/tigergraph-k8s-init:0.0.7 - imagePullPolicy: IfNotPresent + replicas: 3 + image: docker.io/tigergraph/tigergraph-k8s:3.9.2 + imagePullPolicy: IfNotPresent + privateKeyName: ssh-key-secret + listener: + type: LoadBalancer + resources: + requests: + cpu: 4 + memory: 8Gi + storage: + type: persistent-claim + volumeClaimTemplate: + storageClassName: standard + resources: + requests: + storage: 10G + ha: 1 + license: YOUR_LICENSE ``` Upon deploying the cluster, it becomes evident that all pods are scheduled to nodes devoid of the applied taints. 
This aligns with the concept of taints and tolerations, where pods are automatically assigned to nodes that do not possess taints that the pods cannot tolerate. @@ -807,39 +755,33 @@ Then we can establish a new cluster configuration with the specified toleration apiVersion: graphdb.tigergraph.com/v1alpha1 kind: TigerGraph metadata: - name: test-toleration + name: test-toleration spec: - replicas: 3 - image: docker.io/tigergraph/tigergraph-k8s:3.9.2 - imagePullPolicy: IfNotPresent - privateKeyName: ssh-key-secret - listener: - type: LoadBalancer - resources: - requests: - cpu: 4 - memory: 8Gi - storage: - type: persistent-claim - volumeClaimTemplate: - storageClassName: standard - resources: - requests: - storage: 10G - initTGConfig: - ha: 1 - license: YOUR_LICENSE - version: 3.9.2 - hashBucketInBit: 5 - initJob: - image: docker.io/tigergraph/tigergraph-k8s-init:0.0.7 - imagePullPolicy: IfNotPresent - affinityConfiguration: - tolerations: - - key: "userGroup" - operator: "Equal" - value: "enterprise" - effect: "NoExecute" + replicas: 3 + image: docker.io/tigergraph/tigergraph-k8s:3.9.2 + imagePullPolicy: IfNotPresent + privateKeyName: ssh-key-secret + listener: + type: LoadBalancer + resources: + requests: + cpu: 4 + memory: 8Gi + storage: + type: persistent-claim + volumeClaimTemplate: + storageClassName: standard + resources: + requests: + storage: 10G + ha: 1 + license: YOUR_LICENSE + affinityConfiguration: + tolerations: + - key: "userGroup" + operator: "Equal" + value: "enterprise" + effect: "NoExecute" ``` By integrating tolerations into the configuration, the "test-toleration" cluster is designed to prioritize nodes with the specified taints. In this instance, pods belonging to the "test-toleration" cluster will be exclusively scheduled onto nodes bearing the "userGroup=enterprise" taint with the "NoExecute" effect. @@ -859,13 +801,12 @@ test-toleration-1 0/1 Running 0 test-toleration-2 1/1 Running 0 55s 10.36.5.23 gke-tg-k8s-gke-1024-default-pool-1e4fbc0f-2p9g ``` -Notice -===== +## Notice + +- If the `affinityConfiguration` includes a `NodeSelector`, and the current node does not meet the `NodeSelector` configuration, and the K8S cluster has `auto-scaling` enabled, the K8S cluster will expand more nodes to accommodate the affinityConfiguration, even if the new node cannot accommodate it. This can result in a situation where there are no suitable nodes available for scheduling TigerGraph pods but useless nodes created. Therefore, it is important to configure the affinityConfiguration with the correct node specifications. + +- If the `affinityConfiguration` includes `pod affinity`, and the current node does not meet the `pod affinity` settings, and the K8S cluster contains `multiple zones` with `auto-scaling` enabled, the automatic scaling of the K8S cluster will be prevented. This can result in a message like "2 node(s) had volume node affinity conflict and 1 node(s) didn't match pod affinity rules" being displayed. The "volume node affinity conflict" message means that the PV requires the current PV to be in the initial zone, which may be the reason why K8S cannot automatically scale. Similarly, there may be no suitable node available for scheduling TigerGraph pods. -* If the `affinityConfiguration` includes a `NodeSelector`, and the current node does not meet the `NodeSelector` configuration, and the K8S cluster has `auto-scaling` enabled, the K8S cluster will expand more nodes to accommodate the affinityConfiguration, even if the new node cannot accommodate it. 
This can result in a situation where there are no suitable nodes available for scheduling TigerGraph pods but useless nodes created. Therefore, it is important to configure the affinityConfiguration with the correct node specifications. - -* If the `affinityConfiguration` includes `pod affinity`, and the current node does not meet the `pod affinity` settings, and the K8S cluster contains `multiple zones` with `auto-scaling` enabled, the automatic scaling of the K8S cluster will be prevented. This can result in a message like "2 node(s) had volume node affinity conflict and 1 node(s) didn't match pod affinity rules" being displayed. The "volume node affinity conflict" message means that the PV requires the current PV to be in the initial zone, which may be the reason why K8S cannot automatically scale. Similarly, there may be no suitable node available for scheduling TigerGraph pods. - ```bash # Pod description Events: @@ -881,19 +822,20 @@ Notice Term 0: topology.kubernetes.io/region in [us-central1] topology.kubernetes.io/zone in [us-central1-a] ``` - -* If pod scheduled failed due to limited resource, and got enough resources by expand more nodes, it may cause any pod move to another node, then it may prompt following error. - + +- If pod scheduled failed due to limited resource, and got enough resources by expand more nodes, it may cause any pod move to another node, then it may prompt following error. + ```bash Warning FailedAttachVolume 45s attachdetach-controller Multi-Attach error for volume "pvc-dcdb2953-b50f-45a9-a5c3-7f7752c36698" Volume is already exclusively attached to one node and can't be attached to another ``` - -* Based on the factors mentioned, the following conclusions can be drawn: - - 1. When running creating TG Cluster operations, it is crucial to configure affinityConfiguration based on the correct node resources to ensure successful scaling and operation of the cluster. - - 2. It is preferred to ensure that there are corresponding nodes to implement HA during the creation TG Cluster phase, rather than updating TG Cluster in the future, because the Node Affinity of the PV may cause failure. - - 3. Two common scenarios that can lead to failure are: - - 1. In a K8S cluster with multiple zones, node resources may be insufficient. Since operator using Volume Node Affinity for PV, the pod associated with the PV must be created on the original node, resulting in the pod creation being stuck in the Pending state. \ No newline at end of file + +- Based on the factors mentioned, the following conclusions can be drawn: + + 1. When running creating TG Cluster operations, it is crucial to configure affinityConfiguration based on the correct node resources to ensure successful scaling and operation of the cluster. + + 2. It is preferred to ensure that there are corresponding nodes to implement HA during the creation TG Cluster phase, rather than updating TG Cluster in the future, because the Node Affinity of the PV may cause failure. + + 3. Two common scenarios that can lead to failure are: + + 1. In a K8S cluster with multiple zones, node resources may be insufficient. Since operator using Volume Node Affinity for PV, the pod associated with the PV must be created on the original node, resulting in the pod creation being stuck in the Pending state. + 2. When updating the affinity or adjusting the size of a TG Cluster that already has affinity, conflicts may occur due to the presence of Volume Node Affinity for PV. 
This is also because the pod associated with the PV must be created on the original node, but the new affinity may require the pod to be scheduled on other nodes, resulting in conflicts and potential failures. diff --git a/k8s/docs/03-deploy/configure-affinity-by-kubectl-tg.md b/k8s/docs/03-deploy/configure-affinity-by-kubectl-tg.md index 278e6c0e..3343dfd7 100644 --- a/k8s/docs/03-deploy/configure-affinity-by-kubectl-tg.md +++ b/k8s/docs/03-deploy/configure-affinity-by-kubectl-tg.md @@ -1,23 +1,22 @@ -

Use Affinity in kubectl-tg plugin

+# Use Affinity in kubectl-tg plugin To know how to use NodeSelector/Affinity/Tolerations in YAML, please read [NodeSelector, Affinity and Tolerations using cases](./affinity-use-cases.md) . This DOC will include all cases in above document. -- [Usage Instructions](#usage-instructions) - - [Removing Affinity Configuration](#removing-affinity-configuration) -- [Examples](#examples) - - [Scheduling Pods on Nodes with `disktype=ssd` Label](#scheduling-pods-on-nodes-with-disktypessd-label) - - [Preferring Pods to be Scheduled on Nodes with `disktype=ssd` Label](#preferring-pods-to-be-scheduled-on-nodes-with-disktypessd-label) - - [Combining Multiple Rules with Different Weights](#combining-multiple-rules-with-different-weights) +- [Use Affinity in kubectl-tg plugin](#use-affinity-in-kubectl-tg-plugin) + - [Usage Instructions](#usage-instructions) + - [Removing Affinity Configuration](#removing-affinity-configuration) + - [Examples](#examples) + - [Scheduling Pods on Nodes with `disktype=ssd` Label](#scheduling-pods-on-nodes-with-disktypessd-label) + - [Preferring Pods to be Scheduled on Nodes with `disktype=ssd` Label](#preferring-pods-to-be-scheduled-on-nodes-with-disktypessd-label) + - [Combining Multiple Rules with Different Weights](#combining-multiple-rules-with-different-weights) - [Preventing Multiple TigerGraph Pods on the Same VM Instance](#preventing-multiple-tigergraph-pods-on-the-same-vm-instance) - - [Require TG pods not to be scheduled to VM instances that is running TG pods belonging to another cluster](#require-tg-pods-not-to-be-scheduled-to-vm-instances-that-is-running-tg-pods-belonging-to-another-cluster) - - [Require TG pods not to be scheduled to the same zone](#require-tg-pods-not-to-be-scheduled-to-the-same-zone) - - [Implementing User Groups using Taints and Tolerations](#implementing-user-groups-using-taints-and-tolerations) - - [See also](#see-also) + - [Require TG pods not to be scheduled to VM instances that is running TG pods belonging to another cluster](#require-tg-pods-not-to-be-scheduled-to-vm-instances-that-is-running-tg-pods-belonging-to-another-cluster) + - [Require TG pods not to be scheduled to the same zone](#require-tg-pods-not-to-be-scheduled-to-the-same-zone) + - [Implementing User Groups using Taints and Tolerations](#implementing-user-groups-using-taints-and-tolerations) -Usage Instructions -===== +## Usage Instructions To employ affinity within `kubectl-tg`, the procedure involves crafting your affinity rules in a YAML file. Presented below is an exemplary affinity configuration file: @@ -84,8 +83,7 @@ kubectl tg update --cluster-name test-cluster --namespace NAMESPACE \ --affinity tg-affinity.yaml ``` -Removing Affinity Configuration ----------------------------------- +### Removing Affinity Configuration To eliminate all existing tolerations, affinity rules, and nodeSelectors from your TigerGraph cluster configuration, the process is straightforward. Follow the steps outlined below: @@ -108,39 +106,36 @@ To eliminate all existing tolerations, affinity rules, and nodeSelectors from yo This procedure effectively clears all existing affinity-related configurations, providing a clean slate for your TigerGraph cluster settings. If you wish to retain certain rules while removing others, simply modify your configuration file accordingly and execute the `kubectl tg update` command. 
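+
+After an update, you can confirm what was actually applied by inspecting the cluster's custom resource. This assumes the TigerGraph CRD is addressable as `tigergraph` with `kubectl get`; adjust the resource name to match your installation:
+
+```bash
+# Print the affinity-related part of the TigerGraph CR spec
+kubectl get tigergraph test-cluster --namespace NAMESPACE -o yaml | grep -A 20 "affinityConfiguration"
+```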
+## Examples -Examples -======== - -Scheduling Pods on Nodes with `disktype=ssd` Label ------------------------------------------------------ +### Scheduling Pods on Nodes with `disktype=ssd` Label To ensure that pods are scheduled exclusively on nodes labeled with `disktype=ssd`, you can utilize the provided affinity configurations. These configurations utilize both Node Selector and Node Affinity approaches. Please note that when employing **required** rules, if an insufficient number of nodes with the desired label are available for scheduling TigerGraph (TG) pods, the pods will remain in a Pending status. 1. **Using Node Selector:** - ```yaml - nodeSelector: - disktype: ssd - ``` + + ```yaml + nodeSelector: + disktype: ssd + ``` 2. **Using Node Affinity:** - ```yaml - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: disktype - operator: In - values: - - ssd - ``` -With these configurations, TigerGraph pods will be scheduled specifically on nodes bearing the `disktype=ssd` label. However, it's important to be aware that if there are an inadequate number of nodes fulfilling this criterion, the TG pods may become Pending due to the required scheduling rules. + ```yaml + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: disktype + operator: In + values: + - ssd + ``` +With these configurations, TigerGraph pods will be scheduled specifically on nodes bearing the `disktype=ssd` label. However, it's important to be aware that if there are an inadequate number of nodes fulfilling this criterion, the TG pods may become Pending due to the required scheduling rules. -Preferring Pods to be Scheduled on Nodes with `disktype=ssd` Label --------------------------------------------------------------------- +### Preferring Pods to be Scheduled on Nodes with `disktype=ssd` Label If your objective is to prioritize scheduling pods on nodes labeled with `disktype=ssd`, you can implement the desired behavior using a preferred rule within the affinity configuration. Here's how you can achieve this: @@ -161,8 +156,7 @@ With this affinity configuration, the specified weight of 1 signifies a preferen By utilizing this **preferred** rule, you ensure that scheduling attempts prioritize nodes with the desired label, while also allowing for scheduling flexibility to accommodate situations where a limited number of labeled nodes are available. This approach offers a balanced trade-off between preference and availability, optimizing the scheduling behavior of your pods within your Kubernetes cluster. -Combining Multiple Rules with Different Weights ------------------------------------------------ +### Combining Multiple Rules with Different Weights When you need to combine multiple affinity rules with varying weights to guide pod scheduling, you can achieve this by utilizing a configuration similar to the one you provided. Here's an example configuration: @@ -191,8 +185,7 @@ With this configuration: This approach provides a flexible and versatile way to guide pod scheduling behavior based on the defined affinity rules and their associated weights. It ensures that pods are distributed across nodes according to the specified preferences while accommodating availability constraints. 
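After the update completes, you can confirm how the combined weights played out by checking which node each pod landed on. The `tigergraph.com/cluster-name` label is the one used elsewhere in these docs; the cluster name and namespace below are placeholders.

```bash
# Show the node assigned to each TigerGraph pod to verify the preferred rules took effect
kubectl get pods --namespace NAMESPACE \
  -l tigergraph.com/cluster-name=test-cluster -o wide
```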
-Preventing Multiple TigerGraph Pods on the Same VM Instance ------------------------------------------------------------- +## Preventing Multiple TigerGraph Pods on the Same VM Instance To ensure that no more than one TigerGraph pod is scheduled on the same VM instance, you can employ a `podAntiAffinity` configuration. This rule helps distribute TigerGraph pods across different VM instances, thus avoiding overloading a single instance. Here's how you can achieve this: @@ -228,8 +221,7 @@ This approach effectively prevents the overloading of a single VM instance by en Please note that if there are an adequate number of nodes available, multiple TigerGraph pods may still be scheduled on the same VM instance. The rule is designed to minimize such instances and optimize distribution across VM instances. -Require TG pods not to be scheduled to VM instances that is running TG pods belonging to another cluster --------------------------------------------------------------------------------------------------------- +### Require TG pods not to be scheduled to VM instances that is running TG pods belonging to another cluster ```yaml affinity: @@ -242,8 +234,7 @@ affinity: topologyKey: kubernetes.io/hostname ``` -Require TG pods not to be scheduled to the same zone ----------------------------------------------------- +### Require TG pods not to be scheduled to the same zone ```yaml affinity: @@ -258,8 +249,7 @@ affinity: topologyKey: topology.kubernetes.io/zone ``` -Implementing User Groups using Taints and Tolerations -------------------------------------------------------- +### Implementing User Groups using Taints and Tolerations To establish user groups and control pod scheduling based on taints and tolerations, follow these steps: @@ -286,9 +276,3 @@ To establish user groups and control pod scheduling based on taints and tolerati This configuration specifies that the pods should tolerate the taint with the label `userGroup=enterprise` and the effect `NoExecute`, allowing them to be scheduled on the tainted nodes. By following these steps, you can successfully implement user groups using taints and tolerations. Only pods that adhere to the defined toleration rules will be scheduled on the nodes tainted with the `userGroup=enterprise` label and `NoExecute` effect, allowing you to control and segregate pod scheduling based on user groups. - -## See also - -If you are interested in learning how to use and configure Pod affinity with YAML resources, please refer to the following documentation: - -- [NodeSelector, Affinity and Toleration Use Cases](../03-deploy/affinity-use-cases.md) diff --git a/k8s/docs/03-deploy/custom-containers.md b/k8s/docs/03-deploy/custom-containers.md index a93af20e..130be632 100644 --- a/k8s/docs/03-deploy/custom-containers.md +++ b/k8s/docs/03-deploy/custom-containers.md @@ -1,15 +1,15 @@ -

-InitContainers,SidecarContainers and CustomVolumes

+# InitContainers,SidecarContainers and CustomVolumes -- [Basic knowledge](#basic-knowledge) -- [Sidecar Containers](#sidecar-containers) -- [Init Containers](#init-containers) -- [Custom Volumes](#custom-volumes) -- [Combining sidecarContainers, initContainers, and customVolumes](#combining-sidecarcontainers-initcontainers-and-customvolumes) -- [What's Next](#whats-next) +- [InitContainers,SidecarContainers and CustomVolumes](#initcontainerssidecarcontainers-and-customvolumes) + - [Basic knowledge](#basic-knowledge) + - [Sidecar Containers](#sidecar-containers) + - [Init Containers](#init-containers) + - [Custom Volumes](#custom-volumes) + - [Combining sidecarContainers, initContainers, and customVolumes](#combining-sidecarcontainers-initcontainers-and-customvolumes) + - [What's Next](#whats-next) +## Basic knowledge -Basic knowledge -=============== A K8s Pod has the capability to house multiple containers, including both init containers and app containers. Upon pod creation, the init containers execute sequentially in a designated order. Should any of the init containers encounter a failure, the overall pod execution is halted (for more insights, consult [Init Containers](https://kubernetes.io/docs/concepts/workloads/pods/init-containers/)). Following the successful completion of all init containers, the app containers proceed to run concurrently. By default, in the configuration of the TigerGraph CR, each TigerGraph Pod features a singular app container named "tigergraph". This container runs all TigerGraph services within the Pod. The functionality "InitContainers,SidecarContainers and CustomVolumes" empowers users to seamlessly integrate personalized initContainers and sidecarContainers into TigerGraph Pods. Furthermore, users can create customVolumes, enabling the mounting of these volumes within their initContainers or sidecarContainers. @@ -17,12 +17,10 @@ By default, in the configuration of the TigerGraph CR, each TigerGraph Pod featu > [!NOTE] > You can utilize this feature by adding configurations in a YAML file or through `kubectl-tg`. This document exclusively focuses on the usage within YAML files. If you're interested in learning how to use it with `kubectl-tg`, please consult the guide on [Utilizing InitContainers, Sidecar Containers, and Custom Volumes with kubectl-tg](./use-custom-containers-by-kubectl-tg.md). -Sidecar Containers -================= +## Sidecar Containers A sidecar container functions similarly to the app container named "tigergraph". In cases where the sidecar container requires readiness and liveness checks configuration, it is crucial to ensure that these checks do not interfere with the rolling update process of TigerGraph (TG) pods. Simultaneously, adopting the practice of setting resource limits for each sidecar container within the TG pod is recommended to prevent the excessive use of Kubernetes node resources. - To integrate sidecarContainers into TigerGraph Pods, write the configurations in `.spec.sidecarContainers`. 
For detailed guidance on setting up sidecarContainers, consult the [K8S Containers](https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#Container): ```yaml @@ -35,15 +33,8 @@ spec: imagePullPolicy: IfNotPresent imagePullSecrets: - name: tigergraph-image-pull-secret - initJob: - image: docker.io/tigergraph/tigergraph-k8s-init:0.0.7 - imagePullPolicy: IfNotPresent - imagePullSecrets: - - name: tigergraph-image-pull-secret - initTGConfig: - ha: 1 - license: YOUR_LICENSE_HERE - version: 3.9.2 + ha: 1 + license: YOUR_LICENSE_HERE listener: type: LoadBalancer privateKeyName: ssh-key-secret @@ -98,10 +89,9 @@ spec: volumeMode: Filesystem ``` -Init Containers -===== -To incorporate custom initContainers into TigerGraph Pods, place the configuration details within `.spec.initContainers` field. For detailed instructions on setting up initContainers, you can refer to the [K8S Containers API](https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#Container). Your personalized initContainers will execute once the TG initContainer finishes its tasks. +## Init Containers +To incorporate custom initContainers into TigerGraph Pods, place the configuration details within `.spec.initContainers` field. For detailed instructions on setting up initContainers, you can refer to the [K8S Containers API](https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#Container). Your personalized initContainers will execute once the TG initContainer finishes its tasks. ```yaml apiVersion: graphdb.tigergraph.com/v1alpha1 @@ -113,15 +103,8 @@ spec: imagePullPolicy: IfNotPresent imagePullSecrets: - name: tigergraph-image-pull-secret - initJob: - image: docker.io/tigergraph/tigergraph-k8s-init:0.0.7 - imagePullPolicy: IfNotPresent - imagePullSecrets: - - name: tigergraph-image-pull-secret - initTGConfig: - ha: 1 - license: YOUR_LICENSE_HERE - version: 3.9.2 + ha: 1 + license: YOUR_LICENSE_HERE listener: type: LoadBalancer privateKeyName: ssh-key-secret @@ -149,16 +132,15 @@ spec: volumeMode: Filesystem ``` -Custom Volumes -============= -Incorporating initContainers and sidecarContainers with customVolumes facilitates seamless data exchange. For defining customVolumes, direct your configurations to the `.spec.customVolumes` field. To understand the essential fields of customVolumes, consult the [Kubernetes Volumes documentation](https://kubernetes.io/docs/concepts/storage/volumes/) +## Custom Volumes + +Incorporating initContainers and sidecarContainers with customVolumes facilitates seamless data exchange. For defining customVolumes, direct your configurations to the `.spec.customVolumes` field. To understand the essential fields of customVolumes, consult the [Kubernetes Volumes documentation](https://kubernetes.io/docs/concepts/storage/volumes/) By default, the Operator establishes two volumes: `tg-data` for persistent TG cluster data and `tg-log` for TG logs storage. In your sidecar containers, you can mount volume named `tg-log` to access TG logs effectively or mount `tg-data` to access TG data. -Combining sidecarContainers, initContainers, and customVolumes -===== -The following example demonstrates the integration of sidecarContainers and initContainers while facilitating data exchange through customVolumes. Init containers create a file in the `credentials` volume, which the sidecar named `main-container` subsequently utilizes for readiness checks. 
The sidecar named `main-container` also outputs to the file `/var/log/myapp.log`, accessible by the `sidecar-container` due to their common customVolume named `log`. +## Combining sidecarContainers, initContainers, and customVolumes +The following example demonstrates the integration of sidecarContainers and initContainers while facilitating data exchange through customVolumes. Init containers create a file in the `credentials` volume, which the sidecar named `main-container` subsequently utilizes for readiness checks. The sidecar named `main-container` also outputs to the file `/var/log/myapp.log`, accessible by the `sidecar-container` due to their common customVolume named `log`. ```yaml apiVersion: graphdb.tigergraph.com/v1alpha1 @@ -170,15 +152,8 @@ spec: imagePullPolicy: IfNotPresent imagePullSecrets: - name: tigergraph-image-pull-secret - initJob: - image: docker.io/tigergraph/tigergraph-k8s-init:0.0.7 - imagePullPolicy: IfNotPresent - imagePullSecrets: - - name: tigergraph-image-pull-secret - initTGConfig: - ha: 1 - license: YOUR_LICENSE_HERE - version: 3.9.2 + ha: 1 + license: YOUR_LICENSE_HERE listener: type: LoadBalancer privateKeyName: ssh-key-secret @@ -244,6 +219,6 @@ spec: emptyDir: {} ``` -What's Next -==== -* Learn [how to integrate envoy sidecar containers with TG Pods](../07-reference/integrate-envoy-sidecar.md) \ No newline at end of file +## What's Next + +- Learn [how to integrate envoy sidecar containers with TG Pods](../07-reference/integrate-envoy-sidecar.md) diff --git a/k8s/docs/03-deploy/customize-tigergraph-pod.md b/k8s/docs/03-deploy/customize-tigergraph-pod.md new file mode 100644 index 00000000..7f9589a3 --- /dev/null +++ b/k8s/docs/03-deploy/customize-tigergraph-pod.md @@ -0,0 +1,171 @@ +# Customize TigerGraph Pods and TigerGraph Containers + +When you create a TigerGraph cluster, TigerGraph pods will be created in the Kubernetes cluster, TigerGraph system will run in TigerGraph containers. You may want to customize the pods or containers to meet your needs, for example, to add more customized labels and annotations, or to change the security context of the containers. + +We have exposed some configurations for you to customize the pods and containers. You can customize them by modifying TigerGraph CR file or by using the `kubectl tg` command. + +- [Customize TigerGraph Pods and TigerGraph Containers](#customize-tigergraph-pods-and-tigergraph-containers) + - [Customize TigerGraph Pods/TigerGraph Containers by kubectl tg](#customize-tigergraph-podstigergraph-containers-by-kubectl-tg) + - [Customize Labels and Annotations of TigerGraph Pods](#customize-labels-and-annotations-of-tigergraph-pods) + - [Customize Security Context of TigerGraph Containers](#customize-security-context-of-tigergraph-containers) + - [Customize TigerGraph Pods/TigerGraph Containers by TigerGraph CR](#customize-tigergraph-podstigergraph-containers-by-tigergraph-cr) + - [Customize Labels and Annotations of TigerGraph Pods in TigerGraph CR](#customize-labels-and-annotations-of-tigergraph-pods-in-tigergraph-cr) + - [Customize Security Context of TigerGraph Containers in TigerGraph CR](#customize-security-context-of-tigergraph-containers-in-tigergraph-cr) + +## Customize TigerGraph Pods/TigerGraph Containers by kubectl tg + +You can customize the pods by using the `kubectl tg create` command when you create the cluster, and you can also update them by `kubectl tg update`. 
+ +### Customize Labels and Annotations of TigerGraph Pods + +```bash + --pod-labels : add some customized labels to all pods, your input should be like like 'k1=v1,k2="v2 with space"' + --pod-annotations : add some customized annotations to all pods, your input should be like like 'k1=v1,k2="v2 with space"' +``` + +You can specify the labels and annotations of the pods by using the `--pod-labels` and `--pod-annotations` options. The input should be like `k1=v1,k2=v2`. For example: + +```bash +kubectl tg create --cluster-name test-cluster --namespace tigergraph \ + --pod-labels "app=tg,env=prod" --pod-annotations "app=tg,env=prod" ${OTHER_OPTIONS} +``` + +You can also update them by: + +```bash + kubectl tg update --cluster-name test-cluster --namespace tigergraph \ + --pod-labels "app=tg,env=test" --pod-annotations "app=tg,env=test" ${OTHER_OPTIONS} +``` + +### Customize Security Context of TigerGraph Containers + +```bash + --security-context : give a YAML file to specify SecurityContext for tigergraph container +``` + +Since the SecurityContext is a little complicated, you should know the details of it before you use it. You can refer to the [Set the security context for a Container](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container) for more information. + +Use a file to set the SecurityContext for TigerGraph containers. The file should be like: + +```yaml +securityContext: + capabilities: + add: + - SYS_PTRACE + - SYSLOG + - SYS_ADMIN +``` + +The above file will add the capabilities to the containers. Name the file as `security-context.yaml`, and then you can create a cluster with this security context configuration by: + +```bash +kubectl tg create --cluster-name test-cluster --namespace tigergraph \ + --security-context security-context.yaml ${OTHER_OPTIONS} +``` + +You can also set `privileged: true` in the file to make the containers privilegedm, which means all capabilities will be added to the containers. For example: + +```yaml +securityContext: + privileged: true +``` + +Name the file as `security-context-privileged.yaml`, and then you can update the cluster with this security context configuration by: + +```bash +kubectl tg update --cluster-name test-cluster --namespace tigergraph \ + --security-context security-context-privileged.yaml ${OTHER_OPTIONS} +``` + +> [!WARNING] +> We always use `runAsUser: 1000` and `runAsGroup: 1000` in the containers, and you are not allowed to change them. If you specify `runAsUser` or `runAsGroup` in the file, the update or creation will fail. + +## Customize TigerGraph Pods/TigerGraph Containers by TigerGraph CR + +### Customize Labels and Annotations of TigerGraph Pods in TigerGraph CR + +You can add labels to field `spec.podLabels` and annotations to field `spec.PodAnnotations` in the TigerGraph CR file. Operator will inject these labels and annotations to all tigergraph pods. 
+For example: + +```yaml +apiVersion: graphdb.tigergraph.com/v1alpha1 +kind: TigerGraph +metadata: + name: test-cluster +spec: + image: docker.io/tigergraph/tigergraph-k8s:3.10.0 + imagePullPolicy: IfNotPresent + imagePullSecrets: + - name: tigergraph-image-pull-secret + listener: + type: LoadBalancer + privateKeyName: ssh-key-secret + license: YOUR_LICENSE + replicas: 3 + ha: 1 + resources: + requests: + cpu: 4 + memory: 8Gi + storage: + type: persistent-claim + volumeClaimTemplate: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10G + storageClassName: standard + volumeMode: Filesystem + podLabels: + key.tg.com: value + podAnnotations: + key.tg.com: value +``` + +### Customize Security Context of TigerGraph Containers in TigerGraph CR + +Since the SecurityContext is a little complicated, you should know the details of it before you use it. You can refer to the [Set the security context for a Container](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container) for more information. + +You can add security context to field `spec.securityContext` in the TigerGraph CR file. Operator will configure this security context to all tigergraph containers. + +```yaml +apiVersion: graphdb.tigergraph.com/v1alpha1 +kind: TigerGraph +metadata: + name: test-cluster +spec: + image: docker.io/tigergraph/tigergraph-k8s:3.10.0 + imagePullPolicy: IfNotPresent + imagePullSecrets: + - name: tigergraph-image-pull-secret + listener: + type: LoadBalancer + privateKeyName: ssh-key-secret + license: YOUR_LICENSE + replicas: 3 + ha: 1 + resources: + requests: + cpu: 4 + memory: 8Gi + storage: + type: persistent-claim + volumeClaimTemplate: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10G + storageClassName: standard + volumeMode: Filesystem + securityContext: + capabilities: + add: + - SYS_PTRACE + - SYSLOG + - SYS_ADMIN +``` + +> [!WARNING] +> We always use `runAsUser: 1000` and `runAsGroup: 1000` in the containers, and you are not allowed to change them. If you specify `runAsUser` or `runAsGroup` in the file, the webhook will reject the request. diff --git a/k8s/docs/03-deploy/deploy-without-internet.md b/k8s/docs/03-deploy/deploy-without-internet.md index e0ffc2a1..9e25aeba 100644 --- a/k8s/docs/03-deploy/deploy-without-internet.md +++ b/k8s/docs/03-deploy/deploy-without-internet.md @@ -1,40 +1,44 @@ -

-How to install Operator and deploy TG on K8s without internet access

+# How to install Operator and deploy TG on K8s without internet access -Prerequisites -============= +- [How to install Operator and deploy TG on K8s without internet access](#how-to-install-operator-and-deploy-tg-on-k8s-without-internet-access) + - [Prerequisites](#prerequisites) + - [Procedure](#procedure) + - [Transferring Docker Images and Helm Chart Package](#transferring-docker-images-and-helm-chart-package) + - [TigerGraph Operator](#tigergraph-operator) + - [Cert-manager](#cert-manager) + - [Install Operator with kubect-tg](#install-operator-with-kubect-tg) + - [Install Operator using the helm command to install it locally](#install-operator-using-the-helm-command-to-install-it-locally) + - [Deploy TG cluster](#deploy-tg-cluster) -* Docker - -* Private Docker registry - -* Private helm repo - +## Prerequisites -Procedure -========= +- Docker -Transferring Docker Images and Helm Chart Package --------------------------------------------------- +- Private Docker registry + +- Private helm repo + +## Procedure + +### Transferring Docker Images and Helm Chart Package Please ensure that your environment has internet access before proceeding with the download of these docker images and helm chart packages. For illustrative purposes, we will utilize TG cluster version 3.9.2 and TG K8s Operator version 0.0.7. Kindly make the necessary adjustments based on your specific version. -### TigerGraph Operator +#### TigerGraph Operator -* Docker images - +- Docker images -1. tigergraph/tigergraph-k8s:3.9.2 - -2. tigergraph/tigergraph-k8s-operator:0.0.7 - -3. tigergraph/tigergraph-k8s-init:0.0.7 - +1. tigergraph/tigergraph-k8s:3.9.2 + +2. tigergraph/tigergraph-k8s-operator:0.0.7 + +3. tigergraph/tigergraph-k8s-init:0.0.7 ```bash docker pull tigergraph/tigergraph-k8s:3.9.2 -dokcer pull tigergraph/tigergraph-k8s-operator:0.0.7 +docker pull tigergraph/tigergraph-k8s-operator:0.0.7 docker pull tigergraph/tigergraph-k8s-init:0.0.7 docker save tigergraph/tigergraph-k8s:3.9.2 tigergraph/tigergraph-k8s-operator:0.0.7 tigergraph/tigergraph-k8s-init:0.0.7 > tigergraph-operator-images.tar @@ -53,8 +57,8 @@ docker push ${DOCKER_REPO}/tigergraph-k8s-operator:0.0.7 docker push ${DOCKER_REPO}/tigergraph-k8s-init:0.0.7 ``` -* Helm chart package (private helm repo required) - +- Helm chart package (private helm repo required) + If the goal is to install the operator using kubectl-tg, having a private Helm repository is crucial. If such a repository is unavailable and you aim to install an operator without internet connectivity, refer to the section that outlines the procedure for installing the Helm chart locally. 
```bash @@ -78,15 +82,13 @@ curl --request DELETE ${HELM_REPO}/api/charts/tg-operator/${VERSION} curl --data-binary "@charts/tg-operator-${VERSION}.tgz" ${HELM_REPO}/api/charts ``` -* Install Operator and deploy TG cluster with private docker repo and helm repo - +- Install Operator and deploy TG cluster with private docker repo and helm repo -### Cert-manager +#### Cert-manager The following examples suppose you are going to use cert-manager 1.8.0 version -* Transferring the cert-manager Docker images to your private Docker registry - +- Transferring the cert-manager Docker images to your private Docker registry ```bash # curl https://github.com/cert-manager/cert-manager/releases/download/v1.8.0/cert-manager.yaml @@ -94,7 +96,7 @@ The following examples suppose you are going to use cert-manager 1.8.0 version # quay.io/jetstack/cert-manager-controller:v1.8.0 # quay.io/jetstack/cert-manager-webhook:v1.8.0 docker pull quay.io/jetstack/cert-manager-cainjector:v1.8.0 -dokcer pull quay.io/jetstack/cert-manager-controller:v1.8.0 +docker pull quay.io/jetstack/cert-manager-controller:v1.8.0 docker pull quay.io/jetstack/cert-manager-webhook:v1.8.0 docker save quay.io/jetstack/cert-manager-cainjector:v1.8.0 quay.io/jetstack/cert-manager-controller:v1.8.0 quay.io/jetstack/cert-manager-webhook:v1.8.0 > cert-manager-images.tar @@ -114,8 +116,7 @@ docker push ${DOCKER_REPO}/cert-manager-controller:v1.8.0 docker push ${DOCKER_REPO}/cert-manager-webhook:v1.8.0 ``` -* Modify the manifests of cert-manager according to your docker registry - +- Modify the manifests of cert-manager according to your docker registry ```bash curl -L "https://github.com/cert-manager/cert-manager/releases/download/v1.8.0/cert-manager.yaml" -o "cert-manager.yaml" @@ -129,11 +130,12 @@ quay.io/jetstack/cert-manager-cainjector:v1.8.0 -> ${DOCKER_REPO}/cert-manager-w kubectl apply -f cert-manager.yaml ``` -**Install Operator with kubect-tg** +### Install Operator with kubect-tg In scenarios where your Docker registry necessitates authentication, you can specify a custom secret name using the `--image-pull-secret` option. The default secret name is `tigergraph-image-pull-secret`. Furthermore, it's imperative to create the image pull secret within the designated namespace before initiating the deployment of your TG cluster. + ```bash # please make sure the HELM_REPO and DOCKER_REPO is correct export HELM_REPO=http://127.0.0.1:8383 @@ -141,20 +143,19 @@ export DOCKER_REPO=docker.io/internal kubectl tg init --namespace tigergraph --helm-repo ${HELM_REPO} --image-pull-secret yoursecret --docker-registry ${DOCKER_REPO} ``` -**Install Operator using the helm command to install it locally** +### Install Operator using the helm command to install it locally Please follow these steps to install a Helm chart: -1. Download the Helm chart you want to install. - -2. Extract the chart to a directory on your local machine. - -3. Open a terminal window and navigate to the directory where you extracted the chart. - -4. Modify the default configuration of Operator by editing `values.yaml`. - -5. Run the following command to install the chart: - +1. Download the Helm chart you want to install. + +2. Extract the chart to a directory on your local machine. + +3. Open a terminal window and navigate to the directory where you extracted the chart. + +4. Modify the default configuration of Operator by editing `values.yaml`. + +5. 
Run the following command to install the chart: Customize the operator configuration via values.yaml, we should change the image filed to your internal docker repo. @@ -166,7 +167,9 @@ Customize the operator configuration via values.yaml, we should change the image # Default values for deployment replicas of operator replicas: 3 # image is the docker image of operator -image: tigergraph/tigergraph-k8s-operator:0.0.7 +image: tigergraph-k8s-operator +# jobImange is the docker image of cluster operation(int, upgrade, scale and so on) job +jobImage: tigergraph-k8s-init pullPolicy: IfNotPresent # imagePullSecret is the docker image pull secret of operator imagePullSecret: tigergraph-image-pull-secret @@ -225,7 +228,7 @@ tigergraph-operator-controller-manager-7cfc4476c7-76msk 2/2 Running 0 tigergraph-operator-controller-manager-7cfc4476c7-k8425 2/2 Running 0 5m8s ``` -**Deploy TG cluster** +### Deploy TG cluster If your Docker registry necessitates authentication, you need to create an image pull secret. Please make the necessary adjustments to the namespace based on your environment. @@ -268,19 +271,12 @@ kind: TigerGraph metadata: name: test-cluster spec: - image: docker.io/tginternal/tigergraph-k8s:3.9.2 + image: ${DOCKER_REGISTRY}/tigergraph-k8s:3.9.2 imagePullPolicy: Always imagePullSecrets: - name: tigergraph-image-pull-secret - initJob: - image: docker.io/tginternal/tigergraph-k8s-init:0.0.7 - imagePullPolicy: Always - imagePullSecrets: - - name: tigergraph-image-pull-secret - initTGConfig: - ha: 2 - license: xxxxxxxxxxxx - version: 3.9.2 + ha: 2 + license: xxxxxxxxxxxx listener: type: LoadBalancer privateKeyName: ssh-key-secret @@ -299,4 +295,4 @@ spec: storage: 100G storageClassName: standard volumeMode: Filesystem -``` \ No newline at end of file +``` diff --git a/k8s/docs/03-deploy/lifecycle-of-tigergraph.md b/k8s/docs/03-deploy/lifecycle-of-tigergraph.md new file mode 100644 index 00000000..09027293 --- /dev/null +++ b/k8s/docs/03-deploy/lifecycle-of-tigergraph.md @@ -0,0 +1,63 @@ +# Lifecycle of TigerGraph + +TigerGraph CR follows a designed lifecycle to manage the TigerGraph cluster. Starting from `InitializeRoll` status, moving to `InitializePost` status. In `InitializePost` status, TigerGraph Operator will create an init-job to initialize TigerGraph system. After the init-job is finished, TigerGraph Operator will move the TigerGraph CR to `Normal` status. + +- [Lifecycle of TigerGraph](#lifecycle-of-tigergraph) + - [Configure Lifecycle Hooks in TigerGraph CR](#configure-lifecycle-hooks-in-tigergraph-cr) + - [PostInitAction](#postinitaction) + - [Configure Lifecycle Hooks by kubectl-tg](#configure-lifecycle-hooks-by-kubectl-tg) + - [--post-init-action](#--post-init-action) + +## Configure Lifecycle Hooks in TigerGraph CR + +### PostInitAction + +You may want to execute some commands in **TigerGraph container** once the TigerGraph system is initialized. We provide a field `.spec.lifecycle.postInitAction` in TigerGraph CR to support this. You can specify a bash script in this field, and the script will be put into the init-job and be executed in the first TigerGraph pod(whose suffix is `-0`) after the TigerGraph system is initialized. For example: + +```yaml +spec: + lifecycle: + postInitAction: | + echo "This is a post init action" >> /tmp/post-init-action.log +``` + +If you configure the above post init action, the command will be executed in the first TigerGraph pods(whose suffix is `-0`) after the TigerGraph system is initialized. 
You can check the log in the pod: + +```bash +kubectl exec -ti test-cluster-0 -n tigergraph -- cat /tmp/post-init-action.log +``` + +The output should be: + +```bash +This is a post init action +``` + +> [!WARNING] +> If your PostInitAction failed, the whole init-job will be considered as failed, and the TigerGraph CR will be moved to `InitializePost,False` status. +> So please make sure your PostInitAction is correct before you configure it, and you should also handle any errors in the PostInitAction script. +> If your PostInitAction failed and the cluster is in `InitializePost,False` status, you can modify the TigerGraph CR to correct the PostInitAction, +> then operator will create a new init-job to execute the PostInitAction again. + +## Configure Lifecycle Hooks by kubectl-tg + +### --post-init-action + +You may want to execute some commands in **TigerGraph container** once the TigerGraph system is initialized. We provide a flag `--post-init-action` in `kubectl tg create` and `kubectl tg update` to support this. You can specify a bash script in this flag, and the script will be put into the init-job and be executed in one of the TigerGraph pods after the TigerGraph system is initialized. Create a script file `post-init-action.sh`: + +```bash +echo "This is a post init action" >> /tmp/post-init-action.log +``` + +Then you can create a cluster with this post init action by: + +```bash +kubectl tg create --cluster-name test-cluster --namespace tigergraph \ + --post-init-action post-init-action.sh ${OTHER_OPTIONS} +``` + +> [!WARNING] +> If your PostInitAction failed, the whole init-job will be considered as failed, and the TigerGraph CR will be moved to `InitializePost,False` status. +> So please make sure your PostInitAction is correct before you configure it, and you should also handle any errors in the PostInitAction script. +> If your PostInitAction failed and the cluster is in `InitializePost,False` status, you can use `kubectl tg update` to correct the PostInitAction, +> then operator will create a new init-job to execute the PostInitAction again. 
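For example, if the first attempt failed, you could correct the script and re-apply it roughly as follows; the file name, cluster name, and namespace are placeholders, and using `set -e` is only one way to surface errors from the script.

```bash
# Hypothetical corrected script: fail fast on errors and write a marker file
cat > post-init-action.sh <<'EOF'
set -e
echo "This is a post init action" >> /tmp/post-init-action.log
EOF

# Re-apply the action; the operator then creates a new init-job to run it
kubectl tg update --cluster-name test-cluster --namespace tigergraph \
  --post-init-action post-init-action.sh
```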
diff --git a/k8s/docs/03-deploy/multiple-persistent-volumes-mounting.md b/k8s/docs/03-deploy/multiple-persistent-volumes-mounting.md new file mode 100644 index 00000000..b8004d0a --- /dev/null +++ b/k8s/docs/03-deploy/multiple-persistent-volumes-mounting.md @@ -0,0 +1,379 @@ +# Multiple persistent volumes mounting + +- [Multiple persistent volumes mounting](#multiple-persistent-volumes-mounting) + - [Mounting PVs for components reserved for the cluster](#mounting-pvs-for-components-reserved-for-the-cluster) + - [Mounting a dedicated PV for Kafka](#mounting-a-dedicated-pv-for-kafka) + - [Mounting a dedicated PV for TigerGraph logs](#mounting-a-dedicated-pv-for-tigergraph-logs) + - [Mounting PVs for custom containers(Init, sidecar containers, and TigerGraph containers)](#mounting-pvs-for-custom-containersinit-sidecar-containers-and-tigergraph-containers) + - [Rules for Creating Custom PVs](#rules-for-creating-custom-pvs) + - [YAML Configuration](#yaml-configuration) + - [kubectl-tg Operation](#kubectl-tg-operation) + - [Mounting Existing PVs to Customize Volume Mounts of TigerGraph Containers](#mounting-existing-pvs-to-customize-volume-mounts-of-tigergraph-containers) + - [Rules for Mounting Existing PVs](#rules-for-mounting-existing-pvs) + - [YAML Configuration For Mounting Existing PVs](#yaml-configuration-for-mounting-existing-pvs) + - [kubectl-tg Operation For Mounting Existing PVs](#kubectl-tg-operation-for-mounting-existing-pvs) + - [See also](#see-also) + +The TigerGraph Operator supports the mounting of multiple persistent volumes (PVs) for TigerGraph pods, enabling more efficient data storage for different purposes. + +Prior to TigerGraph Operator version 0.0.9, all TigerGraph data was stored in a single disk (one PV), limiting scalability and negatively impacting disk performance. To address this, the TigerGraph Operator now supports the mounting of dedicated PVs for Kafka and TigerGraph logs, thereby enhancing overall performance. + +Moreover, meeting distinct requirements for mounting an existing PV, as opposed to mounting an exclusive PV for each TigerGraph pod, can be achieved by configuring a custom volume for the TigerGraph pod and custom mount paths for the TigerGraph container. + +> [!WARNING] +> Multiple persistent volumes mounting is only supported when creating a TigerGraph cluster using the Operator. If you already have an existing TigerGraph cluster on Kubernetes, you must recreate the cluster to meet this requirement. + +## Mounting PVs for components reserved for the cluster + +To improve the disk I/O performance of a TigerGraph cluster, dedicated PVs can be mounted for Kafka and TigerGraph logs. + +> [!WARNING] +> To utilize a dedicated PV for Kafka in an existing TigerGraph cluster, backup and restore operations must be employed to fulfill this requirement. + +### Mounting a dedicated PV for Kafka + +- YAML Configuration + +To mount a dedicated PV for Kafka, add a new storage named `tg-kafka` under the spec section's `additionalStorages`. The `storageClassName` attribute is optional, providing flexibility in specifying a custom class or using the main storage class. + +```YAML + storage: + type: persistent-claim + volumeClaimTemplate: + resources: + requests: + storage: 100G + storageClassName: pd-standard + volumeMode: Filesystem + additionalStorages: + - name: tg-kafka + storageClassName: pd-ssd + storageSize: 10Gi +``` + +- kubectl-tg Operation + +> [!NOTE] +> Other parameters required to create a cluster are omitted here. Please refer to other documents. 
+ +```bash +kubectl tg create --cluster-name ${YOUR_CLUSTER_NAME} --additional-storages additional-storage-kafka.yaml +``` + +Example additional storage YAML file: + +```YAML +additionalStorages: + - name: tg-kafka + storageClassName: pd-ssd + storageSize: 10Gi +``` + +### Mounting a dedicated PV for TigerGraph logs + +- YAML Configuration + +To mount a dedicated PV for TigerGraph logs, add a new storage named `tg-log` under spec `additionalStorages`. The `storageClassName` attribute is optional, providing flexibility in specifying a custom class or using the main storage class. The mount path for TigerGraph logs is set to `/home/tigergraph/tigergraph/log`. + +```YAML + storage: + type: persistent-claim + volumeClaimTemplate: + resources: + requests: + storage: 100G + storageClassName: pd-standard + volumeMode: Filesystem + additionalStorages: + - name: tg-log + storageClassName: pd-standard + storageSize: 5Gi +``` + +- kubectl-tg Operation + +> [!NOTE] +> Other parameters required to create a cluster are omitted here. Please refer to other documents. + +```bash +kubectl tg create --cluster-name ${YOUR_CLUSTER_NAME} --additional-storages additional-storage-tg-logs.yaml +``` + +Example additional storage YAML file: + +```YAML +additionalStorages: + - name: tg-log + storageClassName: pd-standard + storageSize: 5Gi +``` + +## Mounting PVs for custom containers(Init, sidecar containers, and TigerGraph containers) + +TigerGraph Operator supports the creation and management of PVs for custom containers using the `additionalStorages` specification. This eliminates the need to manually create persistent volumes for Init and sidecar containers. + +### Rules for Creating Custom PVs + +- Storage Names: Do not use reserved storage names for custom PVs. + - config-volume + - private-key-volume + - tg-data + - tg-gstore + +- Mount Paths: You can mount custom PVs to specific paths in TigerGraph containers, but do not use reserved mount paths. + - /tmp/init_tg_cfg + - /etc/private-key-volume + - /home/tigergraph/tigergraph/data/kafka-mntptr + - /home/tigergraph/tigergraph/data + - /home/tigergraph/tigergraph/log + - /home/tigergraph/tigergraph/data/gstore + +### YAML Configuration + +The following example demonstrates how to mount a custom PV for a sidecar container (e.g.,tg-sidecar) and a custom PV to the path `/home/tigergraph/backup` of the TigerGraph container. + +The mountPath attribute is optional for `additionalStorages`. It will mount the PV to this path if you configure it for the specific additional storage. 
+ +```YAML +apiVersion: graphdb.tigergraph.com/v1alpha1 +kind: TigerGraph +metadata: + name: test-cluster + namespace: tigergraph +spec: + ha: 2 + image: docker.io/tigergraph/tigergraph-k8s:3.9.3 + imagePullPolicy: Always + imagePullSecrets: + - name: tigergraph-image-pull-secret + license: xxxxxx + listener: + type: LoadBalancer + privateKeyName: ssh-key-secret + replicas: 3 + resources: + requests: + cpu: "6" + memory: 12Gi + storage: + type: persistent-claim + volumeClaimTemplate: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100G + storageClassName: gp2 + volumeMode: Filesystem + additionalStorages: + - name: tg-kafka + storageSize: 5Gi + - name: tg-log + storageSize: 5Gi + - name: tg-sidecar + storageClassName: efs-sc + storageSize: 5Gi + accessMode: ReadWriteMany + volumeMode: Filesystem + - name: tg-backup + storageSize: 5Gi + mountPath: /home/tigergraph/backup + accessMode: ReadWriteOnce + volumeMode: Filesystem + initContainers: + - image: alpine:3.17.2 + name: init-container + args: + - /bin/sh + - -c + - echo hello + sidecarContainers: + - args: # sidecar will execute this + - /bin/sh + - -c + - | + while true; do + echo "$(date) INFO hello from main-container" >> /tg-sidecar/myapp.log ; + sleep 1; + done + image: alpine:3.17.2 + name: sidecar-container # name of sidecar + readinessProbe: # check if the sidecar is ready + exec: + command: + - sh + - -c + - if [[ -f /tg-sidecar/myapp.log ]];then exit 0; else exit 1;fi + initialDelaySeconds: 10 + periodSeconds: 5 + resources: + requests: # request resouces for sidecar + cpu: 500m + memory: 512Mi + limits: # limit resources + cpu: 500m + memory: 512Mi + env: # inject the environment you need + - name: CLUSTER_NAME + value: test-cluster + volumeMounts: + - mountPath: /tg-sidecar + name: tg-sidecar +``` + +### kubectl-tg Operation + +```bash +kubectl tg create --cluster-name ${YOUR_CLUSTER_NAME} --additional-storages additional-storage-tg-logs.yaml \ +--custom-containers custom-containers.yaml +``` + +Example additional storage YAML file: + +```YAML +additionalStorages: + - name: tg-kafka + storageSize: 5Gi + - name: tg-log + storageSize: 5Gi + - name: tg-sidecar + storageClassName: efs-sc + storageSize: 5Gi + accessMode: ReadWriteMany + volumeMode: Filesystem + - name: tg-backup + storageSize: 5Gi + mountPath: /home/tigergraph/backup + accessMode: ReadWriteOnce + volumeMode: Filesystem +``` + +Example custom containers YAML file: + +```YAML +initContainers: +- image: alpine:3.17.2 + name: init-container + args: + - /bin/sh + - -c + - echo hello +sidecarContainers: + - args: # sidecar will execute this + - /bin/sh + - -c + - | + while true; do + echo "$(date) INFO hello from main-container" >> /tg-sidecar/myapp.log ; + sleep 1; + done + image: alpine:3.17.2 + name: sidecar-container # name of sidecar + readinessProbe: # check if the sidecar is ready + exec: + command: + - sh + - -c + - if [[ -f /tg-sidecar/myapp.log ]];then exit 0; else exit 1;fi + initialDelaySeconds: 10 + periodSeconds: 5 + resources: + requests: # request resouces for sidecar + cpu: 500m + memory: 512Mi + limits: # limit resources + cpu: 500m + memory: 512Mi + env: # inject the environment you need + - name: CLUSTER_NAME + value: test-cluster + volumeMounts: + - mountPath: /tg-sidecar + name: tg-sidecar +``` + +## Mounting Existing PVs to Customize Volume Mounts of TigerGraph Containers + +If you have created a shared PV (e.g., EFS on AWS), you can mount existing PVs to customize volume mounts of TigerGraph containers for data sharing. 
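The examples in this section assume a shared claim named `efs-claim`, backed by an `efs-sc` StorageClass, already exists in the cluster's namespace. A minimal sketch of such a claim is shown below; the access mode, requested size, and namespace are illustrative assumptions and should be adapted to your EFS setup.

```bash
# Illustrative shared PVC to be mounted by the TigerGraph containers
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: efs-claim
  namespace: tigergraph
spec:
  accessModes:
    - ReadWriteMany
  storageClassName: efs-sc
  resources:
    requests:
      storage: 10Gi
EOF
```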
+ +### Rules for Mounting Existing PVs + +Do not use the following reserved paths when mounting existing PVs: + +- /tmp/init_tg_cfg +- /etc/private-key-volume +- /home/tigergraph/tigergraph/data/kafka-mntptr +- /home/tigergraph/tigergraph/data +- /home/tigergraph/tigergraph/log +- /home/tigergraph/tigergraph/data/gstore + +### YAML Configuration For Mounting Existing PVs + +```YAML +apiVersion: graphdb.tigergraph.com/v1alpha1 +kind: TigerGraph +metadata: + name: test-cluster2 + namespace: tigergraph +spec: + ha: 1 + image: docker.io/tigergraph/tigergraph-k8s:3.9.3 + imagePullPolicy: Always + imagePullSecrets: + - name: tigergraph-image-pull-secret + license: xxxxxx + listener: + type: LoadBalancer + privateKeyName: ssh-key-secret + replicas: 1 + resources: + requests: + cpu: "6" + memory: 12Gi + storage: + type: persistent-claim + volumeClaimTemplate: + resources: + requests: + storage: 100G + storageClassName: efs-sc + volumeMode: Filesystem + customVolumes: + - name: efs-storage + persistentVolumeClaim: + claimName: efs-claim + customVolumeMounts: + - name: efs-storage + mountPath: /efs-data +``` + +### kubectl-tg Operation For Mounting Existing PVs + +```bash + +kubectl tg create --cluster-name ${YOUR_CLUSTER_NAME} --custom-volumes additional-storage-tg-logs.yaml \ +--custom-volume-mounts custom-volume-mounts.yaml +``` + +Example custom volumes YAML file: + +```YAML +customVolumes: +- name: efs-storage + persistentVolumeClaim: + claimName: efs-claim +``` + +Example custom volume mounts YAML file: + +```YAML +customVolumeMounts: +- name: efs-storage + mountPath: /efs-data +``` + +## See also + +If you are interested in creating static and dynamic persistent volume storage, refer to the following document: + +- [How to use static & dynamic persistent volume storage](../07-reference/static-and-dynamic-persistent-volume-storage.md) diff --git a/k8s/docs/03-deploy/tigergraph-on-eks.md b/k8s/docs/03-deploy/tigergraph-on-eks.md index 3a15b5c7..f146af32 100644 --- a/k8s/docs/03-deploy/tigergraph-on-eks.md +++ b/k8s/docs/03-deploy/tigergraph-on-eks.md @@ -1,29 +1,40 @@ -

-Deploy TigerGraph on AWS EKS

+# Deploy TigerGraph on AWS EKS This user manual provides detailed instructions on deploying a TigerGraph cluster on AWS EKS (Elastic Kubernetes Service). -- [Prerequisites](#prerequisites) -- [Deploy TigerGraph Operator](#deploy-tigergraph-operator) - - [Install cert-manager for EKS](#install-cert-manager-for-eks) - - [Install the kubectl-tg Plugin](#install-the-kubectl-tg-plugin) - - [Optional: Install CRDs Independently](#optional-install-crds-independently) - - [Install TigerGraph Operator](#install-tigergraph-operator) -- [Deploy a TigerGraph Cluster](#deploy-a-tigergraph-cluster) - - [Providing a Private SSH Key Pair for Enhanced Security](#providing-a-private-ssh-key-pair-for-enhanced-security) - - [Specify the StorageClass Name](#specify-the-storageclass-name) - - [Create a TigerGraph Cluster with Specific Options](#create-a-tigergraph-cluster-with-specific-options) -- [Connect to a TigerGraph Cluster](#connect-to-a-tigergraph-cluster) - - [Connect to a TigerGraph Cluster Pod](#connect-to-a-tigergraph-cluster-pod) - - [Access TigerGraph Suite](#access-tigergraph-suite) - - [Access RESTPP API Service](#access-restpp-api-service) -- [Upgrade a TigerGraph Cluster](#upgrade-a-tigergraph-cluster) -- [Scale a TigerGraph Cluster](#scale-a-tigergraph-cluster) -- [Update Resources (CPU and Memory) of the TigerGraph Cluster](#update-resources-cpu-and-memory-of-the-tigergraph-cluster) -- [Destroy the TigerGraph Cluster and the Kubernetes Operator](#destroy-the-tigergraph-cluster-and-the-kubernetes-operator) - - [Destroy the TigerGraph Cluster](#destroy-the-tigergraph-cluster) - - [Uninstall TigerGraph Operator](#uninstall-tigergraph-operator) - - [Uninstall the Custom Resource Definitions (CRDs)](#uninstall-the-custom-resource-definitions-crds) -- [See also](#see-also) +- [Deploy TigerGraph on AWS EKS](#deploy-tigergraph-on-aws-eks) + - [Prerequisites](#prerequisites) + - [Deploy TigerGraph Operator](#deploy-tigergraph-operator) + - [Install cert-manager for EKS](#install-cert-manager-for-eks) + - [Install the kubectl-tg Plugin](#install-the-kubectl-tg-plugin) + - [Optional: Install CRDs Independently](#optional-install-crds-independently) + - [Install TigerGraph Operator](#install-tigergraph-operator) + - [Deploy a TigerGraph Cluster](#deploy-a-tigergraph-cluster) + - [Install EBS CSI driver](#install-ebs-csi-driver) + - [Providing a Private SSH Key Pair for Enhanced Security](#providing-a-private-ssh-key-pair-for-enhanced-security) + - [Specify the StorageClass Name](#specify-the-storageclass-name) + - [Specify the additional Storage for mounting multiple PVs(Optional)](#specify-the-additional-storage-for-mounting-multiple-pvsoptional) + - [Customize configurations for the TigerGraph system (Optional)](#customize-configurations-for-the-tigergraph-system-optional) + - [Create a TigerGraph Cluster with Specific Options](#create-a-tigergraph-cluster-with-specific-options) + - [Troubleshooting TigerGraph cluster deployment on EKS](#troubleshooting-tigergraph-cluster-deployment-on-eks) + - [EBS CSI driver not installed](#ebs-csi-driver-not-installed) + - [Node IAM role missing policy AmazonEBSCSIDriverPolicy](#node-iam-role-missing-policy-amazonebscsidriverpolicy) + - [Connect to a TigerGraph Cluster](#connect-to-a-tigergraph-cluster) + - [Connect to a TigerGraph Cluster Pod](#connect-to-a-tigergraph-cluster-pod) + - [Access TigerGraph Services](#access-tigergraph-services) + - [Verify the API service](#verify-the-api-service) + - [Verify the RESTPP API service](#verify-the-restpp-api-service) + 
- [Verify the Metrics API service](#verify-the-metrics-api-service) + - [Upgrade a TigerGraph Cluster](#upgrade-a-tigergraph-cluster) + - [Update system configurations and license of the TigerGraph cluster](#update-system-configurations-and-license-of-the-tigergraph-cluster) + - [Scale a TigerGraph Cluster](#scale-a-tigergraph-cluster) + - [Change the HA factor of the TigerGraph cluster](#change-the-ha-factor-of-the-tigergraph-cluster) + - [Update Resources (CPU and Memory) of the TigerGraph Cluster](#update-resources-cpu-and-memory-of-the-tigergraph-cluster) + - [Destroy the TigerGraph Cluster and the Kubernetes Operator](#destroy-the-tigergraph-cluster-and-the-kubernetes-operator) + - [Destroy the TigerGraph Cluster](#destroy-the-tigergraph-cluster) + - [Uninstall TigerGraph Operator](#uninstall-tigergraph-operator) + - [Uninstall the Custom Resource Definitions (CRDs)](#uninstall-the-custom-resource-definitions-crds) + - [See also](#see-also) ## Prerequisites @@ -35,7 +46,17 @@ Before proceeding with the deployment, make sure you have the following prerequi - [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html) installed with the latest version. This will be used to install the EBS CSI driver `aws-ebs-csi-driver` if necessary. -- An existing [EKS cluster](https://docs.aws.amazon.com/eks/latest/userguide/create-cluster.html) with admin role permissions. +- An existing [EKS cluster](https://docs.aws.amazon.com/eks/latest/userguide/create-cluster.html) with appropriate IAM permissions: + + - The EKS cluster requires an IAM role with the following AWS-managed IAM policies attached: + - arn:aws:iam::aws:policy/AmazonEKSClusterPolicy + - arn:aws:iam::aws:policy/AmazonEKSServicePolicy + - The EKS node group requires an IAM role with the following AWS-managed IAM policies attached: + - arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy + - arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy + - arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly + - arn:aws:iam::aws:policy/AmazonEKSClusterPolicy + - arn:aws:iam::aws:policy/AmazonEKSVPCResourceController ## Deploy TigerGraph Operator @@ -109,6 +130,9 @@ Now, you can install the TigerGraph Operator using the following commands: A namespace-scoped operator watches and manages resources in a single Namespace, whereas a cluster-scoped operator watches and manages resources cluster-wide. +> [!IMPORTANT] +> Namespace-scoped operators require the same operator version to be installed for different namespaces. + - For a namespace-scoped Operator: ```bash @@ -142,6 +166,24 @@ To verify the successful deployment of the Operator, use the following command: This section explains how to deploy a TigerGraph cluster on EKS using the `kubectl-tg` plugin and a CR (Custom Resource) YAML manifest. +### Install EBS CSI driver + +For specific EKS version, EBS CSI is not installed by default, please refer to the below official documents install the driver manually. + +The Amazon Elastic Block Store (Amazon EBS) Container Storage Interface (CSI) driver manages the lifecycle of Amazon EBS volumes as storage for the Kubernetes Volumes that you create. 
[Official AWS Documentation](https://docs.aws.amazon.com/eks/latest/userguide/ebs-csi.html) + +- Install EBS CSI driver + + ```bash + aws eks create-addon --cluster-name ${YOUR_CLUSTER_NAME} --addon-name aws-ebs-csi-driver + ``` + +- Wait for the Amazon EBS CSI Driver deployment (ebs-csi-controller) to become available in the EKS cluster + + ```bash + kubectl wait --for=condition=Available=True deployment/ebs-csi-controller -n kube-system + ``` + ### Providing a Private SSH Key Pair for Enhanced Security Starting from Operator version 0.0.4, users are required to provide their private SSH key pair for enhanced security before creating a cluster. Follow these steps: @@ -154,8 +196,8 @@ Starting from Operator version 0.0.4, users are required to provide their privat - Step 2: create a Secret Object - > [!IMPORTANT] - > The namespace of the Secret object must be the same as that of the TigerGraph cluster. +> [!IMPORTANT] +> The namespace of the Secret object must be the same as that of the TigerGraph cluster. Create a secret object based on the private SSH key file generated in step 1. Ensure that the key name of the secret for the private SSH key is `private-ssh-key`, and the key name for the public SSH key is `public-ssh-key`. Do not alter these key names: @@ -183,6 +225,118 @@ gp2 (default) kubernetes.io/aws-ebs Delete WaitForFirstConsumer f Choose the appropriate StorageClass (e.g., `gp2`) when creating the TigerGraph cluster, ensuring optimized storage provisioning and management. +### Specify the additional Storage for mounting multiple PVs(Optional) + +You can specify multiple PVs for TigerGraph Pods by specifying the `--additional-storages` option. The value of this option is a YAML file configuration. For example: + +> [!NOTE] +> Other parameters required to create a cluster are omitted here. + +```bash +kubectl tg create --cluster-name ${YOUR_CLUSTER_NAME} --additional-storages additional-storage-tg-logs.yaml +``` + +Example additional storage YAML file: + +```YAML +additionalStorages: + - name: tg-kafka + storageSize: 5Gi + - name: tg-log + storageSize: 5Gi + - name: tg-sidecar + storageClassName: efs-sc + storageSize: 5Gi + accessMode: ReadWriteMany + volumeMode: Filesystem + - name: tg-backup + storageSize: 5Gi + mountPath: /home/tigergraph/backup + accessMode: ReadWriteOnce + volumeMode: Filesystem +``` + +You can also specify the multiple PVs using CR configuration, For more information, see [Multiple persistent volumes mounting](../03-deploy/multiple-persistent-volumes-mounting.md) + +### Customize configurations for the TigerGraph system (Optional) + +You can customize the configurations for the TigerGraph system by specifying the `--tigergraph-config` option. The value of this option should be key-value pairs separated by commas. For example: + +```bash + --tigergraph-config "System.Backup.TimeoutSec=900,Controller.BasicConfig.LogConfig.LogFileMaxSizeMB=40" +``` + + The key-value pairs are the same as the configurations that can be set by `gadmin config set` command. For more information, see [Configuration Parameters](https://docs.tigergraph.com/tigergraph-server/current/reference/configuration-parameters). All configurations will be applied to the TigerGraph system when the cluster is initializing. + +### Create a TigerGraph Cluster with Specific Options + +You can create a new TigerGraph cluster with specific options, such as size, high availability, version, license, and resource specifications. 
Here's an example: + +You must provide your license key when creating cluster. Contact TigerGraph support for help finding your license key. + +- Export license key as an environment variable + + ```bash + export LICENSE= + ``` + +- Create TigerGraph cluster with kubectl-tg plugin + + ```bash + kubectl tg create --cluster-name ${YOUR_CLUSTER_NAME} --private-key-secret ${YOUR_SSH_KEY_SECRET_NAME} --size 3 --ha 2 --version 3.9.3 --license ${LICENSE} \ + --storage-class gp2 --storage-size 100G --cpu 6000m --memory 16Gi --namespace ${YOUR_NAMESPACE} + ``` + +- Create TigerGraph cluster with CR(Custom Resource) YAML manifest + +> [!NOTE] +> Please replace the TigerGraph version (e.g., 3.9.3) and the Operator version (e.g., 0.0.9) with your desired versions. + + ```bash + cat < [!WARNING] > For specific EKS version, you may encounter the following problems, TigerGraph pods will be in pending state because of the PVC pending state. @@ -235,89 +389,77 @@ Choose the appropriate StorageClass (e.g., `gp2`) when creating the TigerGraph c Normal ExternalProvisioning 115s (x25 over 7m54s) persistentvolume-controller waiting for a volume to be created, either by external provisioner "ebs.csi.aws.com" or manually created by system administrator ``` - If you encounter the above issues, please resolve it using the following steps: +If you're facing the issues above, please check the following: - 1. Make sure that the EKS cluster has been installed EBS CSI driver +- Verify if the EBS CSI driver is correctly installed as an EKS add-on. [EBS CSI driver Installation](#install-ebs-csi-driver) +- Confirm that the EKS cluster and EKS node group have the necessary permissions. [See prerequisites](#prerequisites) - ```bash - kubectl get deployment ebs-csi-controller -n kube-system - ``` +#### Node IAM role missing policy AmazonEBSCSIDriverPolicy - 2. If not, install EBS CSI driver through the following commands +> [!WARNING] +> If Node IAM role is missing policy AmazonEBSCSIDriverPolicy, you may encounter the following issues: - > [!WARNING] - > Please ensure that the IAM role for the Amazon EBS CSI driver has been created. You can refer to the official AWS documentation [Creating the Amazon EBS CSI driver IAM role](https://docs.aws.amazon.com/eks/latest/userguide/csi-iam-role.html) for detailed instructions. +- TigerGraph Pod status ```bash - aws eks create-addon --cluster-name $YOUR_EKS_CLUSTER_NAME --addon-name aws-ebs-csi-driver + kubectl get pod -l tigergraph.com/cluster-name=${YOUR_CLUSTER_NAME} -n ${YOUR_NAMESPACE} + + NAME READY STATUS RESTARTS AGE + test-tg-cluster-0 0/1 Pending 0 5m27s + test-tg-cluster-1 0/1 Pending 0 5m27s + test-tg-cluster-2 0/1 Pending 0 5m27s ``` -### Create a TigerGraph Cluster with Specific Options - -You can create a new TigerGraph cluster with specific options, such as size, high availability, version, license, and resource specifications. 
Here's an example: - -- Get and export free license +- TigerGraph PVC status ```bash - export LICENSE=$(curl -L "ftp://ftp.graphtiger.com/lic/license3.txt" -o "/tmp/license3.txt" 2>/dev/null && cat /tmp/license3.txt) + kubectl get pvc -l tigergraph.com/cluster-name=${YOUR_CLUSTER_NAME} -n ${YOUR_NAMESPACE} + + NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE + tg-data-test-tg-cluster-0 Pending gp2 110s + tg-data-test-tg-cluster-1 Pending gp2 110s + tg-data-test-tg-cluster-2 Pending gp2 110s ``` -- Create TigerGraph cluster with kubectl-tg plugin +- Checking the PVC Events of one Pod ```bash - kubectl tg create --cluster-name ${YOUR_CLUSTER_NAME} --private-key-secret ${YOUR_SSH_KEY_SECRET_NAME} --size 3 --ha 2 --version 3.9.3 --license ${LICENSE} \ - --storage-class gp2 --storage-size 100G --cpu 6000m --memory 16Gi --namespace ${YOUR_NAMESPACE} - ``` -- Create TigerGraph cluster with CR(Custom Resource) YAML manifest - > [!NOTE] - > Please replace the TigerGraph version (e.g., 3.9.3) and the Operator version (e.g., 0.0.9) with your desired versions. - - ```bash - cat < [!WARNING] @@ -415,7 +567,15 @@ Before scaling out the cluster, ensure that the corresponding node pool is scale kubectl tg update --cluster-name ${YOUR_CLUSTER_NAME} --size 6 --ha 2 --namespace ${YOUR_NAMESPACE} ``` -The above command scales the cluster to a size of 6 with a high availability factor of 2. If you prefer to use a CR (Custom Resource) YAML manifest for scaling, update the `spec.replicas` and `spec.initTGConfig.ha` fields accordingly. +The above command scales the cluster to a size of 6 with a high availability factor of 2. If you prefer to use a CR (Custom Resource) YAML manifest for scaling, update the `spec.replicas` and `spec.ha` fields accordingly. + +### Change the HA factor of the TigerGraph cluster + +From Operator version 0.1.0, you can change the HA factor of the TigerGraph cluster without updating size by using the following command: + +```bash +kubectl tg update --cluster-name ${YOUR_CLUSTER_NAME} --ha ${NEW_HA} --namespace ${YOUR_NAMESPACE} +``` ## Update Resources (CPU and Memory) of the TigerGraph Cluster diff --git a/k8s/docs/03-deploy/tigergraph-on-gke.md b/k8s/docs/03-deploy/tigergraph-on-gke.md index a732ced8..9891900d 100644 --- a/k8s/docs/03-deploy/tigergraph-on-gke.md +++ b/k8s/docs/03-deploy/tigergraph-on-gke.md @@ -12,14 +12,20 @@ This comprehensive document provides step-by-step instructions on deploying a Ti - [Deploy a TigerGraph cluster](#deploy-a-tigergraph-cluster) - [Providing Private SSH Key Pair for Enhanced Security](#providing-private-ssh-key-pair-for-enhanced-security) - [Specify the StorageClass name](#specify-the-storageclass-name) + - [Specify the additional Storage for mounting multiple PVs(Optional)](#specify-the-additional-storage-for-mounting-multiple-pvsoptional) + - [Customize configurations for the TigerGraph system (Optional)](#customize-configurations-for-the-tigergraph-system-optional) - [Create TG cluster with specific options](#create-tg-cluster-with-specific-options) - [Connect to a TigerGraph cluster](#connect-to-a-tigergraph-cluster) - [Connect to a TigerGraph cluster Pod](#connect-to-a-tigergraph-cluster-pod) - - [Access TigerGraph Suite](#access-tigergraph-suite) - - [Access RESTPP API Service](#access-restpp-api-service) + - [Access TigerGraph Services](#access-tigergraph-services) + - [Verify the API service](#verify-the-api-service) + - [Verify the RESTPP API service](#verify-the-restpp-api-service) + - [Verify the Metrics API 
service](#verify-the-metrics-api-service) - [Upgrade a TigerGraph cluster](#upgrade-a-tigergraph-cluster) - [Scale a TigerGraph cluster](#scale-a-tigergraph-cluster) + - [Change the HA factor of the TigerGraph cluster](#change-the-ha-factor-of-the-tigergraph-cluster) - [Update the resources(CPU and Memory) of the TigerGraph cluster](#update-the-resourcescpu-and-memory-of-the-tigergraph-cluster) + - [Update system configurations and license of the TigerGraph cluster](#update-system-configurations-and-license-of-the-tigergraph-cluster) - [Destroy the TigerGraph cluster and the Kubernetes Operator](#destroy-the-tigergraph-cluster-and-the-kubernetes-operator) - [Destroy the TigerGraph cluster](#destroy-the-tigergraph-cluster) - [Uninstall TigerGraph Operator](#uninstall-tigergraph-operator) @@ -103,6 +109,9 @@ Now, you can install the TigerGraph Operator based on your requirements: A namespace-scoped operator watches and manages resources in a single Namespace, whereas a cluster-scoped operator watches and manages resources cluster-wide. +> [!IMPORTANT] +> Namespace-scoped operators require the same operator version to be installed for different namespaces. + - Install a namespace-scoped Operator: ```bash @@ -151,8 +160,8 @@ Starting from Operator version 0.0.4, users are required to provide their privat - Step 2: create a Secret Object - > [!IMPORTANT] - > The namespace of the Secret object must be the same as that of the TigerGraph cluster. +> [!IMPORTANT] +> The namespace of the Secret object must be the same as that of the TigerGraph cluster. Create a secret object based on the private SSH key file generated in step 1. Ensure that the key name of the secret for the private SSH key is `private-ssh-key`, and the key name for the public SSH key is `public-ssh-key`. Do not alter these key names: @@ -182,18 +191,61 @@ standard-rwo (default) pd.csi.storage.gke.io Delete WaitForFirstCon Choose the appropriate StorageClass (e.g., `standard`) when creating the TigerGraph cluster, ensuring optimized storage provisioning and management. +### Specify the additional Storage for mounting multiple PVs(Optional) + +You can specify multiple PVs for TigerGraph Pods by specifying the `--additional-storages` option. The value of this option is a YAML file configuration. For example: + +> [!NOTE] +> Other parameters required to create a cluster are omitted here. + +```bash +kubectl tg create --cluster-name ${YOUR_CLUSTER_NAME} --additional-storages additional-storage-tg-logs.yaml +``` + +Example additional storage YAML file: + +```YAML +additionalStorages: + - name: tg-kafka + storageSize: 5Gi + - name: tg-log + storageSize: 5Gi + - name: tg-sidecar + storageClassName: efs-sc + storageSize: 5Gi + accessMode: ReadWriteMany + volumeMode: Filesystem + - name: tg-backup + storageSize: 5Gi + mountPath: /home/tigergraph/backup + accessMode: ReadWriteOnce + volumeMode: Filesystem +``` + +You can also specify the multiple PVs using CR configuration, For more information, see [Multiple persistent volumes mounting](../03-deploy/multiple-persistent-volumes-mounting.md) + +### Customize configurations for the TigerGraph system (Optional) + +You can customize the configurations for the TigerGraph system by specifying the `--tigergraph-config` option. The value of this option should be key-value pairs separated by commas. 
For example: + +```bash + --tigergraph-config "System.Backup.TimeoutSec=900,Controller.BasicConfig.LogConfig.LogFileMaxSizeMB=40" +``` + + The key-value pairs are the same as the configurations that can be set by `gadmin config set` command. For more information, see [Configuration Parameters](https://docs.tigergraph.com/tigergraph-server/current/reference/configuration-parameters). All configurations will be applied to the TigerGraph system when the cluster is initializing. + ### Create TG cluster with specific options To create a new TigerGraph cluster with specific options, use either the `kubectl-tg` plugin or a CR YAML manifest. Below are examples using the `kubectl-tg` plugin: You can get all of the TigerGraph docker image version from [tigergraph-k8s](https://hub.docker.com/r/tigergraph/tigergraph-k8s/tags) -The following command will create a new TigerGraph cluster with a free license: +You must provide your license key when creating cluster. Contact TigerGraph support for help finding your license key. -- Get and export free license +- Export license key as an environment variable ```bash - export LICENSE=$(curl -L "ftp://ftp.graphtiger.com/lic/license3.txt" -o "/tmp/license3.txt" 2>/dev/null && cat /tmp/license3.txt) + export LICENSE= ``` - Create TigerGraph cluster with kubectl-tg plugin @@ -217,13 +269,8 @@ The following command will create a new TigerGraph cluster with a free license: spec: image: docker.io/tigergraph/tigergraph-k8s:3.9.3 imagePullPolicy: IfNotPresent - initJob: - image: docker.io/tigergraph/tigergraph-k8s-init:0.0.9 - imagePullPolicy: IfNotPresent - initTGConfig: - ha: 2 - license: ${LICENSE} - version: 3.9.3 + ha: 2 + license: ${LICENSE} listener: type: LoadBalancer privateKeyName: ${YOUR_SSH_KEY_SECRET_NAME} @@ -265,44 +312,38 @@ To log into a single container within the TigerGraph cluster and execute command kubectl tg connect --cluster-name ${YOUR_CLUSTER_NAME} --namespace ${YOUR_NAMESPACE} ``` -### Access TigerGraph Suite +### Access TigerGraph Services -- Query the external service address. +Query the external service address: ```bash - export GUI_SERVICE_ADDRESS=$(kubectl get svc/${YOUR_CLUSTER_NAME}-gui-external-service --namespace ${YOUR_NAMESPACE} -o=jsonpath='{.status.loadBalancer.ingress[0].ip}') - - echo $GUI_SERVICE_ADDRESS - 35.225.232.251 + export EXTERNAL_SERVICE_ADDRESS=$(kubectl get svc/${YOUR_CLUSTER_NAME}-nginx-external-service --namespace ${YOUR_NAMESPACE} -o=jsonpath='{.status.loadBalancer.ingress[0].ip}') ``` -- Verify the API service +#### Verify the API service ```bash - curl http://${GUI_SERVICE_ADDRESS}:14240/api/ping + curl http://${EXTERNAL_SERVICE_ADDRESS}:14240/api/ping {"error":false,"message":"pong","results":null} ``` -Access the TigerGraph Suite in your browser using the URL: http://${GUI_SERVICE_ADDRESS}:14240 (replace `GUI_SERVICE_ADDRESS` with the actual service address). +To access the TigerGraph Suite, open it in your browser using the following URL: http://${EXTERNAL_SERVICE_ADDRESS}:14240, replacing `EXTERNAL_SERVICE_ADDRESS` with the actual service address. -### Access RESTPP API Service - -- Query the external service address. 
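+
+If the LoadBalancer address is not reachable from your workstation, you can also port-forward the external service and run the same checks locally. This is a minimal sketch, assuming the service exposes port 14240 as in the examples above:
+
+```bash
+# Forward the external nginx service to localhost (press Ctrl+C to stop)
+kubectl port-forward svc/${YOUR_CLUSTER_NAME}-nginx-external-service 14240:14240 --namespace ${YOUR_NAMESPACE}
+
+# In another terminal, verify the API service through the forwarded port
+curl http://localhost:14240/api/ping
+```
+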
+#### Verify the RESTPP API service ```bash - export RESTPP_SERVICE_ADDRESS=$(kubectl get svc/${YOUR_CLUSTER_NAME}-rest-external-service --namespace ${YOUR_NAMESPACE} -o=jsonpath='{.status.loadBalancer.ingress[0].ip}') + curl http://${EXTERNAL_SERVICE_ADDRESS}:14240/restpp/echo - echo $RESTPP_SERVICE_ADDRESS - 34.173.210.92 + {"error":false, "message":"Hello GSQL"} ``` -- Verify the RESTPP API service: +#### Verify the Metrics API service ```bash - curl http://${RESTPP_SERVICE_ADDRESS}:9000/echo +curl http://${EXTERNAL_SERVICE_ADDRESS}/informant/metrics/get/network -d '{"LatestNum":"1"}' - {"error":false, "message":"Hello GSQL"} +{"NetworkMetrics":[{"EventMeta":{"Targets":[{"ServiceName":"IFM"}],"EventId":"1ebeaf2a380f4941b371efaaceb3467b","TimestampNS":"1703666521019463773","Source":{"ServiceName":"EXE","Partition":2}},"HostID":"m2","CollectTimeStamps":"1703666521008230613","Network":{"IP":"10.244.0.79","TCPConnectionNum":89,"IncomingBytesNum":"1654215","OutgoingBytesNum":"1466486"}},{"EventMeta":{"Targets":[{"ServiceName":"IFM"}],"EventId":"2c54ed5d6ba14e789db03fd9e023219c","TimestampNS":"1703666521020024563","Source":{"ServiceName":"EXE","Partition":3}},"HostID":"m3","CollectTimeStamps":"1703666521011409133","Network":{"IP":"10.244.0.78","TCPConnectionNum":90,"IncomingBytesNum":"1637413","OutgoingBytesNum":"1726712"}},{"EventMeta":{"Targets":[{"ServiceName":"IFM"}],"EventId":"c3478943ca134530bcd3aa439521c626","TimestampNS":"1703666521019483903","Source":{"ServiceName":"EXE","Partition":1}},"HostID":"m1","CollectTimeStamps":"1703666521009116924","Network":{"IP":"10.244.0.77","TCPConnectionNum":107,"IncomingBytesNum":"1298257","OutgoingBytesNum":"1197920"}}]} ``` ## Upgrade a TigerGraph cluster @@ -324,7 +365,7 @@ Assuming the current version of the cluster is 3.9.2, you can upgrade it to vers kubectl tg update --cluster-name ${YOUR_CLUSTER_NAME} --version 3.9.3 --namespace ${YOUR_NAMESPACE} ``` -If you prefer using a CR YAML manifest, update the `spec.initTGConfig.version` and `spec.image` field, and then apply it. +If you prefer using a CR YAML manifest, update the `spec.image` field, and then apply it. Ensure the successful upgrade with these commands: @@ -345,7 +386,15 @@ Before scaling the cluster, scale the corresponding node pool to provide suffici kubectl tg update --cluster-name ${YOUR_CLUSTER_NAME} --size 6 --ha 2 --namespace ${YOUR_NAMESPACE} ``` -The above command scales the cluster to a size of 6 with a high availability factor of 2. If you prefer to use a CR (Custom Resource) YAML manifest for scaling, update the `spec.replicas` and `spec.initTGConfig.ha` fields accordingly. +The above command scales the cluster to a size of 6 with a high availability factor of 2. If you prefer to use a CR (Custom Resource) YAML manifest for scaling, update the `spec.replicas` and `spec.ha` fields accordingly. + +### Change the HA factor of the TigerGraph cluster + +From Operator version 0.1.0, you can change the HA factor of the TigerGraph cluster without updating size by using the following command: + +```bash +kubectl tg update --cluster-name ${YOUR_CLUSTER_NAME} --ha ${NEW_HA} --namespace ${YOUR_NAMESPACE} +``` ## Update the resources(CPU and Memory) of the TigerGraph cluster @@ -357,6 +406,26 @@ kubectl tg update --cluster-name ${YOUR_CLUSTER_NAME} --cpu 8 --memory 16Gi --c For CR YAML manifests, update the `spec.resources.requests` and `spec.resources.limits` fields and apply the changes. 
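+
+After the rolling update finishes, you can confirm that the new requests and limits are in effect on the TigerGraph pods. This is a quick sketch that relies on the `tigergraph.com/cluster-name` label used elsewhere in these docs and assumes the first container in each pod is the default `tigergraph` app container (sidecars would change the index):
+
+```bash
+# Print the resource settings of the first container in every TigerGraph pod
+kubectl get pods -l tigergraph.com/cluster-name=${YOUR_CLUSTER_NAME} --namespace ${YOUR_NAMESPACE} \
+  -o jsonpath='{range .items[*]}{.metadata.name}{"\t"}{.spec.containers[0].resources}{"\n"}{end}'
+```
+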
+## Update system configurations and license of the TigerGraph cluster + +Use the following command to update the system configurations of the TigerGraph cluster: + +```bash +kubectl tg update --cluster-name ${YOUR_CLUSTER_NAME} --tigergraph-config "System.Backup.TimeoutSec=900,Controller.BasicConfig.LogConfig.LogFileMaxSizeMB=40" --namespace ${YOUR_NAMESPACE} +``` + +Use the following command to update the license of the TigerGraph cluster: + +```bash +kubectl tg update --cluster-name ${YOUR_CLUSTER_NAME} --license ${LICENSE} --namespace ${YOUR_NAMESPACE} +``` + +If you want to update both the system configurations and license of the TigerGraph cluster, please provide these two options together in one command(**recommanded**) instead of two separate commands: + +```bash +kubectl tg update --cluster-name ${YOUR_CLUSTER_NAME} --tigergraph-config "System.Backup.TimeoutSec=900,Controller.BasicConfig.LogConfig.LogFileMaxSizeMB=40" --license ${LICENSE} --namespace ${YOUR_NAMESPACE} +``` + ## Destroy the TigerGraph cluster and the Kubernetes Operator ### Destroy the TigerGraph cluster diff --git a/k8s/docs/03-deploy/tigergraph-on-openshift.md b/k8s/docs/03-deploy/tigergraph-on-openshift.md index 4da7b0f1..8bc79bbd 100644 --- a/k8s/docs/03-deploy/tigergraph-on-openshift.md +++ b/k8s/docs/03-deploy/tigergraph-on-openshift.md @@ -14,14 +14,20 @@ This document provides detailed instructions for deploying a TigerGraph cluster - [Acquire special permission](#acquire-special-permission) - [Providing a Private SSH Key Pair for Enhanced Security](#providing-a-private-ssh-key-pair-for-enhanced-security) - [Specify the StorageClass name](#specify-the-storageclass-name) + - [Specify the additional Storage for mounting multiple PVs(Optional)](#specify-the-additional-storage-for-mounting-multiple-pvsoptional) + - [Customize configurations for the TigerGraph system (Optional)](#customize-configurations-for-the-tigergraph-system-optional) - [Create TG cluster with specific options](#create-tg-cluster-with-specific-options) - [Connect to a TigerGraph cluster](#connect-to-a-tigergraph-cluster) - [Connecting to a TigerGraph cluster Pod](#connecting-to-a-tigergraph-cluster-pod) - - [Access TigerGraph Suite](#access-tigergraph-suite) - - [Access RESTPP API Service](#access-restpp-api-service) + - [Access TigerGraph Services](#access-tigergraph-services) + - [Verify the API service](#verify-the-api-service) + - [Verify the RESTPP API service](#verify-the-restpp-api-service) + - [Verify the Metrics API service](#verify-the-metrics-api-service) - [Upgrade a TigerGraph cluster](#upgrade-a-tigergraph-cluster) - [Scale a TigerGraph cluster](#scale-a-tigergraph-cluster) + - [Change the HA factor of the TigerGraph cluster](#change-the-ha-factor-of-the-tigergraph-cluster) - [Update the resources(CPU and Memory) of the TigerGraph cluster](#update-the-resourcescpu-and-memory-of-the-tigergraph-cluster) + - [Update system configurations and license of the TigerGraph cluster](#update-system-configurations-and-license-of-the-tigergraph-cluster) - [Destroy the TigerGraph cluster and the Kubernetes Operator](#destroy-the-tigergraph-cluster-and-the-kubernetes-operator) - [Destroy the TigerGraph cluster](#destroy-the-tigergraph-cluster) - [Uninstall TigerGraph Operator](#uninstall-tigergraph-operator) @@ -115,6 +121,9 @@ Install TigerGraph Operator using the following command: A namespace-scoped operator watches and manages resources in a single Namespace, whereas a cluster-scoped operator watches and manages resources cluster-wide. 
+> [!IMPORTANT] +> Namespace-scoped operators require the same operator version to be installed for different namespaces. + - Install a namespace-scoped Operator ```bash @@ -357,8 +366,8 @@ Starting from Operator version 0.0.4, users are required to provide their privat - Step 2: Create a Secret Object - > [!IMPORTANT] - > The namespace of the Secret object must be the same as that of the TigerGraph cluster. +> [!IMPORTANT] +> The namespace of the Secret object must be the same as that of the TigerGraph cluster. Create a secret object based on the private SSH key file generated in step 1. Ensure that the key name of the secret for the private SSH key is `private-ssh-key`, and the key name for the public SSH key is `public-ssh-key`. Do not alter these key names: @@ -388,18 +397,61 @@ With the StorageClass identified, you can proceed to create clusters using the c This process ensures that the appropriate StorageClass is assigned to your TigerGraph cluster creation, optimizing storage provisioning and management. +### Specify the additional Storage for mounting multiple PVs(Optional) + +You can specify multiple PVs for TigerGraph Pods by specifying the `--additional-storages` option. The value of this option is a YAML file configuration. For example: + +> [!NOTE] +> Other parameters required to create a cluster are omitted here. + +```bash +kubectl tg create --cluster-name ${YOUR_CLUSTER_NAME} --additional-storages additional-storage-tg-logs.yaml +``` + +Example additional storage YAML file: + +```YAML +additionalStorages: + - name: tg-kafka + storageSize: 5Gi + - name: tg-log + storageSize: 5Gi + - name: tg-sidecar + storageClassName: efs-sc + storageSize: 5Gi + accessMode: ReadWriteMany + volumeMode: Filesystem + - name: tg-backup + storageSize: 5Gi + mountPath: /home/tigergraph/backup + accessMode: ReadWriteOnce + volumeMode: Filesystem +``` + +You can also specify the multiple PVs using CR configuration, For more information, see [Multiple persistent volumes mounting](../03-deploy/multiple-persistent-volumes-mounting.md) + +### Customize configurations for the TigerGraph system (Optional) + +You can customize the configurations for the TigerGraph system by specifying the `--tigergraph-config` option. The value of this option should be key-value pairs separated by commas. For example: + +```bash + --tigergraph-config "System.Backup.TimeoutSec=900,Controller.BasicConfig.LogConfig.LogFileMaxSizeMB=40" +``` + + The key-value pairs are the same as the configurations that can be set by `gadmin config set` command. For more information, see [Configuration Parameters](https://docs.tigergraph.com/tigergraph-server/current/reference/configuration-parameters). All configurations will be applied to the TigerGraph system when the cluster is initializing. + ### Create TG cluster with specific options To create a new TigerGraph cluster with specific options, use either the kubectl-tg plugin or a CR YAML manifest. Below are examples using the kubectl-tg plugin: You can get all of the TigerGraph docker image version from [tigergraph-k8s](https://hub.docker.com/r/tigergraph/tigergraph-k8s/tags) -The following command will create a new TigerGraph cluster with a free license: +You must provide your license key when creating cluster. Contact TigerGraph support for help finding your license key. 
-- Get and export free license +- Export license key as an environment variable ```bash - export LICENSE=$(curl -L "ftp://ftp.graphtiger.com/lic/license3.txt" -o "/tmp/license3.txt" 2>/dev/null && cat /tmp/license3.txt) + export LICENSE= ``` - Create TigerGraph cluster with kubectl-tg plugin @@ -421,13 +473,8 @@ The following command will create a new TigerGraph cluster with a free license: spec: image: docker.io/tigergraph/tigergraph-k8s:3.9.3 imagePullPolicy: IfNotPresent - initJob: - image: docker.io/tigergraph/tigergraph-k8s-init:0.0.9 - imagePullPolicy: IfNotPresent - initTGConfig: - ha: 2 - license: ${LICENSE} - version: 3.9.3 + ha: 2 + license: ${LICENSE} listener: type: LoadBalancer privateKeyName: ${YOUR_SSH_KEY_SECRET_NAME} @@ -469,44 +516,38 @@ To log into a single container within the TigerGraph cluster and execute command kubectl tg connect --cluster-name ${YOUR_CLUSTER_NAME} --namespace ${YOUR_NAMESPACE} ``` -### Access TigerGraph Suite +### Access TigerGraph Services -- Query the external service address. +Query the external service address: ```bash - export GUI_SERVICE_ADDRESS=$(kubectl get svc/${YOUR_CLUSTER_NAME}-gui-external-service --namespace ${YOUR_NAMESPACE} -o=jsonpath='{.status.loadBalancer.ingress[0].ip}') - - echo $GUI_SERVICE_ADDRESS - 35.225.232.251 + export EXTERNAL_SERVICE_ADDRESS=$(kubectl get svc/${YOUR_CLUSTER_NAME}-nginx-external-service --namespace ${YOUR_NAMESPACE} -o=jsonpath='{.status.loadBalancer.ingress[0].ip}') ``` -- Verify the API service +#### Verify the API service ```bash - curl http://${GUI_SERVICE_ADDRESS}:14240/api/ping + curl http://${EXTERNAL_SERVICE_ADDRESS}:14240/api/ping {"error":false,"message":"pong","results":null} ``` -Access the TigerGraph Suite in your browser using the URL: http://${GUI_SERVICE_ADDRESS}:14240 (replace `GUI_SERVICE_ADDRESS` with the actual service address). +To access the TigerGraph Suite, open it in your browser using the following URL: http://${EXTERNAL_SERVICE_ADDRESS}:14240, replacing `EXTERNAL_SERVICE_ADDRESS` with the actual service address. -### Access RESTPP API Service - -- Query the external service address. 
+#### Verify the RESTPP API service ```bash - export RESTPP_SERVICE_ADDRESS=$(kubectl get svc/${YOUR_CLUSTER_NAME}-rest-external-service --namespace ${YOUR_NAMESPACE} -o=jsonpath='{.status.loadBalancer.ingress[0].ip}') + curl http://${EXTERNAL_SERVICE_ADDRESS}:14240/restpp/echo - echo $RESTPP_SERVICE_ADDRESS - 34.173.210.92 + {"error":false, "message":"Hello GSQL"} ``` -- Verify the RESTPP API service: +#### Verify the Metrics API service ```bash - curl http://${RESTPP_SERVICE_ADDRESS}:9000/echo +curl http://${EXTERNAL_SERVICE_ADDRESS}/informant/metrics/get/network -d '{"LatestNum":"1"}' - {"error":false, "message":"Hello GSQL"} +{"NetworkMetrics":[{"EventMeta":{"Targets":[{"ServiceName":"IFM"}],"EventId":"1ebeaf2a380f4941b371efaaceb3467b","TimestampNS":"1703666521019463773","Source":{"ServiceName":"EXE","Partition":2}},"HostID":"m2","CollectTimeStamps":"1703666521008230613","Network":{"IP":"10.244.0.79","TCPConnectionNum":89,"IncomingBytesNum":"1654215","OutgoingBytesNum":"1466486"}},{"EventMeta":{"Targets":[{"ServiceName":"IFM"}],"EventId":"2c54ed5d6ba14e789db03fd9e023219c","TimestampNS":"1703666521020024563","Source":{"ServiceName":"EXE","Partition":3}},"HostID":"m3","CollectTimeStamps":"1703666521011409133","Network":{"IP":"10.244.0.78","TCPConnectionNum":90,"IncomingBytesNum":"1637413","OutgoingBytesNum":"1726712"}},{"EventMeta":{"Targets":[{"ServiceName":"IFM"}],"EventId":"c3478943ca134530bcd3aa439521c626","TimestampNS":"1703666521019483903","Source":{"ServiceName":"EXE","Partition":1}},"HostID":"m1","CollectTimeStamps":"1703666521009116924","Network":{"IP":"10.244.0.77","TCPConnectionNum":107,"IncomingBytesNum":"1298257","OutgoingBytesNum":"1197920"}}]} ``` ## Upgrade a TigerGraph cluster @@ -528,7 +569,7 @@ Assuming the current version of the cluster is 3.9.2, you can upgrade it to vers kubectl tg update --cluster-name ${YOUR_CLUSTER_NAME} --version 3.9.3 --namespace ${YOUR_NAMESPACE} ``` -If you prefer using a CR YAML manifest, update the `spec.initTGConfig.version` and `spec.image` field, and then apply it. +If you prefer using a CR YAML manifest, update the `spec.version` and `spec.image` field, and then apply it. Ensure the successful upgrade with these commands: @@ -549,7 +590,15 @@ Before scaling the cluster, scale the corresponding node pool to provide suffici kubectl tg update --cluster-name ${YOUR_CLUSTER_NAME} --size 6 --ha 2 --namespace ${YOUR_NAMESPACE} ``` -The above command scales the cluster to a size of 6 with a high availability factor of 2. If you prefer to use a CR (Custom Resource) YAML manifest for scaling, update the `spec.replicas` and `spec.initTGConfig.ha` fields accordingly. +The above command scales the cluster to a size of 6 with a high availability factor of 2. If you prefer to use a CR (Custom Resource) YAML manifest for scaling, update the `spec.replicas` and `spec.ha` fields accordingly. + +### Change the HA factor of the TigerGraph cluster + +From Operator version 0.1.0, you can change the HA factor of the TigerGraph cluster without updating size by using the following command: + +```bash +kubectl tg update --cluster-name ${YOUR_CLUSTER_NAME} --ha ${NEW_HA} --namespace ${YOUR_NAMESPACE} +``` ## Update the resources(CPU and Memory) of the TigerGraph cluster @@ -561,6 +610,26 @@ kubectl tg update --cluster-name ${YOUR_CLUSTER_NAME} --cpu 8 --memory 16Gi --c For CR YAML manifests, update the `spec.resources.requests` and `spec.resources.limits` fields and apply the changes. 
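+
+Updating requests and limits triggers a rolling restart of the TigerGraph pods. You can watch the pods roll over using the `tigergraph.com/cluster-name` label used elsewhere in these docs; a minimal sketch:
+
+```bash
+# Watch the TigerGraph pods being recreated with the new resource settings
+kubectl get pods -l tigergraph.com/cluster-name=${YOUR_CLUSTER_NAME} --namespace ${YOUR_NAMESPACE} --watch
+```
+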
+## Update system configurations and license of the TigerGraph cluster + +Use the following command to update the system configurations of the TigerGraph cluster: + +```bash +kubectl tg update --cluster-name ${YOUR_CLUSTER_NAME} --tigergraph-config "System.Backup.TimeoutSec=900,Controller.BasicConfig.LogConfig.LogFileMaxSizeMB=40" --namespace ${YOUR_NAMESPACE} +``` + +Use the following command to update the license of the TigerGraph cluster: + +```bash +kubectl tg update --cluster-name ${YOUR_CLUSTER_NAME} --license ${LICENSE} --namespace ${YOUR_NAMESPACE} +``` + +If you want to update both the system configurations and license of the TigerGraph cluster, please provide these two options together in one command(**recommanded**) instead of two separate commands: + +```bash +kubectl tg update --cluster-name ${YOUR_CLUSTER_NAME} --tigergraph-config "System.Backup.TimeoutSec=900,Controller.BasicConfig.LogConfig.LogFileMaxSizeMB=40" --license ${LICENSE} --namespace ${YOUR_NAMESPACE} +``` + ## Destroy the TigerGraph cluster and the Kubernetes Operator ### Destroy the TigerGraph cluster diff --git a/k8s/docs/03-deploy/use-custom-containers-by-kubectl-tg.md b/k8s/docs/03-deploy/use-custom-containers-by-kubectl-tg.md index 8fafb4dd..91cf65b9 100644 --- a/k8s/docs/03-deploy/use-custom-containers-by-kubectl-tg.md +++ b/k8s/docs/03-deploy/use-custom-containers-by-kubectl-tg.md @@ -1,24 +1,24 @@ -

Use InitContainers,SidecarContainers and CustomVolumes in kubectl-tg

+# Use InitContainers,SidecarContainers and CustomVolumes in kubectl-tg -- [Basic knowledge](#basic-knowledge) -- [Usage](#usage) - - [Creating initContainers,sidecarContainers and customVolumes](#creating-initcontainerssidecarcontainers-and-customvolumes) - - [Removing initContainers/sidecarContainers/customVolumes](#removing-initcontainerssidecarcontainerscustomvolumes) - - [Managing a TG Cluster with Custom Containers](#managing-a-tg-cluster-with-custom-containers) - - [See also](#see-also) +- [Use InitContainers,SidecarContainers and CustomVolumes in kubectl-tg](#use-initcontainerssidecarcontainers-and-customvolumes-in-kubectl-tg) + - [Basic knowledge](#basic-knowledge) + - [Usage](#usage) + - [Creating initContainers,sidecarContainers and customVolumes](#creating-initcontainerssidecarcontainers-and-customvolumes) + - [Removing initContainers/sidecarContainers/customVolumes](#removing-initcontainerssidecarcontainerscustomvolumes) + - [Managing a TG Cluster with Custom Containers](#managing-a-tg-cluster-with-custom-containers) + +## Basic knowledge -Basic knowledge -=============== A K8S Pod has the capability to house multiple containers, including both init containers and app containers. Upon pod creation, the init containers execute sequentially in a designated order. Should any of the init containers encounter a failure, the overall pod execution is halted (for more insights, consult [Init Containers](https://kubernetes.io/docs/concepts/workloads/pods/init-containers/)). Following the successful completion of all init containers, the app containers proceed to run concurrently. By default, in the configuration of the TigerGraph CR, each TigerGraph Pod features a singular app container named "tigergraph". This container runs all TigerGraph services within the Pod. The functionality "InitContainers,SidecarContainers and CustomVolumes" empowers users to seamlessly integrate personalized initContainers and sidecarContainers into TigerGraph Pods. Furthermore, users can create customVolumes, enabling the mounting of these volumes within their initContainers or sidecarContainers. To grasp the concepts of InitContainers, Sidecar Containers, and Custom Volumes, please refer to the guide on [InitContainers, Sidecar Containers, and Custom Volumes](./custom-containers.md). -Usage -===== -Creating initContainers,sidecarContainers and customVolumes ------ +## Usage + +### Creating initContainers,sidecarContainers and customVolumes + To make use of this feature, follow these steps: 1. Prepare a YAML file that includes the definitions for your `initContainers`, `sidecarContainers`, and `customVolumes`. This YAML file will be passed to the `--custom-containers` option. @@ -72,7 +72,11 @@ sidecarContainers: volumeMounts: - name: log mountPath: /var/log +``` +Below is an illustrative example of a custom volume YAML file: + +```YAML customVolumes: - name: log emptyDir: {} @@ -80,24 +84,23 @@ customVolumes: This comprehensive example showcases the configuration of `initContainers`, `sidecarContainers`, and a `customVolume` named "log". Adjust the contents according to your specific use case. -Name the above YAML file as `tg-custom-container.yaml`. To create a new cluster using the `tg-custom-container.yaml` YAML file: +Name the above YAML file as `tg-custom-container.yaml`. 
To create a new cluster using the `tg-custom-container.yaml` and `tg-custom-volume.yaml` file: ```bash kubectl tg create --cluster-name test-cluster --namespace ${NAMESPACE} \ --size 3 --ha 2 -k ssh-key-secret --version ${TG_CLUSTER_VERSION} \ --storage-class standard --storage-size 10G -l ${LICENSE} \ - --custom-containers tg-custom-container.yaml + --custom-containers tg-custom-container.yaml --custom-volumes tg-custom-volume.yaml ``` If you already have a cluster, and you want to add/update initContainers/sidecarContainers for it, you can run ```bash kubectl tg update --cluster-name test-cluster --namespace ${NAMESPACE} \ - --custom-containers tg-custom-container.yaml + --custom-containers tg-custom-container.yaml --custom-volumes tg-custom-volume.yaml ``` -Removing initContainers/sidecarContainers/customVolumes ------------------------------------------------------ +### Removing initContainers/sidecarContainers/customVolumes To remove all of them, you can pass an empty file as an argument to the `--custom-containers` option: @@ -105,30 +108,23 @@ To remove all of them, you can pass an empty file as an argument to the `--custo touch empty.yaml kubectl tg update --cluster-name test-cluster --namespace ${NAMESPACE} \ - --custom-containers empty.yaml + --custom-containers empty.yaml --custom-volumes empty.yaml ``` If you wish to remove specific containers or volumes, simply edit your configuration file and then use the `kubectl tg update` command to apply the changes. -Managing a TG Cluster with Custom Containers ------------------------------------------ +### Managing a TG Cluster with Custom Containers Operating a TG cluster with custom containers is similar to managing a cluster without custom containers. You can utilize the `kubectl tg update` command to perform actions such as updating, upgrading, expanding, or shrinking the cluster. If you need to modify your `initContainers`, `sidecarContainers`, or `customVolumes`, follow these steps: -1. Make the necessary adjustments to your custom container YAML file. +1. Make the necessary adjustments to your custom container and custom volume YAML file. 2. Execute the following command to update the cluster with the new custom containers: ```bash -kubectl tg update --cluster-name test-cluster --namespace ${NAMESPACE} --custom-containers tg-custom-container.yaml +kubectl tg update --cluster-name test-cluster --namespace ${NAMESPACE} --custom-containers tg-custom-container.yaml --custom-volumes tg-custom-volume.yaml ``` This command triggers a rolling update, ensuring that your custom containers are seamlessly updated within the cluster. - -## See also - -If you are interested in learning how to configure custom containers with YAML resources, please refer to the following documentation: - -- [Use custom containers with YAML resources](../03-deploy/custom-containers.md) diff --git a/k8s/docs/04-manage/backup-and-restore/README.md b/k8s/docs/04-manage/backup-and-restore/README.md index de87f5c1..34955b94 100644 --- a/k8s/docs/04-manage/backup-and-restore/README.md +++ b/k8s/docs/04-manage/backup-and-restore/README.md @@ -1,10 +1,9 @@ -

Backup and Restore Overview

+# Backup and Restore Overview -This document describes how to perform backup and restore on TigerGraph cluster on Kubernetes. +This document describes how to perform backup and restore on TigerGraph cluster on Kubernetes. To backup and restore your data, you can use the `kubectl-tg` plugin or the YAML file that corresponds to the TigerGraphBackup/TigerGraphRestore Custom Resource(CR). -Difference Between Managing Backup/Restore by YAML File and Using kubectl-tg Plugin ---- +## Difference Between Managing Backup/Restore by YAML File and Using kubectl-tg Plugin **Using YAML Files:** @@ -20,41 +19,47 @@ Difference Between Managing Backup/Restore by YAML File and Using kubectl-tg Plu 3. **CR Management:** The `kubectl tg` plugin operates directly on CRs, enabling you to manage and modify them conveniently through commands. -Ultimately, both approaches achieve the same outcome, but the `kubectl tg` plugin simplifies the process by eliminating manual configuration steps and providing a more streamlined and efficient method for managing backup and restore operations. +Ultimately, both approaches achieve the same outcome, but the `kubectl tg` plugin simplifies the process by eliminating manual configuration steps and providing a more streamlined and efficient method for managing backup and restore operations. * See [Backup & Restore cluster by kubectl-tg plugin](./backup-restore-by-kubectl-tg.md) to know how to use `kubectl tg` for backup & restore. * See [Backup & Restore cluster by CR](./backup-restore-by-cr.md) to get the example YAML files for backup & restore. +## Usage scenarios -Usage scenarios ---- ### Back up data + You can create backups of your TigerGraph clusters and store the backup files to Local storage or S3 bucket. Refer to: + * [Backup to Local Storage](./backup-restore-by-kubectl-tg.md#backup-to-local-storage) * [Backup to S3 Bucket](./backup-restore-by-kubectl-tg.md#backup-to-an-s3-bucket) You can create a backup schedule to backup cluster periodically. Refer to: + * [Creating and Managing Backup Schedules](./backup-restore-by-kubectl-tg.md#creating-and-managing-backup-schedules) * [TigerGraphBackupSchedule CR](./backup-restore-by-cr.md#tigergraphbackupschedule) About managing backup files and backup CR, refer to: -- [Listing Backup Custom Resources](./backup-restore-by-kubectl-tg.md#listing-backup-custom-resources) -- [Displaying Backup Process Status](./backup-restore-by-kubectl-tg.md#displaying-backup-process-status) -- [Delete Backup Custom Resource (CR)](./backup-restore-by-kubectl-tg.md#delete-backup-custom-resource-cr) -- [Listing Backups](./backup-restore-by-kubectl-tg.md#listing-backups) -- [Removing Backups](./backup-restore-by-kubectl-tg.md#removing-backups) + +* [Listing Backup Custom Resources](./backup-restore-by-kubectl-tg.md#listing-backup-custom-resources) +* [Displaying Backup Process Status](./backup-restore-by-kubectl-tg.md#displaying-backup-process-status) +* [Delete Backup Custom Resource (CR)](./backup-restore-by-kubectl-tg.md#delete-backup-custom-resource-cr) +* [Listing Backups](./backup-restore-by-kubectl-tg.md#listing-backups) +* [Removing Backups](./backup-restore-by-kubectl-tg.md#removing-backups) ### Restore data + If you have created backups of your cluster to Local storage or S3 Bucket, you can restore the cluster using a specific backup. 
Refer to: + * [Restoring within the Same Cluster](./backup-restore-by-kubectl-tg.md#restoring-within-the-same-cluster) If you have created backups of your cluster to S3 Bucket, you can restore in another cluster, which we call cross-cluster restore. Refer to: + * [Cross-Cluster Restore from Backup](./backup-restore-by-kubectl-tg.md#cross-cluster-restore-from-backup) If you want to clone your cluster, you can use cross-cluster to achieve this goal. Refer to: + * [Clone Cluster from Backup](./backup-restore-by-kubectl-tg.md#clone-cluster-from-backup) +## Troubleshoot -Troubleshoot ---- -If you encounter any Error with backup & restore process, please refer to [How to debug Backup & Restore](./troubleshoot.md) for troubleshooting guidance. \ No newline at end of file +If you encounter any Error with backup & restore process, please refer to [How to debug Backup & Restore](./troubleshoot.md) for troubleshooting guidance. diff --git a/k8s/docs/04-manage/backup-and-restore/backup-restore-by-cr.md b/k8s/docs/04-manage/backup-and-restore/backup-restore-by-cr.md index 6adaaec5..ad5c0515 100644 --- a/k8s/docs/04-manage/backup-and-restore/backup-restore-by-cr.md +++ b/k8s/docs/04-manage/backup-and-restore/backup-restore-by-cr.md @@ -1,28 +1,23 @@ -

Backup & Restore cluster by CR

+# Backup & Restore cluster by CR + +- [Backup \& Restore cluster by CR](#backup--restore-cluster-by-cr) + - [Creating an S3 Secret for Backup and Restore](#creating-an-s3-secret-for-backup-and-restore) + - [TigerGraphBackup](#tigergraphbackup) + - [Backup to local storage](#backup-to-local-storage) + - [Backup to S3 bucket](#backup-to-s3-bucket) + - [TigerGraphBackupSchedule](#tigergraphbackupschedule) + - [Schedule backup to local storage](#schedule-backup-to-local-storage) + - [Schedule backup to S3 bucket](#schedule-backup-to-s3-bucket) + - [TigerGraphRestore](#tigergraphrestore) + - [Restore from local backup](#restore-from-local-backup) + - [Restore from backup in S3 bucket](#restore-from-backup-in-s3-bucket) + - [Cross-cluster restore in existing cluster](#cross-cluster-restore-in-existing-cluster) + - [Cluster version \>=3.9.2](#cluster-version-392) + - [Clone a cluster(Create a new cluster and do cross-cluster restore)](#clone-a-clustercreate-a-new-cluster-and-do-cross-cluster-restore) + - [Clone Cluster version \>=3.9.2](#clone-cluster-version-392) + +## Creating an S3 Secret for Backup and Restore -- [Creating an S3 Secret for Backup and Restore](#creating-an-s3-secret-for-backup-and-restore) -- [TigerGraphBackup](#tigergraphbackup) - - [Backup to local storage](#backup-to-local-storage) - - [Backup to S3 bucket](#backup-to-s3-bucket) -- [TigerGraphBackupSchedule](#tigergraphbackupschedule) - - [Schedule backup to local storage](#schedule-backup-to-local-storage) - - [Schedule backup to S3 bucket](#schedule-backup-to-s3-bucket) -- [TigerGraphRestore](#tigergraphrestore) - - [Restore from local backup](#restore-from-local-backup) - - [Restore from backup in S3 bucket](#restore-from-backup-in-s3-bucket) - - [Cross-cluster restore in existing cluster](#cross-cluster-restore-in-existing-cluster) - - [Cluster version \>=3.9.2](#cluster-version-392) - - [Clone a cluster(Create a new cluster and do cross-cluster restore)](#clone-a-clustercreate-a-new-cluster-and-do-cross-cluster-restore) - - [Cluster version \>=3.9.2](#cluster-version-392-1) - - - - - - - -Creating an S3 Secret for Backup and Restore -==== When working with backup and restore operations involving S3 buckets, you need to create a Kubernetes Secret to securely store your AWS access credentials. Here's how you can create an S3 Secret: 1. **Encode AWS Access Key ID and Secret Access Key**: @@ -66,15 +61,15 @@ When working with backup and restore operations involving S3 buckets, you need t By creating an S3 Secret in this manner, you ensure that your AWS access credentials are securely stored and can be easily referenced when needed for backup and restore tasks involving S3 buckets. -TigerGraphBackup -================ +## TigerGraphBackup + > [!NOTE] > There are many examples on different conditions of backup and restore. Some fields in the YAML format CR is optional, a mark `# optional` is put above them. All fields without the optional mark is required. For optimal organization, we recommend using the naming convention `${CLUSTER-NAME}-backup-${TAG}` for your backup CR. -Backup to local storage ------------------------ +### Backup to local storage + Certainly, here's the modified YAML file for performing a backup to local storage. You can save this content to a file (e.g., backup-local.yaml), and then run `kubectl apply -f backup-local.yaml -n YOUR_NAMESPACE` to create the backup. 
```yaml @@ -108,8 +103,7 @@ spec: compressLevel: DefaultCompression # Choose from DefaultCompression/BestSpeed/BestCompression ``` -Backup to S3 bucket -------------------- +### Backup to S3 bucket Certainly, here's the YAML file for performing a backup to an S3 bucket using a previously created Secret named `s3-secret`. You can save this content to a file (e.g., `backup-s3.yaml`), and then run `kubectl apply -f backup-s3.yaml -n YOUR_NAMESPACE` to create the backup. @@ -126,9 +120,7 @@ spec: # Specify the name of the S3 bucket you want to use bucketName: operator-backup # Specify the Secret containing the S3 access key and secret access key - secretKey: - name: s3-secret - + secretKeyName: s3-secret # Configure the name of backup files and the path storing temporary files backupConfig: tag: s3 @@ -145,17 +137,13 @@ spec: compressLevel: DefaultCompression # Choose from DefaultCompression/BestSpeed/BestCompression ``` -TigerGraphBackupSchedule -======================== +## TigerGraphBackupSchedule The field `.spec.schedule` uses the cron schedule expression. You can refer to [https://crontab.guru/](https://crontab.guru/). The field `.spec.backupTemplate` is the same as the `.spec` of TigerGraphBackup - - -Schedule backup to local storage --------------------------------- +### Schedule backup to local storage ```yaml apiVersion: graphdb.tigergraph.com/v1alpha1 @@ -200,8 +188,7 @@ spec: compressLevel: DefaultCompression #choose from DefaultCompression/BestSpeed/BestCompression ``` -Schedule backup to S3 bucket ----------------------------- +### Schedule backup to S3 bucket ```yaml apiVersion: graphdb.tigergraph.com/v1alpha1 @@ -223,13 +210,12 @@ spec: pause: false backupTemplate: clusterName: test-cluster - destination: - storage: s3Bucket - s3Bucket: - # specify the bucket you want to use - bucketName: operator-backup - secretKey: - name: s3-secret + destination: + storage: s3Bucket + s3Bucket: + # specify the bucket you want to use + bucketName: operator-backup + secretKeyName: s3-secret # Configure the name of backup files and the path storing temporary files backupConfig: tag: s3-daily @@ -245,11 +231,9 @@ spec: compressLevel: DefaultCompression #choose from DefaultCompression/BestSpeed/BestCompression ``` -TigerGraphRestore -================= +## TigerGraphRestore -Restore from local backup -------------------------- +### Restore from local backup ```yaml apiVersion: graphdb.tigergraph.com/v1alpha1 @@ -272,8 +256,7 @@ spec: clusterName: test-cluster ``` -Restore from backup in S3 bucket --------------------------------- +### Restore from backup in S3 bucket ```yaml apiVersion: graphdb.tigergraph.com/v1alpha1 @@ -292,14 +275,12 @@ spec: s3Bucket: # specify the bucket you want to use bucketName: operator-backup - secretKey: - name: s3-secret + secretKeyName: s3-secret # Specify the name of cluster clusterName: test-cluster ``` -Cross-cluster restore in existing cluster ------------------------------------------ +### Cross-cluster restore in existing cluster We recommend using `kubectl tg restore` command to do this(See [Cross-Cluster Restore from Backup](./backup-restore-by-kubectl-tg.md#cross-cluster-restore-from-backup)). Since it is complicated to get metadata of backup and put it in CR. 
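+
+If you still prefer to assemble the CR manually, the backup metadata embedded in `restoreConfig.meta` below can be exported with the `kubectl tg backup list` command from the kubectl-tg guide; a short sketch, assuming the cluster name and backup tag used in these examples:
+
+```bash
+# Export the metadata of a specific backup so it can be pasted into restoreConfig.meta
+kubectl tg backup list --cluster-name test-cluster -n YOUR_NAMESPACE \
+  --tag daily-2022-10-13T022218 --meta > metadata
+```
+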
@@ -316,8 +297,7 @@ spec: storage: s3Bucket s3Bucket: bucketName: operator-backup - secretKey: - name: s3-secret + secretKeyName: s3-secret restoreConfig: meta: | { @@ -367,8 +347,11 @@ spec: } stagingPath: /home/tigergraph/data ``` -### Cluster version >=3.9.2 + +#### Cluster version >=3.9.2 + If you are using a TigerGraph cluster whose version >=3.9.2, the CR could be simplified. You don't need to put the metadata into it, you only need to specify the tag + ```yaml apiVersion: graphdb.tigergraph.com/v1alpha1 kind: TigerGraphRestore @@ -386,14 +369,12 @@ spec: s3Bucket: # specify the bucket you want to use bucketName: operator-backup - secretKey: - name: s3-secret + secretKeyName: s3-secret # Specify the name of cluster clusterName: test-cluster-new ``` -Clone a cluster(Create a new cluster and do cross-cluster restore) -------------------------------------------------- +### Clone a cluster(Create a new cluster and do cross-cluster restore) We recommend using `kubectl tg restore` command to do this(See [Clone Cluster from Backup](./backup-restore-by-kubectl-tg.md#clone-cluster-from-backup)). Since it is complicated to get metadata of backup , clusterTemplate of the original cluster and put them in CR. @@ -410,8 +391,7 @@ spec: storage: s3Bucket s3Bucket: bucketName: operator-backup - secretKey: - name: s3-secret + secretKeyName: s3-secret restoreConfig: meta: | { @@ -477,16 +457,12 @@ spec: resources: requests: storage: 10G - initTGConfig: - ha: 1 - license: "YOUR_LICENSE" - version: 3.9.3 - initJob: - image: docker.io/tigergraph/tigergraph-k8s-init:0.0.9 - imagePullPolicy: IfNotPresent + ha: 1 + license: "YOUR_LICENSE" ``` -### Cluster version >=3.9.2 +#### Clone Cluster version >=3.9.2 + ```yaml apiVersion: graphdb.tigergraph.com/v1alpha1 kind: TigerGraphRestore @@ -498,8 +474,7 @@ spec: storage: s3Bucket s3Bucket: bucketName: operator-backup - secretKey: - name: s3-secret + secretKeyName: s3-secret restoreConfig: tag: daily-2022-10-13T022218 stagingPath: /home/tigergraph/data @@ -520,12 +495,6 @@ spec: resources: requests: storage: 10G - initTGConfig: - ha: 1 - license: "YOUR_LICENSE" - version: 3.9.3 - initJob: - image: docker.io/tigergraph/tigergraph-k8s-init:0.0.9 - imagePullPolicy: IfNotPresent + ha: 1 + license: "YOUR_LICENSE" ``` - diff --git a/k8s/docs/04-manage/backup-and-restore/backup-restore-by-kubectl-tg.md b/k8s/docs/04-manage/backup-and-restore/backup-restore-by-kubectl-tg.md index 8a8ab1cd..17040f5d 100644 --- a/k8s/docs/04-manage/backup-and-restore/backup-restore-by-kubectl-tg.md +++ b/k8s/docs/04-manage/backup-and-restore/backup-restore-by-kubectl-tg.md @@ -1,22 +1,23 @@ -

Backup & Restore clustey kubectl-tg plugin

+# Backup & Restore clustey kubectl-tg plugin If you have experience with Custom Resources in Kubernetes (K8S), you can leverage CRs to initiate backup or restore processes. We provide a dedicated document detailing the steps for performing backup and restore using Custom Resources (CRs). [Backup & restore by CR](backup-restore-by-cr.md) -- [Prerequisite](#prerequisite) -- [Utilizing `kubectl tg` Command for Backup](#utilizing-kubectl-tg-command-for-backup) - - [Creating and Updating Backups](#creating-and-updating-backups) - - [Backup to Local Storage](#backup-to-local-storage) - - [Backup to an S3 Bucket](#backup-to-an-s3-bucket) - - [\[Preview\] Performing Incremental Backup](#preview-performing-incremental-backup) - - [Updating Backup Custom Resources](#updating-backup-custom-resources) - - [Changing Backup Types](#changing-backup-types) - - [Creating Another Backup](#creating-another-backup) - - [Listing Backup Custom Resources](#listing-backup-custom-resources) - - [Displaying Backup Process Status](#displaying-backup-process-status) - - [Delete Backup Custom Resource (CR)](#delete-backup-custom-resource-cr) - - [Listing Backups](#listing-backups) - - [Removing Backups](#removing-backups) -- [Creating and Managing Backup Schedules](#creating-and-managing-backup-schedules) +- [Backup \& Restore clustey kubectl-tg plugin](#backup--restore-clustey-kubectl-tg-plugin) + - [Prerequisite](#prerequisite) + - [Utilizing `kubectl tg` Command for Backup](#utilizing-kubectl-tg-command-for-backup) + - [Creating and Updating Backups](#creating-and-updating-backups) + - [Backup to Local Storage](#backup-to-local-storage) + - [Backup to an S3 Bucket](#backup-to-an-s3-bucket) + - [\[Preview\] Performing Incremental Backup](#preview-performing-incremental-backup) + - [Updating Backup Custom Resources](#updating-backup-custom-resources) + - [Changing Backup Types](#changing-backup-types) + - [Creating Another Backup](#creating-another-backup) + - [Listing Backup Custom Resources](#listing-backup-custom-resources) + - [Displaying Backup Process Status](#displaying-backup-process-status) + - [Delete Backup Custom Resource (CR)](#delete-backup-custom-resource-cr) + - [Listing Backups](#listing-backups) + - [Removing Backups](#removing-backups) + - [Creating and Managing Backup Schedules](#creating-and-managing-backup-schedules) - [Specifying Backup Schedule](#specifying-backup-schedule) - [Creating Backup Schedules](#creating-backup-schedules) - [Creating a Local Backup Schedule](#creating-a-local-backup-schedule) @@ -27,7 +28,7 @@ If you have experience with Custom Resources in Kubernetes (K8S), you can levera - [Showing Backup Schedule Status](#showing-backup-schedule-status) - [Pausing and Resuming a Backup Schedule](#pausing-and-resuming-a-backup-schedule) - [Backup Strategy Overview](#backup-strategy-overview) -- [Utilizing `kubectl tg` for Restore](#utilizing-kubectl-tg-for-restore) + - [Utilizing `kubectl tg` for Restore](#utilizing-kubectl-tg-for-restore) - [Restore within the Same Cluster](#restore-within-the-same-cluster) - [Cross-Cluster Restore from Backup](#cross-cluster-restore-from-backup) - [Clone Cluster from Backup](#clone-cluster-from-backup) @@ -37,23 +38,19 @@ If you have experience with Custom Resources in Kubernetes (K8S), you can levera - [Show Status of Restore](#show-status-of-restore) - [Delete Restore Job](#delete-restore-job) - -Prerequisite -============ +## Prerequisite The successful execution of the `kubectl tg backup|restore|backup-schedule` command relies on the presence 
of several dependencies: `kubectl`, `helm`, `jq`, and `yq`. It is imperative to ensure that all these components are properly installed on your system. Furthermore, prior to using the backup command, it is essential to have the TigerGraph Kubectl Plugin installed(please refer to [Install kubectl-tg plugin](../../02-get-started/get_started.md#install-kubectl-tg-plugin)). Additionally, you must create your cluster as a prerequisite step. -Utilizing `kubectl tg` Command for Backup -========================================== +## Utilizing `kubectl tg` Command for Backup To maintain coherence between the `kubectl-tg` command and custom resources presented in YAML format, the `--name` option is employed to specify the name of the custom resources to be created or managed. -Creating and Updating Backups ------------------------------- +### Creating and Updating Backups -``` +```bash Usage: kubectl tg backup [create|update] [OPTIONS] @@ -86,11 +83,11 @@ Options: The secret should contain accessKeyID and secretAccessKey. ``` -### Backup to Local Storage +#### Backup to Local Storage use the following command to backup cluster whose name is test-cluster and store backup files in local storage -``` +```bash kubectl tg backup create --name backup-to-local \ --cluster-name test-cluster --tag testlocal -n tigergraph \ --destination local --local-path /home/tigergraph/tigergraph/data/mybackup @@ -98,23 +95,24 @@ use the following command to backup cluster whose name is test-cluster and store you can also customize timeout, staging path, the compress level and the compress process number -``` +```bash kubectl tg backup create --name backup-to-local --cluster-name test-cluster \ --tag testlocal -n tigergraph --destination local \ --local-path /home/tigergraph/tigergraph/data/mybackup --staging-path /home/tigergraph/temp \ --timeout 18000 --compress-process-number 0 --compress-level BestSpeed ``` + > [!NOTE] -> 1. Please use subpath of `/home/tigergraph/tigergraph/data/` as local path for backup since this path is mounted with PV. For example, you can use `/home/tigergraph/tigergraph/data/mybackup` .If you do not use that, you will lose your backup data if the pod restarts. And be careful that don’t use the same path for local path as the staging path. If you don’t configure staging path, the default staging path is `/home/tigergraph/tigergraph/data/backup`, if you set local path as `/home/tigergraph/tigergraph/data/backup`, the backup will fail. -> 2. Please remember which path you use and use the same path if you want to restore the backup file you create. - +> Please use subpath of `/home/tigergraph/tigergraph/data/` as local path for backup since this path is mounted with PV. For example, you can use `/home/tigergraph/tigergraph/data/mybackup` .If you do not use that, you will lose your backup data if the pod restarts. And be careful that don’t use the same path for local path as the staging path. If you don’t configure staging path, the default staging path is `/home/tigergraph/tigergraph/data/backup`, if you set local path as `/home/tigergraph/tigergraph/data/backup`, the backup will fail. +> +> Please remember which path you use and use the same path if you want to restore the backup file you create. -### Backup to an S3 Bucket +#### Backup to an S3 Bucket Follow the steps below to back up a cluster named "test-cluster" and store the backup files in an S3 bucket. Make sure you provide the S3 bucket name, access key ID, and secret key for S3. 1. 
First, create a Kubernetes secret containing the access key ID and secret key: - + ```bash kubectl create secret generic aws-secret \ --from-literal=accessKeyID=AWSACCESSKEY \ @@ -144,7 +142,8 @@ kubectl tg backup create --name backup-to-s3 -n tigergraph \ > [!NOTE] > Ensure that you have created the necessary Kubernetes secret containing the access key ID and secret key before initiating the backup process to the S3 bucket. -### [Preview] Performing Incremental Backup +#### [Preview] Performing Incremental Backup + > [!NOTE] > For TigerGraph version 3.9, performing an incremental backup requires the existence of at least one previous backup for the cluster. Without a prior full backup, attempting an incremental backup will result in failure. To verify the presence of a full backup, you can utilize the command `kubectl tg backup list`. @@ -157,7 +156,7 @@ kubectl tg backup create --cluster-name test-cluster -n tigergraph --name increm --local-path /home/tigergraph/tigergraph/data/mybackup ``` -### Updating Backup Custom Resources +#### Updating Backup Custom Resources If you have previously created a backup using the `kubectl tg backup create` command, you can modify the backup configuration by employing the `kubectl tg backup update` command. Once the `update` command is executed, the backup process will be triggered immediately with the updated settings. @@ -180,7 +179,7 @@ kubectl tg backup update --name backup-to-local -n tigergraph \ Subsequently, the timeout value will be updated to 20000, and a backup process with the revised timeout setting will be immediately initiated. -#### Changing Backup Types +##### Changing Backup Types You have the flexibility to switch between full and incremental backups using the following commands: @@ -198,9 +197,7 @@ You have the flexibility to switch between full and incremental backups using th These commands allow you to seamlessly modify the backup type based on your evolving requirements. - - -### Creating Another Backup +#### Creating Another Backup If you have previously initiated a backup using the `kubectl tg backup create` command: @@ -225,8 +222,8 @@ Alternatively, you can employ the `-y` option, indicating "yes to all questions, kubectl tg backup update --name backup-to-local -n tigergraph -y ``` -Listing Backup Custom Resources ----- +### Listing Backup Custom Resources + To retrieve a list of all backup Custom Resources (CRs) within a specific namespace, utilize the following command: ```bash @@ -235,10 +232,9 @@ kubectl get tgbackup --namespace tigergraph This command will provide you with an overview of the backup CRs present in the designated namespace. +### Displaying Backup Process Status -Displaying Backup Process Status ----- -Upon executing `kubectl tg backup create/update`, a backup job will be generated within the Kubernetes (k8s) environment. To facilitate monitoring, we offer the `kubectl tg backup status` command, allowing you to assess the status of the backup process. Should you encounter errors or warnings, refer to the [How to Debug Backup & Restore](#how-to-debug-backup--restore) section for troubleshooting guidance. +Upon executing `kubectl tg backup create/update`, a backup job will be generated within the Kubernetes (k8s) environment. To facilitate monitoring, we offer the `kubectl tg backup status` command, allowing you to assess the status of the backup process. Should you encounter errors or warnings, refer to the [How to Debug Backup & Restore](./troubleshoot.md) section for troubleshooting guidance. 
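+
+Because each backup run is executed as a Kubernetes job, you can also inspect the underlying job and its logs directly when digging into a failure; a minimal sketch (the exact job name is generated by the Operator, so the name below is a placeholder):
+
+```bash
+# List the jobs in the namespace, then read the logs of a specific backup job
+kubectl get jobs --namespace tigergraph
+kubectl logs job/<backup-job-name> --namespace tigergraph
+```
+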
To display the status of all backup processes within the `tigergraph` namespace, use the following command: @@ -248,7 +244,7 @@ kubectl tg backup status --namespace tigergraph The output will resemble the following: -``` +```bash NAME CLUSTER TAG STORAGE INCREMENTAL STARTTIME COMPLETIONTIME test-cluster-backup-daily test-cluster daily local 3d12h test-cluster-backup-local test-cluster local local 16s 5s @@ -267,7 +263,7 @@ The output provides a comprehensive overview of the backup process, including co The output is like this: -``` +```bash kubectl tg backup status --cluster-name test-cluster --tag daily Name: test-cluster-backup-daily Namespace: default @@ -309,21 +305,19 @@ Events: You can identify the occurrence of events marked as "Backup job failed," which indicates that the respective backup task has encountered a failure. -Delete Backup Custom Resource (CR) -------------------------------------- +### Delete Backup Custom Resource (CR) To remove a backup Custom Resource (CR), execute the following command: -``` +```bash kubectl tg backup delete --name backup-to-local --namespace tigergraph ``` -Listing Backups ---------------- +### Listing Backups To list available backups, utilize the command: -``` +```bash Usage: kubectl tg backup list [OPTIONS] @@ -335,37 +329,32 @@ Options: --meta : Retrieve the metadata of the backup. ``` - To examine the existing backups for a particular cluster, you can employ the following commands to list all backups associated with the "test-cluster": -``` +```bash kubectl tg backup list --cluster-name test-cluster -n tigergraph ``` If you prefer to obtain the backup list in JSON format, use: -``` +```bash kubectl tg backup list --cluster-name test-cluster -n tigergraph --json ``` - In the context of a cross-cluster restore, acquiring backup metadata is essential. To accomplish this, utilize the tag obtained from the `kubectl tg backup list` command. Run the following command: -``` +```bash kubectl tg backup list --cluster-name test-cluster -n tigergraph \ --tag tests3-2022-10-31T031005 --meta ``` This command will display the metadata in the standard output. If you wish to store this metadata in a file, execute: -``` +```bash kubectl tg backup list --cluster-name test-cluster -n tigergraph --tag tests3 --meta > metadata ``` - - -Removing Backups ------------------- +### Removing Backups To eliminate backups that are no longer needed, follow these steps: @@ -375,15 +364,14 @@ Use the following command to remove specific backups associated with the "test-c kubectl tg backup remove --cluster-name test-cluster --namespace tigergraph \ --tag daily-20xx-xx-xxTxxxxx ``` - + This command enables you to selectively remove backups based on their tags. Please ensure you accurately specify the relevant cluster name, namespace, and backup tag when executing this command. +## Creating and Managing Backup Schedules -Creating and Managing Backup Schedules -==== -The `kubectl tg backup-schedule` command enables you to create, update, monitor, list, delete, pause, and resume backup schedules for specific clusters. This comprehensive set of options empowers you to effortlessly manage your backup scheduling requirements. +The `kubectl tg backup-schedule` command enables you to create, update, monitor, list, delete, pause, and resume backup schedules for specific clusters. This comprehensive set of options empowers you to effortlessly manage your backup scheduling requirements. 
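Backup schedules are themselves custom resources (`TigerGraphBackupSchedule`), so before creating a new one you can check what already exists in a namespace with plain kubectl. A small sketch using the short resource name documented in the status reference; substitute your own namespace:

```bash
# List the existing TigerGraphBackupSchedule resources in the namespace
kubectl get tgbackupsch -n $NAMESPACE
```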
-``` +```bash Usage: kubectl tg backup-schedule [create|update|status|list|delete|pause|resume] [OPTIONS] @@ -425,10 +413,9 @@ Options: --aws-secret : name of secret for aws, the secret should contain accessKeyID and secretAccessKey ``` - ### Specifying Backup Schedule -To define a backup schedule, utilize a cron expression to set the timing. You can conveniently generate cron expressions using tools like [https://crontab.guru/](https://crontab.guru/), which provides an intuitive interface for creating intricate schedules. +To define a backup schedule, utilize a cron expression to set the timing. You can conveniently generate cron expressions using tools like [https://crontab.guru/](https://crontab.guru/), which provides an intuitive interface for creating intricate schedules. For instance, if you desire to execute a backup once daily at 00:00, you would specify the following cron expression: @@ -438,7 +425,6 @@ For instance, if you desire to execute a backup once daily at 00:00, you would s Please ensure to enclose the cron expression in single quotation marks (`'`) to prevent unintended filename expansion. - ### Creating Backup Schedules #### Creating a Local Backup Schedule @@ -451,6 +437,7 @@ Please ensure to enclose the cron expression in single quotation marks (`'`) to --tag localdaily --schedule '0 0 * * *' \ --destination local --local-path /home/tigergraph/backup ``` + #### Creating an S3 Backup Schedule For a schedule that conducts hourly backups for the "test-cluster" at minute 0, storing backup files in an S3 bucket, proceed as follows: @@ -475,7 +462,6 @@ Please ensure to enclose the cron expression in single quotation marks (`'`) to By executing these commands, you'll set up automatic backup schedules tailored to your requirements. - ### Updating a Backup Schedule + When updating a backup schedule, ensure you provide the correct name. For instance, to adjust the schedule for daily backups at 12:00, execute the following: @@ -502,7 +489,6 @@ kubectl tg backup-schedule update --name backupsch-local \ Please note that ongoing backup jobs remain unaffected by configuration changes. The new configuration will take effect during the subsequent schedule. - ### Listing All Backup Schedules To view a comprehensive list of all existing backup schedules within a specific namespace, employ the following command: @@ -531,8 +517,7 @@ kubectl tg backup-schedule status --name test-cluster-schedule-daily \ The output will provide insights into the status of the specified backup schedule, allowing you to monitor its progress and execution. - -``` +```bash Name: test-cluster-schedule-daily Namespace: default Labels: @@ -575,10 +560,8 @@ Events: Normal Backup job created 10s (x2 over 71s) TigerGraphBackupSchedule Schedule a new backup job ``` - Indeed, the events associated with backup schedule executions provide valuable insights into the success or failure of the scheduled jobs. By examining these events, you can ascertain whether the backup schedules were executed as intended and if any issues arose during the process. - ### Pausing and Resuming a Backup Schedule You have the ability to temporarily halt a running backup schedule or resume a paused one using the following commands: @@ -597,7 +580,6 @@ To resume a paused backup schedule: kubectl tg backup-schedule resume --name backupsch-local -n tigergraph ``` - ### Backup Strategy Overview It's important to note that the backup strategy feature is available for cluster versions equal to or greater than 3.9.0. 
This feature provides enhanced control over backup operations and file retention. Presently, you have three distinct options at your disposal to facilitate a comprehensive backup strategy: @@ -614,8 +596,8 @@ Furthermore, with `--max-reserved-day 7`, backups created more than 7 days ago ( By leveraging these options, you can meticulously manage your backup jobs and safeguard against excessive disk usage. This proactive approach to backup strategy aids in optimizing storage utilization while preserving the necessary backups for operational needs. -Utilizing `kubectl tg` for Restore -==== +## Utilizing `kubectl tg` for Restore + When you possess backups generated through the backup process or backup schedule, you have the capability to restore your cluster to a previous state. You can initiate restore from a backup that was crafted by the same cluster, and this feature extends to both local storage and S3 buckets. It's important to highlight that we also offer cross-cluster restore, enabling you to restore Cluster B utilizing backups from Cluster A. As of now, this functionality exclusively supports S3 buckets. @@ -628,9 +610,7 @@ A crucial consideration is that the restore process is currently restricted to c | Restore in a cluster with different partition | Y | N or Y | N | Source cluster: 3*x, Target cluster: 2\*3 or 2\*2 | | Restore in a cluster with different HA | N | Y | Y | Source cluster: 3\*3, Target cluster: 3\*1 | - - -``` +```bash USAGE: kubectl tg restore [OPTIONS] @@ -651,10 +631,11 @@ Options: --s3-bucket : S3 Bucket name --aws-secret : name of secret for aws, the secret should contain accessKeyID and secretAccessKey ``` + ### Restore within the Same Cluster Suppose you have previously created a backup for `test-cluster` using the `kubectl tg backup create` command. To initiate restore within the same cluster, retrieve the tag of all Backups first: - + Execute the following command to retrieve the tags associated with all available backups: ```bash @@ -665,7 +646,7 @@ Suppose you have previously created a backup for `test-cluster` using the `kubec For instance: - ``` + ```bash +------------------------------+------+---------+--------+---------------------+ | TAG | TYPE | VERSION | SIZE | CREATED AT | +------------------------------+------+---------+--------+---------------------+ @@ -676,28 +657,30 @@ Suppose you have previously created a backup for `test-cluster` using the `kubec +------------------------------+------+---------+--------+---------------------+ ``` - Using backup in local storage: To restore your cluster utilizing a backup stored in local storage, execute the following command: -``` + +```bash kubectl tg restore --name restore-from-local \ --cluster-name test-cluster -n tigergraph --tag daily-2022-11-02T103601\ --source local --local-path /home/tigergraph/backup ``` + Replace `/home/tigergraph/backup` with the appropriate path to the backup stored in your local storage. This command will initiate the restore process and bring your cluster back to the state captured by the specified backup. 
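After the restore has been initiated, you can follow its progress with the status subcommand described later in this documentation. A small sketch, assuming the `restore-from-local` name and `tigergraph` namespace from the command above:

```bash
# Show the status of the restore created above
kubectl tg restore status --name restore-from-local -n tigergraph

# Alternatively, inspect the TigerGraphRestore custom resource directly
kubectl describe tgrestore restore-from-local -n tigergraph
```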
Use backup in s3 bucket: First, create a secret in k8s containing access key id and secret key: -``` +```bash kubectl create secret generic aws-secret \ --from-literal=accessKeyID=AWSACCESSKEY \ --from-literal=secretAccessKey='AWSSECRETKEY' ``` Select a backup tag from the available backups and execute the following command to initiate restore from an S3 bucket: -``` + +```bash kubectl tg restore --name restore-from-s3 \ --namespace tigergraph --cluster-name test-cluster \ --tag tests3-2022-10-31T031005 \ @@ -707,8 +690,8 @@ kubectl tg restore --name restore-from-s3 \ Make sure to replace tests3-2022-10-31T031005 with the desired backup tag and adjust tg-backup to your S3 bucket name. This command will trigger the restore process, bringing your cluster back to the chosen backup's state. - ### Cross-Cluster Restore from Backup + > [!NOTE] > This section pertains to users utilizing TigerGraph cluster version 3.9.2 or higher. If you are operating on an earlier version, please consult the [Restore an Existing Cluster from Backup Created by Another Cluster (Cluster version < 3.9.2)](#restore-an-existing-cluster-from-backup-created-by-another-cluster-cluster-version--392) section for relevant instructions. @@ -744,12 +727,10 @@ Performing a cross-cluster restore, where you restore an existing cluster (targe Remember to adjust the cluster names, backup tag, S3 bucket name, and AWS credentials as needed for your specific setup. Cross-cluster restore is a powerful way to recover data and configurations across different clusters, ensuring data resilience and system stability. - - ### Clone Cluster from Backup -> [!NOTE] -> This section pertains to users utilizing TigerGraph cluster version 3.9.2 or higher. If you are operating on an earlier version, please consult the [Clone a Cluster (Cluster version \< 3.9.2)](#clone-a-cluster-cluster-version--392) +> [!NOTE] +> This section pertains to users utilizing TigerGraph cluster version 3.9.2 or higher. If you are operating on an earlier version, please consult the [Clone a Cluster (Cluster version \< 3.9.2)](#clone-a-cluster-cluster-version--392) Creating a new cluster and restoring it from a backup created by another cluster, often referred as "cloning", involves several steps. Follow these instructions to successfully clone a cluster using the `kubectl tg restore` command: @@ -760,6 +741,7 @@ Creating a new cluster and restoring it from a backup created by another cluster ```bash kubectl tg export --cluster-name source-cluster -n tigergraph ``` + Assume the output file is /home/test-cluster_backup_1668069319.yaml. 2. **Retrieve the Backup Tag:** @@ -794,13 +776,12 @@ By following these steps, you can easily perform cross-cluster restore or clone Once the process is complete, the new cluster (`new-cluster`) will be initialized and ready for use. The restore ensures that the new cluster matches the state of the source cluster captured by the backup. Cloning a cluster from a backup is a powerful way to quickly replicate environments and configurations for testing, development, or disaster recovery purposes. - - ### Cross-Cluster Restore and Cluster Clone (Cluster Version < 3.9.2) Starting from TigerGraph cluster version 3.9.2, the process for cross-cluster restore and cluster cloning has been simplified. You only need the backup tag to specify the backup file that you want to restore. 
If you are using cluster < 3.9.2, you need to follow the instructions below: #### Restore an Existing Cluster from Backup Created by Another Cluster (Cluster version < 3.9.2) + 1. **Retrieve Backup Metadata for Source Cluster:** Obtain the metadata of the backup from the source cluster (source-cluster) and save it to a file named `backup-metadata`. Run the following command: @@ -838,21 +819,20 @@ Starting from TigerGraph cluster version 3.9.2, the process for cross-cluster re Remember to adjust the cluster names, backup tag, S3 bucket name, and AWS credentials as needed for your specific setup. Cross-cluster restores are a powerful way to recover data and configurations across different clusters, ensuring data resilience and system stability. - - #### Clone a Cluster (Cluster version < 3.9.2) Creating a new cluster and restoring it from a backup created by another cluster, often referred to as "cloning," involves several steps. Follow these instructions to successfully clone a cluster using the `kubectl tg restore` command: 1. **Export Configuration of Source Cluster:** - Obtain the custom resource (CR) configuration of the source cluster (source-cluster) and save it to a YAML file. Run the following command: + Obtain the custom resource (CR) configuration of the source cluster (source-cluster) and save it to a YAML file. Run the following command: - ```bash - kubectl tg export --cluster-name source-cluster -n tigergraph - ``` - Assume the output file is /home/test-cluster_backup_1668069319.yaml. - This file will serve as the template for creating the new cluster. + ```bash + kubectl tg export --cluster-name source-cluster -n tigergraph + ``` + + Assume the output file is /home/test-cluster_backup_1668069319.yaml. + This file will serve as the template for creating the new cluster. 2. **Retrieve Backup Metadata for Source Cluster:** @@ -912,5 +892,3 @@ kubectl tg restore delete --name restore-from-local --namespace $NAMESPACE ``` This command will delete the specified restore job. Make sure to replace `restore-from-local` with the actual name of the restore job you want to delete, and provide the appropriate namespace using the `$NAMESPACE` variable. - - diff --git a/k8s/docs/04-manage/backup-and-restore/status-of-backup-restore.md b/k8s/docs/04-manage/backup-and-restore/status-of-backup-restore.md new file mode 100644 index 00000000..6a7ba555 --- /dev/null +++ b/k8s/docs/04-manage/backup-and-restore/status-of-backup-restore.md @@ -0,0 +1,179 @@ +# Status of Backup and Restore + +## Status of TigerGraphBackup + +You can check the status of all TigerGraphBackups in a specific namespace by running the following command: + +```bash +# if you have kubectl tg installed +kubectl tg backup status -n $NAMESPACE + +# alternatively, use kubectl +kubectl get tgbackup -n $NAMESPACE +``` + +The output will be similar to the following: + +```bash +NAME CLUSTER TAG STORAGE INCREMENTAL STARTTIME COMPLETIONTIME STATUS AGE +backup-test-cluster test-cluster test local 88s 75s Succeed 88s +``` + +The field `STATUS` shows the status of the backup. 
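If you prefer to follow the status while a backup is still running, the standard kubectl watch flag also works with the short resource name used above. A minimal sketch; substitute your own namespace:

```bash
# Watch the TigerGraphBackup resources until the STATUS column changes,
# for example from Active to Succeed; press Ctrl+C to stop watching
kubectl get tgbackup -n $NAMESPACE -w
```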
+ +You can also check the status of a specific TigerGraphBackup by running the following command: + +```bash +# if you have kubectl tg installed +kubectl tg backup status -n $NAMESPACE --name $BACKUP_NAME + +# alternatively, use kubectl +kubectl describe tgbackup $BACKUP_NAME -n $NAMESPACE +``` + +You can check the Status field in the output to see the status of the backup: + +```bash +Status: + Completion Time: 2024-01-03T07:54:26Z + Conditions: + Last Transition Time: 2024-01-03T07:54:26Z + Message: Backup succeed + Reason: BackupSucceed + Status: True + Type: Succeed + Start Time: 2024-01-03T07:54:13Z + Target Ready: true +``` + +The `Status.Conditions[0].Type` is identical to the `STATUS` field in the output of `kubectl tg backup status`. + +The following table lists the possible values of the `Status.Conditions[0].Type` field: + +| Status.Conditions[0].Type | Description | +| --- | --- | +| Succeed | The backup is completed. | +| Failed | The backup failed. You should check the logs of backup-job. | +| Active | The backup is in progress. | +| Retrying | The backup job failed at least once, and is retrying. | +| Waiting | The backup job is waiting for the target cluster to be ready. | +| Forbidden | The backup is forbidden because some configurations are not supported for target cluster(the version of target cluster is too old). | +| BackOff | The backup job failed too many times, and is backoff. | + +## Status of TigerGraphRestore + +You can check the status of all TigerGraphRestores in a specific namespace by running the following command: + +```bash +# if you have kubectl tg installed +kubectl tg restore status -n $NAMESPACE + +# alternatively, use kubectl +kubectl get tgrestore -n $NAMESPACE +``` + +The output will be similar to the following: + +```bash +NAME STARTTIME COMPLETIONTIME CLUSTER TAG STATUS AGE +restore-tg 8s test-cluster test-2024-01-03T075416 Active 8s +``` + +The field `STATUS` shows the status of the restore. + +You can also check the status of a specific TigerGraphRestore by running the following command: + +```bash +# if you have kubectl tg installed +kubectl tg restore status -n $NAMESPACE --name $RESTORE_NAME + +# alternatively, use kubectl +kubectl describe tgrestore $RESTORE_NAME -n $NAMESPACE +``` + +You can check the Status field in the output to see the status of the restore: + +```bash +Status: + Conditions: + Last Transition Time: 2024-01-03T08:15:55Z + Message: Restore job is running + Reason: RestoreActive + Status: True + Type: Active + Start Time: 2024-01-03T08:15:55Z + Target Ready: true +``` + +The `Status.Conditions[0].Type` is identical to the `STATUS` field in the output of `kubectl tg restore status`. + +The following table lists the possible values of the `Status.Conditions[0].Type` field: + +| Status.Conditions[0].Type | Description | +| --- | --- | +| Succeed | The restore is completed. | +| Failed | The restore failed. You should check the logs of restore-job. | +| Active | The restore is in progress. | +| Retrying | The restore job failed at least once, and is retrying. | +| Waiting | The restore job is waiting for the target cluster to be ready. | +| Forbidden | The restore is forbidden because some configurations are not supported for target cluster(the version of target cluster is too old). | +| BackOff | The restore job failed too many times, and is backoff. | +| ClusterCreating | (Only for cloning cluster) The target cluster is being created. 
| + +## Status of TigerGraphBackupSchedule + +You can check the status of all TigerGraphBackupSchedules in a specific namespace by running the following command: + +```bash +# if you have kubectl tg installed +kubectl tg backup-schedule list -n $NAMESPACE + +# alternatively, use kubectl +kubectl get tgbackupsch -n $NAMESPACE +``` + +The output will be similar to the following: + +```bash +NAME CLUSTER TAG LASTSCHEDULE LASTSUCCESSFUL SUCCESS FAILURE STATUS AGE +backupsch test-cluster sch 50s 37s 1 Succeed 2m20s +``` + +The field `STATUS` shows the status of the backup schedule. + +You can also check the status of a specific TigerGraphBackupSchedule by running the following command: + +```bash +# if you have kubectl tg installed +kubectl tg backup-schedule status -n $NAMESPACE --name $BACKUP_SCHEDULE_NAME + +# alternatively, use kubectl +kubectl describe tgbackupsch $BACKUP_SCHEDULE_NAME -n $NAMESPACE +``` + +You can check the Status field in the output to see the status of the backup schedule: + +```bash +Status: + Conditions: + Last Transition Time: 2024-01-03T08:25:13Z + Message: Backup job succeed. + Reason: BackupSucceed + Status: True + Type: Succeed + Job Counter: + Successful Jobs: 1 + Last Schedule Time: 2024-01-03T08:25:00Z + Last Successful Time: 2024-01-03T08:25:13Z +``` + +The `Status.Conditions[0].Type` is identical to the `STATUS` field in the output of `kubectl tg backup-schedule status`. + +The following table lists the possible values of the `Status.Conditions[0].Type` field: + +| Status.Conditions[0].Type | Description | +| --- | --- | +| Succeed | The last scheduled backup job succeeded. | +| Failed | The last scheduled backup job failed. | +| Active | The last scheduled backup job is in progress. | +| Forbidden | The backup is forbidden because some configurations are not supported for target cluster(the version of target cluster is too old). | diff --git a/k8s/docs/04-manage/backup-and-restore/troubleshoot.md b/k8s/docs/04-manage/backup-and-restore/troubleshoot.md index c12e35f2..21e717d2 100644 --- a/k8s/docs/04-manage/backup-and-restore/troubleshoot.md +++ b/k8s/docs/04-manage/backup-and-restore/troubleshoot.md @@ -1,47 +1,46 @@ -How to Debug Backup & Restore -==== +# How to Debug Backup & Restore -General Guidelines ----- +## General Guidelines * It is important to avoid initiating multiple backup and restore jobs simultaneously for the same cluster. - + Attempting to do so may result in the following outcomes: - - * If a backup job is already in progress and you attempt to create another `TigerGraphBackup` to back up the identical cluster, the controller will await the completion of the ongoing job before generating a backup job for the new `TigerGraphBackup`. - - * If a restore job is currently active and you create another `TigerGraphRestore` for the same cluster, the controller will wait for the ongoing job to finish before creating a restore job for the new `TigerGraphRestore`. - - * In case a backup job is running and you create a new `TigerGraphRestore`, or if a restore job is ongoing and you create an additional `TigerGraphBackup`, the subsequently created job will encounter failure. - + + * If a backup job is already in progress and you attempt to create another `TigerGraphBackup` to back up the identical cluster, the controller will await the completion of the ongoing job before generating a backup job for the new `TigerGraphBackup`. 
+ + * If a restore job is currently active and you create another `TigerGraphRestore` for the same cluster, the controller will wait for the ongoing job to finish before creating a restore job for the new `TigerGraphRestore`. + + * In case a backup job is running and you create a new `TigerGraphRestore`, or if a restore job is ongoing and you create an additional `TigerGraphBackup`, the subsequently created job will encounter failure. + * If the targeted cluster for backup or restore is not in a ready state (e.g., the cluster is in an uninitialized state, undergoing shrinkage, or undergoing an upgrade),the backup/restore controller will patiently await the cluster's return to a normal state before proceeding to create the backup/restore job. - + * Up to three pods responsible for executing backup or restore operations will be maintained for each cluster. These pods can prove useful for debugging purposes. - + * Should the backup process extend beyond the configured backup schedule interval, resulting in a duration exceeding the scheduled backup window, the scheduled backup will be missed. For instance, if your backup schedule is set to `0 * * * *`, indicating an hourly backup at the 1st minute of each hour, and if a backup process takes 1.5 hours, a backup job initiated at 00:00 will conclude at 01:30, leading to the scheduled 01:00 backup job being skipped. -Debug backup or restore job ----- +## Debug backup or restore job + When dealing with backup and restore jobs, it's important to be able to troubleshoot and diagnose any issues that may arise. Here's a guide on how to debug backup and restore operations: 1. **List Pods**: To begin, you can list pods running backup in the specified namespace using the following command: - ```bash - kubectl get pods -n NAMESPACE -l tigergraph.com/backup-cluster=test-cluster - ``` + ```bash + kubectl get pods -n NAMESPACE -l tigergraph.com/backup-cluster=test-cluster + ``` + + This will give you an overview of pods running backup for test-cluster in the specified namespace. You can replace "test-cluster" with the name of your cluster. - This will give you an overview of pods running backup for test-cluster in the specified namespace. You can replace "test-cluster" with the name of your cluster. - ```bash - NAME READY STATUS RESTARTS AGE - test-cluster-backup-local-backup-job-7sbcs 0/1 Completed 0 2d - test-cluster-backup-local-backup-job-7xd58 0/1 Error 0 5d13h - ``` + ```bash + NAME READY STATUS RESTARTS AGE + test-cluster-backup-local-backup-job-7sbcs 0/1 Completed 0 2d + test-cluster-backup-local-backup-job-7xd58 0/1 Error 0 5d13h + ``` -1. **Identify Backup and Restore Pods**: Look for pods related to backup and restore operations. These pods are typically named `${BACKUP_NAME}-backup-job-{SUFFIX}` for backup jobs and `${RESTORE_NAME}-restore-job-{SUFFIX}` for restore jobs. +2. **Identify Backup and Restore Pods**: Look for pods related to backup and restore operations. These pods are typically named `${BACKUP_NAME}-backup-job-{SUFFIX}` for backup jobs and `${RESTORE_NAME}-restore-job-{SUFFIX}` for restore jobs. -2. **Check Pod Status**: Check the status of the pods. If a pod's status is "Error" or not in the "Running" state, it indicates an issue with the backup or restore process. +3. **Check Pod Status**: Check the status of the pods. If a pod's status is "Error" or not in the "Running" state, it indicates an issue with the backup or restore process. -3. **View Logs**: To view the logs of a specific pod, you can use the following command: +4. 
**View Logs**: To view the logs of a specific pod, you can use the following command: ```bash kubectl logs $POD_NAME -n NAMESPACE @@ -49,39 +48,37 @@ When dealing with backup and restore jobs, it's important to be able to troubles Replace `$POD_NAME` with the name of the pod you want to inspect, and specify the appropriate namespace using the `-n NAMESPACE` flag. The logs may provide valuable information about any errors or issues that occurred during the backup or restore job. - ```bash - > kubectl logs test-cluster-backup-job-7xd58 - Warning: Permanently added '[test-cluster-internal-service.default]:10022' (ED25519) to the list of known hosts. - Fri Dec 16 13:44:19 UTC 2022 - Start configure backup - [ Info] Configuration has been changed. Please use 'gadmin config apply' to persist the changes. - [ Info] Configuration has been changed. Please use 'gadmin config apply' to persist the changes. - Use Local Storage - [ Info] Configuration has been changed. Please use 'gadmin config apply' to persist the changes. - [ Info] Configuration has been changed. Please use 'gadmin config apply' to persist the changes. - [ Info] Configuration has been changed. Please use 'gadmin config apply' to persist the changes. - Apply config - [Warning] No difference from staging config, config apply is skipped. - [ Info] Successfully applied configuration change. Please restart services to make it effective immediately. - Create backup - [ Error] NotReady (check backup dependency service online get error: NotReady (GPE is not available; NotReady (GSE is not available))) - ``` + ```bash + > kubectl logs test-cluster-backup-job-7xd58 + Warning: Permanently added '[test-cluster-internal-service.default]:10022' (ED25519) to the list of known hosts. + Fri Dec 16 13:44:19 UTC 2022 + Start configure backup + [ Info] Configuration has been changed. Please use 'gadmin config apply' to persist the changes. + [ Info] Configuration has been changed. Please use 'gadmin config apply' to persist the changes. + Use Local Storage + [ Info] Configuration has been changed. Please use 'gadmin config apply' to persist the changes. + [ Info] Configuration has been changed. Please use 'gadmin config apply' to persist the changes. + [ Info] Configuration has been changed. Please use 'gadmin config apply' to persist the changes. + Apply config + [Warning] No difference from staging config, config apply is skipped. + [ Info] Successfully applied configuration change. Please restart services to make it effective immediately. + Create backup + [ Error] NotReady (check backup dependency service online get error: NotReady (GPE is not available; NotReady (GSE is not available))) + ``` + 5. **Troubleshoot Errors**: Examine the logs for any error messages or warnings. These messages can help you identify the root cause of the problem. Common issues could include connectivity problems, resource limitations, or configuration errors. For instance, in above logs we can know that the reason of this error is that GPE is not ready. 6. **Verify Configuration**: Double-check the configuration options provided for the backup or restore job. Ensure that paths, destinations, tags, and other settings are correctly specified. 7. **Permissions and Secrets**: Ensure that any necessary permissions, access keys, or secrets (such as AWS credentials) are correctly configured and accessible to the pods. -8. **Retry or Rerun**: If the issue is transient, you might consider retrying the backup or restore operation. 
You can also delete failed pods and trigger the job again. - -9. **Documentation**: Refer to the official documentation for TigerGraph's backup and restore features for more detailed troubleshooting steps and specific error messages. - -By following these steps, you can effectively troubleshoot and resolve issues with backup and restore operations. +8. **Retry or Rerun**: If the issue is transient, you might consider retrying the backup or restore operation. You can also delete failed pods and trigger the job again. +9. **Documentation**: Refer to the official documentation for TigerGraph's backup and restore features for more detailed troubleshooting steps and specific error messages. +By following these steps, you can effectively troubleshoot and resolve issues with backup and restore operations. -Debug backup schedule job ----- +## Debug backup schedule job When debugging backup schedules in TigerGraph, you may encounter issues with the scheduled backup jobs. Here's a step-by-step guide on how to troubleshoot and debug backup schedule problems: @@ -105,4 +102,4 @@ When debugging backup schedules in TigerGraph, you may encounter issues with the 5. **Analyze Logs**: Carefully analyze the logs to identify any error messages, warnings, or anomalies. Look for clues that may point to the cause of the issue, such as connectivity problems, configuration errors, or resource limitations. -By following these steps, you can effectively troubleshoot and resolve issues with backup schedule operations. If you encounter specific error messages or need further assistance, you can refer to the documentation or seek help. \ No newline at end of file +By following these steps, you can effectively troubleshoot and resolve issues with backup schedule operations. If you encounter specific error messages or need further assistance, you can refer to the documentation or seek help. diff --git a/k8s/docs/04-manage/operator-upgrade.md b/k8s/docs/04-manage/operator-upgrade.md index 00275df3..df733e84 100644 --- a/k8s/docs/04-manage/operator-upgrade.md +++ b/k8s/docs/04-manage/operator-upgrade.md @@ -2,6 +2,24 @@ This document provides step-by-step instructions for upgrading the TigerGraph Kubernetes Operator using the kubectl-tg plugin. 
+- [How to upgrade TigerGraph Kubernetes Operator](#how-to-upgrade-tigergraph-kubernetes-operator) + - [Install Operator 0.0.7 and TigerGraph 3.9.2](#install-operator-007-and-tigergraph-392) + - [Install Operator 0.0.7](#install-operator-007) + - [Install TigerGraph 3.9.2](#install-tigergraph-392) + - [Upgrade Operator and CRD](#upgrade-operator-and-crd) + - [Install the latest kubectl-tg plugin](#install-the-latest-kubectl-tg-plugin) + - [Upgrade CRD to the latest version](#upgrade-crd-to-the-latest-version) + - [Upgrade Operator to the latest version](#upgrade-operator-to-the-latest-version) + - [How to upgrade for mandatory change](#how-to-upgrade-for-mandatory-change) + - [How to upgrade for optional change](#how-to-upgrade-for-optional-change) + - [How to upgrade for a specific breaking change](#how-to-upgrade-for-a-specific-breaking-change) + - [Key considerations for upgrading Operator to 0.1.0 and TigerGraph to 3.10.0](#key-considerations-for-upgrading-operator-to-010-and-tigergraph-to-3100) + - [Exposing Nginx Service Instead of Exposing RESTPP and GST (Tools,GUI) Services](#exposing-nginx-service-instead-of-exposing-restpp-and-gst-toolsgui-services) + - [Performing a Full Backup Before Running HA, Shrink, and Expand Operations](#performing-a-full-backup-before-running-ha-shrink-and-expand-operations) + - [Upgrade TG Cluster](#upgrade-tg-cluster) + - [Upgrading a legacy TG Cluster](#upgrading-a-legacy-tg-cluster) + - [Upgrading to a TG Cluster with Multiple PVCs](#upgrading-to-a-tg-cluster-with-multiple-pvcs) + ## Install Operator 0.0.7 and TigerGraph 3.9.2 If you have previously installed an older version of the Operator and TigerGraph cluster, you can skip this section. This section is only for verifying operator upgrading. @@ -121,3 +139,117 @@ kubectl tg create --cluster-name test-cluster --license xxxxxxxxxxxxxxxxxxxxxxxx If you don't require the new optional configuration of the CRD, no extra steps are needed. However, if you wish to use the new optional configuration, you can simply update the cluster as needed. + +## How to upgrade for a specific breaking change + +In order to optimize the user experience of the operator, such as improving ease of use and removing some configurations that are no longer used, Operator upgrading and TigerGraph upgrading may bring breaking changes. If you have an old version of Operator and TigerGraph, please follow the key considerations and upgrading steps carefully. + +### Key considerations for upgrading Operator to 0.1.0 and TigerGraph to 3.10.0 + +#### Exposing Nginx Service Instead of Exposing RESTPP and GST (Tools,GUI) Services + +- The new Custom Resource Definition (CRD) is applicable starting from TigerGraph 3.9.2. In the TigerGraph Cluster before 3.9.2, after the CRD is updated, the image must be upgraded to 3.9.2 or later. + + ```bash + kubectl tg update --cluster-name ${cluster_name} --version 3.9.2 -n ${NAMESPACE_OF_YOUR_CLUSTER} + ``` + +- If using the new CRD (0.1.0) with the old version TG image, the NGINX service cannot serve correctly on Tools, RESTPP, and informant services. +- If using the old CRD (<=0.0.9), the Tools (GUI), RESTPP, and informant services cannot serve on 3.6.3 but can serve on 3.7.0, 3.8.0, 3.9.1, 3.9.2, and 3.9.3. 
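To sanity-check this change after upgrading, you can list the Kubernetes services in the cluster's namespace and confirm that a single nginx external service is exposed instead of separate RESTPP and GUI services. This is only a rough sketch; the exact service names shown for your cluster may differ:

```bash
# List the services in the cluster's namespace and filter by cluster name
kubectl get services -n ${NAMESPACE_OF_YOUR_CLUSTER} | grep ${cluster_name}
```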
+ +#### Performing a Full Backup Before Running HA, Shrink, and Expand Operations + +If the HA Update process is broken by an interrupter operation such as the update pod being killed or the update job being killed, then the TG Cluster may be damaged and unrecoverable automatically, even if the HA update pod is recreated and reruns the HA update job. As a customer, it is necessary to prepare a full backup before running the HA update. + +> [!WARNING] +> If the license is expired, the shrink/expand process will be stuck for a long time at exporting/importing graph data and will finally fail. + +#### Upgrade TG Cluster + +##### Upgrading a legacy TG Cluster + +- The best and fastest way to upgrade from an old operator + CRD to a new operator + CRD is: + + - Uninstall operator + + ```bash + kubectl tg uninstall + ``` + + - Delete all CRDs + + ```bash + kubectl delete crd tigergraphbackups.graphdb.tigergraph.com + kubectl delete crd tigergraphbackupschedules.graphdb.tigergraph.com + kubectl delete crd tigergraphrestores.graphdb.tigergraph.com + kubectl delete crd tigergraphs.graphdb.tigergraph.com + ``` + + - **DO NOT** delete PVCs of the TigerGraph Cluster + - Install the new operator, it applies the new CRD + - Upgrade the TigerGraph version before uninstalling the legacy operator or after installing the new operator. + +##### Upgrading to a TG Cluster with Multiple PVCs + +Best Practice for Upgrading TigerGraph <=3.9.3 with only one PVC and Operator <=0.0.9 to TigerGraph 3.10.0 with multiple PVCs and Operator 0.1.0: + +- In the environment of TigerGraph <=3.9.3 and Operator <=0.0.9, perform a backup of TigerGraph to S3 or local storage. + +```bash +kubectl tg backup create + --namespace $ns \ + --name $name \ + --cluster-name $YOUR_CLUSTER_NAME \ + --destination s3Bucket \ + --s3-bucket $bucket \ + --tag $tag \ + --timeout $time_out \ + --aws-secret $aws_secret +``` + +- If using local backup, ensure the local backup is stored outside the Pod for persistent storage. +- Remove the legacy TG and delete TigerGraph's PVC. + +```bash +kubectl tg delete --cluster-name $YOUR_CLUSTER_NAME --namespace $YOUR_NAMESPACE --cascade +kubectl delete pvc --namespace default -l tigergraph.com/cluster-name=$YOUR_CLUSTER_NAME +``` + +- Uninstall the legacy Operator and delete all TigerGraph CRDs. + +```bash +kubectl tg uninstall +kubectl delete crd tigergraphbackups.graphdb.tigergraph.com +kubectl delete crd tigergraphbackupschedules.graphdb.tigergraph.com +kubectl delete crd tigergraphrestores.graphdb.tigergraph.com +kubectl delete crd tigergraphs.graphdb.tigergraph.com +``` + +- Install the new Operator 0.1.0, which will concurrently install new CRDs. + +```bash +curl https://dl.tigergraph.com/k8s/0.1.0/kubectl-tg -o kubectl-tg +sudo install kubectl-tg /usr/local/bin/ + +kubectl tg init --namespace $YOUR_NAMESPACE --docker-registry docker.io --docker-image-repo tigergraph --image-pull-policy Always \ +--operator-version 0.1.0 --operator-size 3 --cluster-scope true +``` + +- Create a new TG with the same version as before and customize by adding `additionalStorage` and `customVolume`. + +```bash +kubectl tg restore \ + --namespace $ns \ + --name "${name}-restore" \ + --cluster-name $YOUR_CLUSTER_NAME \ + --source s3Bucket \ + --s3-bucket $bucket \ + --tag $tag \ + --aws-secret $aws_secret +``` + +- Upon completion of the restore process, upgrade the TigerGraph to 3.10.0 using the appropriate command. 
+ +```bash +kubectl tg update --cluster-name $YOUR_CLUSTER_NAME --version 3.10.0 --namespace $YOUR_NAMESPACE +``` diff --git a/k8s/docs/04-manage/pause-and-resume.md b/k8s/docs/04-manage/pause-and-resume.md new file mode 100644 index 00000000..4dd58f18 --- /dev/null +++ b/k8s/docs/04-manage/pause-and-resume.md @@ -0,0 +1,273 @@ +# Pause and Resume TigerGraph cluster + +If you have experience with Custom Resources in Kubernetes, you can modify TigerGraph CR to pause and resume TigerGraph cluster. Alternatively, you can also use `kubectl tg` command to pause and resume TigerGraph cluster. + +When you pause a TigerGraph cluster, the TigerGraph Operator will clean **all services** of the cluster and delete **all computing resources** including all pods, while **persistent volumes will be kept** . When you resume a TigerGraph cluster, the TigerGraph Operator will create all services and pods of the cluster again. + +> [!WARNING] +> When a cluster is paused, all services will be unavailable. Please make sure that there is no active job including loading job and query running on the cluster before pausing it. + +You can pause cluster only when the cluster is in `Normal` or `Resume` status. You can resume cluster only when the cluster is in `Paused` status. That means when the cluster is initializing, upgrading, scaling or config-updating, you cannot pause the cluster. If you try to pause or resume the cluster in these cases, you will get an error message. + +- [Pause and Resume TigerGraph cluster](#pause-and-resume-tigergraph-cluster) + - [Pause and Resume TigerGraph cluster by kubectl tg](#pause-and-resume-tigergraph-cluster-by-kubectl-tg) + - [Prerequisites](#prerequisites) + - [Utilizing `kubectl tg` to pause TigerGraph cluster](#utilizing-kubectl-tg-to-pause-tigergraph-cluster) + - [Pause a running cluster](#pause-a-running-cluster) + - [Pause a running cluster along with its backup schedule](#pause-a-running-cluster-along-with-its-backup-schedule) + - [Utilizing `kubectl tg` to resume TigerGraph cluster](#utilizing-kubectl-tg-to-resume-tigergraph-cluster) + - [Resume a paused cluster](#resume-a-paused-cluster) + - [Resume a paused cluster with updated resources](#resume-a-paused-cluster-with-updated-resources) + - [Pause and Resume TigerGraph cluster by modifying TigerGraph CR](#pause-and-resume-tigergraph-cluster-by-modifying-tigergraph-cr) + - [Troubleshooting](#troubleshooting) + - [It takes too long to pause a cluster](#it-takes-too-long-to-pause-a-cluster) + - [Cluster cannot resume successfully due to insufficient resources](#cluster-cannot-resume-successfully-due-to-insufficient-resources) + +## Pause and Resume TigerGraph cluster by kubectl tg + +### Prerequisites + +The successful execution of the `kubectl tg pause|resume` commands requires that you have installed the `kubectl tg` command line tool. For more information, see [Install kubectl-tg plugin](../02-get-started/get_started.md#install-kubectl-tg-plugin). Additionally, you must create your cluster as a prerequisite step. 
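If you are unsure whether the plugin is already installed, a quick check based on standard kubectl plugin discovery (a sketch, not a required step) is:

```bash
# kubectl discovers plugins named kubectl-<name> on the PATH;
# kubectl-tg should appear in this list if the plugin is installed
kubectl plugin list | grep kubectl-tg
```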
+ +### Utilizing `kubectl tg` to pause TigerGraph cluster + +```bash +Pause a running cluster + +Usage: + kubectl tg pause [options] + +Options: + -n|--namespace : set namespace to deploy TG cluster, if not set, use the default namespace in context + -c|--cluster-name : + (required)set cluster-name to deploy TG cluster, no default + --cascade : if the option is given, related backup schedules will be paused +``` + +#### Pause a running cluster + +First check the name of the running cluster that you want to pause: + +```bash +kubectl tg list -n tigergraph +``` + +The output should be similar to the following: + +```bash +NAME REPLICAS CLUSTER-SIZE CLUSTER-HA CLUSTER-VERSION SERVICE-TYPE CONDITION-TYPE CONDITION-STATUS AGE +test-cluster 3 3 2 docker.io/tigergraph/tigergraph-k8s:3.9.3 LoadBalancer Normal True 12m +``` + +Use the following command to pause a running cluster: + +```bash +kubectl tg pause --cluster-name test-cluster -n tigergraph +``` + +Check the status of the cluster: + +```bash +kubectl tg list -n tigergraph + +NAME REPLICAS CLUSTER-SIZE CLUSTER-HA CLUSTER-VERSION SERVICE-TYPE CONDITION-TYPE CONDITION-STATUS AGE +test-cluster 3 3 2 docker.io/tigergraph/tigergraph-k8s:3.9.3 LoadBalancer PauseRoll Unknown 13m +``` + +Wait for several minutes until the cluster is paused. You can check the status of the cluster: + +```bash +kubectl tg list -n tigergraph + +NAME REPLICAS CLUSTER-SIZE CLUSTER-HA CLUSTER-VERSION SERVICE-TYPE CONDITION-TYPE CONDITION-STATUS AGE +test-cluster 3 3 2 docker.io/tigergraph/tigergraph-k8s:3.9.3 LoadBalancer Paused True 13m +``` + +#### Pause a running cluster along with its backup schedule + +You may have created a backup schedule for your cluster. When you pause the cluster, the backup schedule will still try to backup the cluster according to the schedule. So it's better to pause all backup schedule of the cluster. 
If you want to pause the cluster along with its backup schedule, you can use the `--cascade` option: + +```bash +kubectl tg pause --cluster-name test-cluster -n tigergraph --cascade +``` + +### Utilizing `kubectl tg` to resume TigerGraph cluster + +```bash +Resume a paused cluster + +Usage: + kubectl tg resume [options] + +Options: + -n|--namespace : set namespace to deploy TG cluster, if not set, use the default namespace in context + -c|--cluster-name : + (required)set cluster-name to deploy TG cluster, no default + -k|--private-key-secret : + set the secret name of private ssh key + --service-account-name : + set the name of the service account for TG cluster pod, default empty + --listener-type : update TG cluster listener type, available types: NodePort, LoadBalancer, and Ingress, default LoadBalancer + --nginx-node-port : update node port of TG cluster external nginx service when listener type is NodePort, default as 30240 + --nginx-host : update host name of TG cluster external nginx service when listener type is Ingress, default empty + --secret-name : update secret name of TG cluster external service when listener type is Ingress, default empty + --listener-labels : add the labels to TG services, your input should be like 'k1=v1,k2="v2 with space"' + --listener-annotations : + add the annotations to TG services, your input should be like 'k1=v1,k2="v2 with space"' + --cpu : update TG cluster cpu size of every instance, default as 8000m + --cpu-limit : limit cpu size of every instance, if you set it to 0, cpu is not limited + --memory : update TG cluster memory size of every instance, default as 16Gi + --memory-limit : limit memory size of every instance, if you set it to 0, memory is not limited + --affinity : give a YAML file to specify the nodeSelector,affinity and tolerations for TigerGraph pods + --custom-containers : + give a YAML file to add sidecar containers,init containers and sidecar volumes to TigerGraph pods + --pod-labels : add some customized labels to all pods, your input should be like like 'k1=v1,k2="v2 with space"' + --pod-annotations : add some customized annotations to all pods, your input should be like like 'k1=v1,k2="v2 with space"' +``` + +#### Resume a paused cluster + +First check the name of the paused cluster that you want to resume: + +```bash +kubectl tg list -n tigergraph +``` + +The output should be similar to the following: + +```bash +NAME REPLICAS CLUSTER-SIZE CLUSTER-HA CLUSTER-VERSION SERVICE-TYPE CONDITION-TYPE CONDITION-STATUS AGE +test-cluster 3 2 docker.io/tigergraph/tigergraph-k8s:3.9.3 LoadBalancer Paused True 27m +``` + +Use the following command to resume a paused cluster: + +```bash +kubectl tg resume --cluster-name test-cluster -n tigergraph +``` + +Check the status of the cluster: + +```bash +kubectl tg list -n tigergraph -w + +NAME REPLICAS CLUSTER-SIZE CLUSTER-HA CLUSTER-VERSION SERVICE-TYPE CONDITION-TYPE CONDITION-STATUS AGE +test-cluster 3 3 2 docker.io/tigergraph/tigergraph-k8s:3.9.3 LoadBalancer ResumeRoll Unknown 29m +``` + +Wait for several minutes until the cluster is resumed. 
You can check the status of the cluster: + +```bash +kubectl tg list -n tigergraph + +NAME REPLICAS CLUSTER-SIZE CLUSTER-HA CLUSTER-VERSION SERVICE-TYPE CONDITION-TYPE CONDITION-STATUS AGE +test-cluster 3 3 2 docker.io/tigergraph/tigergraph-k8s:3.9.3 LoadBalancer Normal True 30m +``` + +And you can check that all pods of the cluster are running: + +```bash +kubectl get pods -n tigergraph + +NAME READY STATUS RESTARTS AGE +test-cluster-0 1/1 Running 0 8m27s +test-cluster-1 1/1 Running 0 8m27s +test-cluster-2 1/1 Running 0 8m27s +test-cluster-init-job-fj8q2 0/1 Completed 0 36m +tigergraph-operator-controller-manager-99b6fb86d-4kn49 2/2 Running 0 37m +``` + +#### Resume a paused cluster with updated resources + +If you use above command to resume a paused cluster, the cluster will be resumed with the same resources as before. If you want to update the resources of the cluster, you can add other options to the command. For example, you can update the cpu and memory of the cluster while resuming it: + +```bash +kubectl tg resume --cluster-name test-cluster -n tigergraph --cpu 4000m --memory 7Gi +``` + +Then the cluster will be resumed with the updated resources. You should be careful when you update the resources of the cluster. If you set the cpu or memory to a smaller value, the cluster may not work properly. If you set the cpu or memory to a larger value, the cluster may not be able to be scheduled to a node. So you should make sure that the updated resources are suitable for your cluster. + +> [!NOTE] +> If you set wrong resources and the cluster cannot be scheduled to a node, you can pause the cluster and resume it with corrected resources again. + +## Pause and Resume TigerGraph cluster by modifying TigerGraph CR + +To pause or resume a TigerGraph cluster, you only need to modify the `.spec.pause` field of the TigerGraph CR. If you set the `pause` field to `true`, the cluster will be paused. If you set the `pause` field to `false`, the cluster will be resumed. But you have to make sure that the cluster is in `Normal` or `Resume` status when you pause it, and the cluster is in `Paused` status when you resume it. Otherwise the webhooks of the TigerGraph Operator will reject your request to change this field. + +Here is an example of pausing a cluster: + +```yaml +apiVersion: graphdb.tigergraph.com/v1alpha1 +kind: TigerGraph +metadata: + name: test-cluster +spec: + ha: 1 + image: docker.io/tigergraph/tigergraph-k8s:3.9.3 + imagePullPolicy: Always + imagePullSecrets: + - name: tigergraph-image-pull-secret + license: YOUR_LICENSE + listener: + type: LoadBalancer + privateKeyName: ssh-key-secret + replicas: 1 + resources: + requests: + cpu: "4" + memory: 8Gi + storage: + type: persistent-claim + volumeClaimTemplate: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10G + storageClassName: standard + volumeMode: Filesystem + # pause the cluster. when set as false, the cluster will be resumed + pause: true +``` + +## Troubleshooting + +### It takes too long to pause a cluster + +When operator deletes all pods of the cluster, the pods will perform a graceful shutdown. So it may take a few minutes to pause the cluster. The maximum time is **6 minutes**. It's better to wait for the pods to be deleted instead of deleting them forcibly. If it takes more than 6 minutes to pause the cluster and the cluster is stuck in `PauseRoll` status, you can try to delete the pods forcibly. 
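A minimal sketch of deleting the pods forcibly, assuming the cluster pods carry the `tigergraph.com/cluster-name` label used elsewhere in this documentation; adjust the selector, cluster name, and namespace to your setup:

```bash
# Force-delete the pods of test-cluster without waiting for the graceful shutdown;
# only do this when the pause is stuck in PauseRoll for longer than 6 minutes
kubectl delete pods -n tigergraph -l tigergraph.com/cluster-name=test-cluster \
  --force --grace-period=0
```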
+ +### Cluster cannot resume successfully due to insufficient resources + +When you resume a cluster, the TigerGraph Operator will create all pods of the cluster again. If there is not enough resources in the cluster, the pods may not be Pending forever: + +```bash +kubectl tg resume -c test-cluster -n tigergraph + +$ kubectl get pods -n tigergraph +NAME READY STATUS RESTARTS AGE +test-cluster-0 0/1 Pending 0 5s +test-cluster-1 0/1 Pending 0 5s +test-cluster-2 0/1 Pending 0 5s +test-cluster-init-job-fj8q2 0/1 Completed 0 3h41m +tigergraph-operator-controller-manager-99b6fb86d-4kn49 2/2 Running 0 3h42m +``` + +Use `kubectl describe pod` to check the reason why the pods are pending: + +```bash +kubectl describe pods test-cluster-0 -n tigergraph + +... +Events: + Type Reason Age From Message + ---- ------ ---- ---- ------- + Warning FailedScheduling 63s default-scheduler 0/1 nodes are available: 1 Insufficient cpu. preemption: 0/1 nodes are available: 1 No preemption victims found for incoming pod. +``` + +In this case, you can try to pause the cluster and resume the cluster with smaller resources request. + +```bash +kubectl tg pause -c test-cluster -n tigergraph + +kubectl tg resume -c test-cluster -n tigergraph --cpu 4 --memory 8Gi +``` diff --git a/k8s/docs/05-troubleshoot/cluster-deployment.md b/k8s/docs/05-troubleshoot/cluster-deployment.md index 9613564e..cb36e6a8 100644 --- a/k8s/docs/05-troubleshoot/cluster-deployment.md +++ b/k8s/docs/05-troubleshoot/cluster-deployment.md @@ -548,11 +548,11 @@ If you've successfully created the StatefulSet and cluster pods for your TigerGr ### Conflict port for NodePort Listener type -If you encounter conflicts with port allocation when creating or updating a cluster with `LISTENER_TYPE=NodePort` and specified `rest-node-port` or `gui-node-port` values that conflict with in-use ports, you will receive an error message. To resolve this issue, specify available ports for these services: +If you encounter conflicts with port allocation when creating or updating a cluster with `LISTENER_TYPE=NodePort` and specified `nginx-node-port` values that conflict with in-use ports, you will receive an error message. To resolve this issue, specify available ports for these services: ```bash # Create a cluster with --listener-type NodePort, and there is a tg cluster using the default port 30090, 30240 -kubectl tg create --cluster-name tg-cluster-2 --listener-type NodePort --rest-node-port 30092 --gui-node-port 30242 +kubectl tg create --cluster-name tg-cluster-2 --listener-type NodePort --nginx-node-port 30240 # Check the CR, it indicates the provided port is already allocated. kubectl describe tigergraph.graphdb.tigergraph.com/tg-cluster-2 @@ -561,5 +561,83 @@ Events: ---- ------ ---- ---- ------- Normal Create init ConfigMap 20s TigerGraph Create a new init ConfigMap success Normal Create env ConfigMap 20s TigerGraph Create a new env ConfigMap success - Warning Failed to create external rest service 10s (x11 over 20s) TigerGraph Failed to create external service: Service "tg-cluster-2-rest-external-service" is invalid: spec.ports[0].nodePort: Invalid value: 30090: provided port is already allocated + Warning Failed to create external rest service 10s (x11 over 20s) TigerGraph Failed to create external service: Service "tg-cluster-2-rest-external-service" is invalid: spec.ports[0].nodePort: Invalid value: 30240: provided port is already allocated ``` + +### TigerGraph Status is empty and Pods are not created + +This issue may happen when: + +1. 
Upgrade operator by `kubectl tg upgrade` command +2. Resize Node Pool to 0 or delete Node Pool in GKE/EKS clusters + +When you create a TigerGraph CR, and run `kubectl get tg -n $NAMESPACE`, you will find that the status of the cluster is empty: + +```bash +$ kubectl get tg -n tigergraph -w +NAME REPLICAS CLUSTER-SIZE CLUSTER-HA CLUSTER-VERSION SERVICE-TYPE CONDITION-TYPE CONDITION-STATUS AGE +test-cluster docker.io/tginternal/tigergraph-k8s:3.9.3 LoadBalancer 1m +``` + +And when you run `kubectl get pods -n $NAMESPACE`, no pod has been created for this CR. The possible reason is that the reconcile is not triggered due to an issue of controller-runtime package. The log of operator will be like: + +```bash +I0117 06:53:09.436291 1 request.go:690] Waited for 1.045716414s due to client-side throttling, not priority and fairness, request: GET:https://10.12.0.1:443/apis/autoscaling/v2?timeout=32s +2024-01-17T06:53:10Z INFO controller-runtime.metrics Metrics server is starting to listen {"addr": "127.0.0.1:8080"} +2024-01-17T06:53:10Z INFO controller-runtime.builder Registering a mutating webhook {"GVK": "graphdb.tigergraph.com/v1alpha1, Kind=TigerGraph", "path": "/mutate-graphdb-tigergraph-com-v1alpha1-tigergraph"} +2024-01-17T06:53:10Z INFO controller-runtime.webhook Registering webhook {"path": "/mutate-graphdb-tigergraph-com-v1alpha1-tigergraph"} +2024-01-17T06:53:10Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "graphdb.tigergraph.com/v1alpha1, Kind=TigerGraph", "path": "/validate-graphdb-tigergraph-com-v1alpha1-tigergraph"} +2024-01-17T06:53:10Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-graphdb-tigergraph-com-v1alpha1-tigergraph"} +2024-01-17T06:53:10Z INFO controller-runtime.builder Registering a mutating webhook {"GVK": "graphdb.tigergraph.com/v1alpha1, Kind=TigerGraphBackup", "path": "/mutate-graphdb-tigergraph-com-v1alpha1-tigergraphbackup"} +2024-01-17T06:53:10Z INFO controller-runtime.webhook Registering webhook {"path": "/mutate-graphdb-tigergraph-com-v1alpha1-tigergraphbackup"} +2024-01-17T06:53:10Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "graphdb.tigergraph.com/v1alpha1, Kind=TigerGraphBackup", "path": "/validate-graphdb-tigergraph-com-v1alpha1-tigergraphbackup"} +2024-01-17T06:53:10Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-graphdb-tigergraph-com-v1alpha1-tigergraphbackup"} +2024-01-17T06:53:10Z INFO controller-runtime.builder Registering a mutating webhook {"GVK": "graphdb.tigergraph.com/v1alpha1, Kind=TigerGraphBackupSchedule", "path": "/mutate-graphdb-tigergraph-com-v1alpha1-tigergraphbackupschedule"} +2024-01-17T06:53:10Z INFO controller-runtime.webhook Registering webhook {"path": "/mutate-graphdb-tigergraph-com-v1alpha1-tigergraphbackupschedule"} +2024-01-17T06:53:10Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "graphdb.tigergraph.com/v1alpha1, Kind=TigerGraphBackupSchedule", "path": "/validate-graphdb-tigergraph-com-v1alpha1-tigergraphbackupschedule"} +2024-01-17T06:53:10Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-graphdb-tigergraph-com-v1alpha1-tigergraphbackupschedule"} +2024-01-17T06:53:10Z INFO controller-runtime.builder Registering a mutating webhook {"GVK": "graphdb.tigergraph.com/v1alpha1, Kind=TigerGraphRestore", "path": "/mutate-graphdb-tigergraph-com-v1alpha1-tigergraphrestore"} +2024-01-17T06:53:10Z INFO controller-runtime.webhook Registering webhook {"path": 
"/mutate-graphdb-tigergraph-com-v1alpha1-tigergraphrestore"} +2024-01-17T06:53:10Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "graphdb.tigergraph.com/v1alpha1, Kind=TigerGraphRestore", "path": "/validate-graphdb-tigergraph-com-v1alpha1-tigergraphrestore"} +2024-01-17T06:53:10Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-graphdb-tigergraph-com-v1alpha1-tigergraphrestore"} +2024-01-17T06:53:10Z INFO setup starting manager +2024-01-17T06:53:10Z INFO controller-runtime.webhook.webhooks Starting webhook server +2024-01-17T06:53:10Z INFO Starting server {"path": "/metrics", "kind": "metrics", "addr": "127.0.0.1:8080"} +2024-01-17T06:53:10Z INFO Starting server {"kind": "health probe", "addr": "[::]:8081"} +I0117 06:53:10.444024 1 leaderelection.go:248] attempting to acquire leader lease tigergraph/9d6fe668.tigergraph.com... +2024-01-17T06:53:10Z INFO controller-runtime.certwatcher Updated current TLS certificate +2024-01-17T06:53:10Z INFO controller-runtime.webhook Serving webhook server {"host": "", "port": 9443} +2024-01-17T06:53:10Z INFO controller-runtime.certwatcher Starting certificate watcher +I0117 06:53:28.295175 1 leaderelection.go:258] successfully acquired lease tigergraph/9d6fe668.tigergraph.com +2024-01-17T06:53:28Z DEBUG events tigergraph-operator-controller-manager-65fbf7689b-zg6h9_301c1e20-6188-46f8-b548-feaeb28e542a became leader {"type": "Normal", "object": {"kind":"Lease","namespace":"tigergraph","name":"9d6fe668.tigergraph.com","uid":"af0adb63-c1e8-441d-aa81-58507ab90c7f","apiVersion":"coordination.k8s.io/v1","resourceVersion":"1414049"}, "reason": "LeaderElection"} +2024-01-17T06:53:28Z INFO Starting EventSource {"controller": "tigergraph", "controllerGroup": "graphdb.tigergraph.com", "controllerKind": "TigerGraph", "source": "kind source: *v1alpha1.TigerGraph"} +2024-01-17T06:53:28Z INFO Starting EventSource {"controller": "tigergraphbackupschedule", "controllerGroup": "graphdb.tigergraph.com", "controllerKind": "TigerGraphBackupSchedule", "source": "kind source: *v1alpha1.TigerGraphBackupSchedule"} +2024-01-17T06:53:28Z INFO Starting EventSource {"controller": "tigergraph", "controllerGroup": "graphdb.tigergraph.com", "controllerKind": "TigerGraph", "source": "kind source: *v1.ConfigMap"} +2024-01-17T06:53:28Z INFO Starting EventSource {"controller": "tigergraph", "controllerGroup": "graphdb.tigergraph.com", "controllerKind": "TigerGraph", "source": "kind source: *v1.Service"} +2024-01-17T06:53:28Z INFO Starting EventSource {"controller": "tigergraph", "controllerGroup": "graphdb.tigergraph.com", "controllerKind": "TigerGraph", "source": "kind source: *v1.StatefulSet"} +2024-01-17T06:53:28Z INFO Starting EventSource {"controller": "tigergraph", "controllerGroup": "graphdb.tigergraph.com", "controllerKind": "TigerGraph", "source": "kind source: *v1.Job"} +2024-01-17T06:53:28Z INFO Starting EventSource {"controller": "tigergraph", "controllerGroup": "graphdb.tigergraph.com", "controllerKind": "TigerGraph", "source": "kind source: *v1.Ingress"} +2024-01-17T06:53:28Z INFO Starting Controller {"controller": "tigergraph", "controllerGroup": "graphdb.tigergraph.com", "controllerKind": "TigerGraph"} +2024-01-17T06:53:28Z INFO Starting EventSource {"controller": "tigergraphbackupschedule", "controllerGroup": "graphdb.tigergraph.com", "controllerKind": "TigerGraphBackupSchedule", "source": "kind source: *v1.CronJob"} +2024-01-17T06:53:28Z INFO Starting EventSource {"controller": "tigergraphbackup", "controllerGroup": 
"graphdb.tigergraph.com", "controllerKind": "TigerGraphBackup", "source": "kind source: *v1alpha1.TigerGraphBackup"} +2024-01-17T06:53:28Z INFO Starting EventSource {"controller": "tigergraphbackup", "controllerGroup": "graphdb.tigergraph.com", "controllerKind": "TigerGraphBackup", "source": "kind source: *v1.Job"} +2024-01-17T06:53:28Z INFO Starting EventSource {"controller": "tigergraphrestore", "controllerGroup": "graphdb.tigergraph.com", "controllerKind": "TigerGraphRestore", "source": "kind source: *v1alpha1.TigerGraphRestore"} +2024-01-17T06:53:28Z INFO Starting EventSource {"controller": "tigergraphrestore", "controllerGroup": "graphdb.tigergraph.com", "controllerKind": "TigerGraphRestore", "source": "kind source: *v1.Job"} +2024-01-17T06:53:28Z INFO Starting Controller {"controller": "tigergraphrestore", "controllerGroup": "graphdb.tigergraph.com", "controllerKind": "TigerGraphRestore"} +2024-01-17T06:53:28Z INFO Starting Controller {"controller": "tigergraphbackupschedule", "controllerGroup": "graphdb.tigergraph.com", "controllerKind": "TigerGraphBackupSchedule"} +2024-01-17T06:53:28Z INFO Starting Controller {"controller": "tigergraphbackup", "controllerGroup": "graphdb.tigergraph.com", "controllerKind": "TigerGraphBackup"} +2024-01-17T06:53:28Z INFO Starting workers {"controller": "tigergraphrestore", "controllerGroup": "graphdb.tigergraph.com", "controllerKind": "TigerGraphRestore", "worker count": 1} +2024-01-17T06:53:28Z INFO Starting workers {"controller": "tigergraphbackupschedule", "controllerGroup": "graphdb.tigergraph.com", "controllerKind": "TigerGraphBackupSchedule", "worker count": 1} +2024-01-17T06:53:28Z INFO Starting workers {"controller": "tigergraph", "controllerGroup": "graphdb.tigergraph.com", "controllerKind": "TigerGraph", "worker count": 1} +2024-01-17T06:53:28Z INFO Starting workers {"controller": "tigergraphbackup", "controllerGroup": "graphdb.tigergraph.com", "controllerKind": "TigerGraphBackup", "worker count": 1} +2024-01-17T06:54:19Z DEBUG controller-runtime.webhook.webhooks received request {"webhook": "/validate-graphdb-tigergraph-com-v1alpha1-tigergraph", "UID": "89ee67ac-a8c5-4b8f-b147-eac94380f50c", "kind": "graphdb.tigergraph.com/v1alpha1, Kind=TigerGraph", "resource": {"group":"graphdb.tigergraph.com","version":"v1alpha1","resource":"tigergraphs"}} +2024-01-17T06:54:19Z INFO tigergraph-resource validate delete {"name": "test-cluster", "namespace": "tigergraph"} +2024-01-17T06:54:19Z DEBUG controller-runtime.webhook.webhooks wrote response {"webhook": "/validate-graphdb-tigergraph-com-v1alpha1-tigergraph", "code": 200, "reason": "", "UID": "89ee67ac-a8c5-4b8f-b147-eac94380f50c", "allowed": true} +2024-01-17T07:16:47Z DEBUG controller-runtime.webhook.webhooks received request {"webhook": "/mutate-graphdb-tigergraph-com-v1alpha1-tigergraph", "UID": "a571285b-c58f-4341-baeb-5a18db28c4a9", "kind": "graphdb.tigergraph.com/v1alpha1, Kind=TigerGraph", "resource": {"group":"graphdb.tigergraph.com","version":"v1alpha1","resource":"tigergraphs"}} +2024-01-17T07:16:47Z DEBUG controller-runtime.webhook.webhooks wrote response {"webhook": "/mutate-graphdb-tigergraph-com-v1alpha1-tigergraph", "code": 200, "reason": "", "UID": "a571285b-c58f-4341-baeb-5a18db28c4a9", "allowed": true} +2024-01-17T07:16:47Z DEBUG controller-runtime.webhook.webhooks received request {"webhook": "/validate-graphdb-tigergraph-com-v1alpha1-tigergraph", "UID": "744ec62b-fe79-4bf1-868e-bb5946422bae", "kind": "graphdb.tigergraph.com/v1alpha1, Kind=TigerGraph", "resource": 
{"group":"graphdb.tigergraph.com","version":"v1alpha1","resource":"tigergraphs"}} +2024-01-17T07:16:47Z INFO tigergraph-resource validate create {"name": "test-cluster2", "namespace": "tigergraph"} +2024-01-17T07:16:47Z DEBUG controller-runtime.webhook.webhooks wrote response {"webhook": "/validate-graphdb-tigergraph-com-v1alpha1-tigergraph", "code": 200, "reason": "", "UID": "744ec62b-fe79-4bf1-868e-bb5946422bae", "allowed": true} +``` + +You can see there are logs output by webhooks, which means webhooks work well. But when webhooks accept creation of TigerGraph CR, the reconcile for TigerGraph CR is not triggered. If you encounter this issue, you can uninstall the running operator by `kubectl tg uninstall --namespace $NAMESPACE`, and install it again. Then the reconcile will be triggered properly. diff --git a/k8s/docs/05-troubleshoot/cluster-management.md b/k8s/docs/05-troubleshoot/cluster-management.md index 362172d5..2319f7f7 100644 --- a/k8s/docs/05-troubleshoot/cluster-management.md +++ b/k8s/docs/05-troubleshoot/cluster-management.md @@ -2,6 +2,22 @@ This document provides solutions for common issues that may arise during the management of a TigerGraph cluster in Kubernetes. +- [TigerGraph Cluster management Troubleshooting](#tigergraph-cluster-management-troubleshooting) + - [Troubleshooting Steps for updating cluster](#troubleshooting-steps-for-updating-cluster) + - [Potential failure of update](#potential-failure-of-update) + - [Troubleshooting Steps for upgrading cluster](#troubleshooting-steps-for-upgrading-cluster) + - [Potential failure of upgrading](#potential-failure-of-upgrading) + - [Troubleshooting Steps for scaling cluster](#troubleshooting-steps-for-scaling-cluster) + - [Expansion](#expansion) + - [Potential failure of expansion](#potential-failure-of-expansion) + - [Shrinking](#shrinking) + - [Potential Causes](#potential-causes) + - [Troubleshooting Steps for External Service Accessibility](#troubleshooting-steps-for-external-service-accessibility) + - [TigerGraph GUI](#tigergraph-gui) + - [Troubleshooting Steps for customizing and updating license/TigerGraph configurations](#troubleshooting-steps-for-customizing-and-updating-licensetigergraph-configurations) + - [Customizing TigerGraph configurations during initialization](#customizing-tigergraph-configurations-during-initialization) + - [Updating TigerGraph configurations and license when cluster is running](#updating-tigergraph-configurations-and-license-when-cluster-is-running) + ## Troubleshooting Steps for updating cluster - Verify the CPU and memory resources of the cluster Custom Resource (CR) have been updated: @@ -461,3 +477,108 @@ In TigerGraph 3.6.3, Session Affinity is not supported. Direct external access t timeoutSeconds: 1800 ``` + +## Troubleshooting Steps for customizing and updating license/TigerGraph configurations + +### Customizing TigerGraph configurations during initialization + +The init-job may fail due to wrong TigerGraph configurations. Check the logs of the init-job to find the root cause. 
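+
+For instance, a typo in a parameter name under `spec.tigergraphConfig` (a hypothetical example, not taken from this guide) is enough to make the init-job fail:
+
+```yaml
+spec:
+  tigergraphConfig:
+    # "GSQL.UDF.Policy.Enble" is a deliberate typo of "GSQL.UDF.Policy.Enable",
+    # so gadmin config set rejects it during initialization
+    GSQL.UDF.Policy.Enble: "false"
+```
+
+To find the failed init-job pod, list the pods with the corresponding `job-name` label: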
+
+```bash
+kubectl get pod -l job-name=test-cluster-init-job -n tigergraph
+```
+
+The output should be similar to the following:
+
+```bash
+NAME                          READY   STATUS   RESTARTS   AGE
+test-cluster-init-job-2j4x4   0/1     Error    0          2m
+```
+
+Use `kubectl logs` to check the logs of the init-job:
+
+```bash
+kubectl logs test-cluster-init-job-2j4x4 -n tigergraph
+```
+
+You may see logs like the following:
+
+```bash
+Server:         10.96.0.10
+Address:        10.96.0.10:53
+
+Name:   test-cluster-0.test-cluster-internal-service.tigergraph.svc.cluster.local
+Address: 10.244.0.36
+
+
+Server:         10.96.0.10
+Address:        10.96.0.10:53
+
+
+Name:   test-cluster-1.test-cluster-internal-service.tigergraph.svc.cluster.local
+Address: 10.244.0.37
+
+Server:         10.96.0.10
+Address:        10.96.0.10:53
+
+
+Name:   test-cluster-2.test-cluster-internal-service.tigergraph.svc.cluster.local
+Address: 10.244.0.38
+
+Warning: Permanently added '[test-cluster-0.test-cluster-internal-service.tigergraph]:10022' (ED25519) to the list of known hosts.
+HOST_LIST: [{"Hostname":"test-cluster-0.test-cluster-internal-service","ID":"m1","Region":""},{"Hostname":"test-cluster-1.test-cluster-internal-service","ID":"m2","Region":""},{"Hostname":"test-cluster-2.test-cluster-internal-service","ID":"m3","Region":""}]
+[Warning] For HA setup, there might be some nodes unused since replication factor 2 is not a factor of machine number 3
+the bucket bit is 5
+false
+[Wed Dec 20 06:39:02 UTC 2023] set config entry Controller.ServiceManager.AutoRestart to true
+Log mode is prod
+[Wed Dec 20 06:39:02 UTC 2023] start setting TigerGraph configurations
+[ Error] config entry WRONG_CONFIG not found
+[ Error] ParameterErr (failed to set one or more config entries)
+```
+
+The error message `[ Error] config entry WRONG_CONFIG not found` means that you have set a wrong configuration entry in the CR. Check the CR and make sure the configuration entry is correct according to the [Configuration Parameters](https://docs.tigergraph.com/tigergraph-server/current/reference/configuration-parameters). Once you have corrected the configuration entry, you can update the CR and wait for the init-job to be recreated automatically.
+
+### Updating TigerGraph configurations and license when cluster is running
+
+You can update TigerGraph configurations and the license while the cluster is running. This triggers a config-update job that applies the new configurations and license to the cluster. The config-update job may fail if you set a wrong configuration entry or a wrong license in the CR. Check the logs of the config-update job to find the root cause.
+
+```bash
+kubectl get pod -l job-name=test-cluster-config-update-job -n tigergraph
+```
+
+The output should be similar to the following:
+
+```bash
+NAME                                   READY   STATUS   RESTARTS   AGE
+test-cluster-config-update-job-9p6bt   0/1     Error    0          18s
+```
+
+Use `kubectl logs` to check the logs of the config-update-job:
+
+```bash
+kubectl logs test-cluster-config-update-job-9p6bt -n tigergraph
+```
+
+If the output is like
+
+```bash
+Warning: Permanently added '[test-cluster-0.test-cluster-internal-service.tigergraph]:10022' (ED25519) to the list of known hosts.
+[Wed Dec 20 07:07:22 UTC 2023] Start updating TigerGraph configurations
+[ Error] config entry WRONG_CONFIG not found
+[ Error] ParameterErr (failed to set one or more config entries)
+```
+
+You need to check the CR and make sure the configuration entry is correct according to the [Configuration Parameters](https://docs.tigergraph.com/tigergraph-server/current/reference/configuration-parameters). 
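+
+For example, if the failure came from a made-up entry such as `WRONG_CONFIG`, the fix is simply to replace it with a documented parameter name in the CR; the value below is illustrative:
+
+```yaml
+spec:
+  tigergraphConfig:
+    # a valid, documented configuration entry (string value, quoted)
+    Controller.BasicConfig.LogConfig.LogFileMaxDurationDay: "40"
+```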
+ +If the output is like + +```bash +Warning: Permanently added '[test-cluster-0.test-cluster-internal-service.tigergraph]:10022' (ED25519) to the list of known hosts. +[Wed Dec 20 07:12:43 UTC 2023] Start setting license +[ Error] ExternalError (failed to set license; token contains an invalid number of segments) +``` + +You need to check the CR and make sure the license is correct. + +Once you correct TigerGraph configurations and license in the CR, the config-update job will be recreated automatically. diff --git a/k8s/docs/05-troubleshoot/rolling-update.md b/k8s/docs/05-troubleshoot/rolling-update.md index 4e471993..8cec59b8 100644 --- a/k8s/docs/05-troubleshoot/rolling-update.md +++ b/k8s/docs/05-troubleshoot/rolling-update.md @@ -276,8 +276,8 @@ Init Containers: Containers: tigergraph: Container ID: containerd://f47028197d9376f558a088b5b88cb34e0f00f6639a433d115b29b493b54c2e87 - Image: docker.io/tginternal/tigergraph-k8s:3.9.2-post-start - Image ID: docker.io/tginternal/tigergraph-k8s@sha256:dd3dd058fbef7eae77cf51e622c467d290ceeaf9644b8392b5b0eec4920b84de + Image: docker.io/tigergraph/tigergraph-k8s:3.9.3 + Image ID: docker.io/tigergraph/tigergraph-k8s@sha256:dd3dd058fbef7eae77cf51e622c467d290ceeaf9644b8392b5b0eec4920b84de Ports: 9000/TCP, 14240/TCP, 22/TCP Host Ports: 0/TCP, 0/TCP, 0/TCP State: Running @@ -300,8 +300,8 @@ Events: Normal Pulled 34m kubelet Container image "alpine:3.17.2" already present on machine Normal Started 34m kubelet Started container init-tigergraph Normal Created 34m kubelet Created container init-tigergraph - Normal Pulled 34m kubelet Successfully pulled image "docker.io/tginternal/tigergraph-k8s:3.9.2-post-start" in 2.034685698s - Normal Pulled 32m kubelet Successfully pulled image "docker.io/tginternal/tigergraph-k8s:3.9.2-post-start" in 338.940713ms + Normal Pulled 34m kubelet Successfully pulled image "docker.io/tigergraph/tigergraph-k8s:3.9.2" in 2.034685698s + Normal Pulled 32m kubelet Successfully pulled image "docker.io/tigergraph/tigergraph-k8s:3.9.2" in 338.940713ms Warning FailedPreStopHook 31m (x2 over 32m) kubelet Exec lifecycle hook ([/bin/bash -c if [ "$(ls -A /home/tigergraph/tigergraph/data/|grep -v lost|tail -1)" ]; then export PATH=/home/tigergraph/tigergraph/app/cmd:$PATH @@ -318,8 +318,8 @@ Events: gadmin stop $PROCESS_ALL -y fi' exited with 1: , message: "ExternalError (Failed to get the APP root from config; The file ~/.tg.cfg either does not exist or is a broken link. Please create a new symlink at this location and point it to the tg.cfg file located in the 'configs' directory of System.DataRoot. 
This can be done using the following command: ln -s /path/to/System.DataRoot/configs/tg.cfg ~/.tg.cfg; open /home/tigergraph/.tg.cfg: no such file or directory)\n" Normal Killing 31m (x2 over 32m) kubelet FailedPostStartHook - Normal Pulling 31m (x3 over 34m) kubelet Pulling image "docker.io/tginternal/tigergraph-k8s:3.9.2-post-start" - Normal Pulled 31m kubelet Successfully pulled image "docker.io/tginternal/tigergraph-k8s:3.9.2-post-start" in 315.864405ms + Normal Pulling 31m (x3 over 34m) kubelet Pulling image "docker.io/tigergraph/tigergraph-k8s:3.9.2" + Normal Pulled 31m kubelet Successfully pulled image "docker.io/tigergraph/tigergraph-k8s:3.9.2" in 315.864405ms Normal Created 31m (x3 over 34m) kubelet Created container tigergraph Normal Started 31m (x3 over 34m) kubelet Started container tigergraph Warning BackOff 18m (x41 over 31m) kubelet Back-off restarting failed container diff --git a/k8s/docs/06-FAQs/README.md b/k8s/docs/06-FAQs/README.md index e5c770b9..e657dffe 100644 --- a/k8s/docs/06-FAQs/README.md +++ b/k8s/docs/06-FAQs/README.md @@ -22,7 +22,7 @@ TigerGraph's exceptional performance comes with certain considerations regarding For your optimal experience, it is strongly recommended to start a backup operation before starting any upgrade or scaling activities. This precautionary measure ensures the safety of your data and system integrity. -## How to know the status of cluster management? Do I need to confirm stat before modifying TigerGraph cluster CR configuration? +## How can I know the status of cluster management? Do I need to confirm stat before modifying TigerGraph cluster CR configuration? In essence, TigerGraph does not inherently maintain a record of the cluster status throughout its lifecycle. However, understanding the status of the TigerGraph cluster is pivotal for the TigerGraph Operator. This insight empowers the Operator to determine which operations can be executed at any given moment. @@ -42,44 +42,61 @@ TigerGraph cluster status in Operator are following as: | Upgrade | TigerGraph cluster is in upgrading process, pulling new version image and performing upgrade job| | Expand | TigerGraph cluster is in scale up process, preparing for new pods and performing expansion job| | Shrink | TigerGraph cluster is in scale down process, performing shrinking job and scale down pods| +| ConfigUpdate | TigerGraph cluster is in config updating process. A config updating job will run and change configurations of TigerGraph (by gadmin config set). | +| Pause | TigerGraph is paused, statefulSet and external services are deleted, but TigerGraph data remains in the PVC. | +| Resume | TigerGraph is resuming from paused state, it will recreate the statefulSet and external services and load TigerGraph data into the remaining PVCs| +| HAUpdate | TigerGraph cluster is in Ha updating process. 
A HAUpdate job will run and change configurations of ha.| You can execute the following command to check the status of TigerGraph cluster on Kubernetes: ```bash kubectl get tg ${TIGERGRAPH_CLUSTER_NAME} -o yaml -n ${NAMESPACE}|yq .status clusterSize: 3 -clusterTopology: - test-cluster-0: - - gui - - nginx - - restpp - test-cluster-1: - - gui - - nginx - - restpp - test-cluster-2: - - gui - - nginx - - restpp conditions: - - lastProbeTime: "2023-08-23T08:37:00Z" + - lastTransitionTime: "2023-12-29T02:24:43Z" + message: Cluster is in Normal condition type and status is True + reason: ClusterNormalTrue status: "True" type: Normal - - lastProbeTime: "2023-08-24T05:46:24Z" - message: Hello GSQL - status: "True" - type: test-cluster-0-rest-Available - - lastProbeTime: "2023-08-24T05:46:24Z" - message: Hello GSQL - status: "True" - type: test-cluster-1-rest-Available - - lastProbeTime: "2023-08-24T05:46:24Z" - message: Hello GSQL - status: "True" - type: test-cluster-2-rest-Available ha: 2 -image: docker.io/tginternal/tigergraph-k8s:3.9.2 +hashBucketInBit: 5 +image: docker.io/tigergraph/tigergraph-k8s:3.10.0 +licenseHash: a9bfdffca9fa31dabba770e15be83de7 listener: type: LoadBalancer replicas: 3 ``` + +## What is the purpose and functionality of the custom resource definition in the current setup? + +The Custom Resource Definition (CRD) is used to define and describe the specifications for creating TigerGraph (TG) on Kubernetes (K8s). When implemented, the end user only needs to focus on the CRD, eliminating the need to manage native K8s resources such as StatefulSet and Job. + +## Is it feasible to install multiple instances of TigerGraph, such as development, testing, and production environments, on a single EKS cluster? + +Yes, the current version supports this feature. The simple way is to install one operator with the cluster scope option enabled (`kubectl tg init --cluster-scope true`). Alternatively, you can install a namespace-scoped operator (`kubectl tg init --cluster-scope false`), which watches and manages resources for a specific namespace. Choosing this option allows different teams to manage their TG clusters in a designated namespace with their operator. For more details, please see [install-tigergraph-operator](../03-deploy/tigergraph-on-eks.md#install-tigergraph-operator) + +## Is the cert manager directly involved with the webhooks, and if so, how? What is the cert manager doing? + +In Kubernetes, webhooks are typically used for validating admission requests, ensuring that a resource being created or modified adheres to certain policies, including security, compliance, or custom business rules. Cert Manager can be used to manage TLS certificates for services exposing webhooks. + +## Why are there multiple TigerGraph operators in the TigerGraph namespace? We observed three pods labeled as "operator" – are these simply scaled replicas, or do they serve distinct functions? + +For a cluster-scoped Operator, there will be only one Operator in a specific namespace. The namespace-scoped operator is installed for each namespace managing and deploying TigerGraph. Operator installation creates a Deployment resource on K8s, and multiple pods labeled as “operator” belong to this Deployment, enabling High Availability of Operators. You can specify and update the pod numbers of the Operator with `kubectl tg init|upgrade --operator-size 3`. (default value is 3). + +## How does Kubernetes handle situations where the GSQL leader or another node fails? 
+
+K8s will schedule new pods to another available node if node failures occur. The High Availability of GSQL is the same as on-premise; leader switches are done automatically if GSQL replication is more than 1.
+
+## Does using `kubectl tg update --size 3 --ha 1` change the cluster configuration to 1 by 3, or is it necessary to modify the YAML file for this purpose?
+
+You don't necessarily have to edit the YAML file; you can also go from a 1x1 to a 1x3 with the operator by running `kubectl tg update --cluster-name ${YOUR_CLUSTER_NAME} --size 3 --ha 1 --namespace ${YOUR_NAMESPACE}`. Both YAML and kubectl-tg modes are supported for managing TG clusters on K8s.
+
+## Is it possible to restore a 3 node cluster from a 2 node cluster configuration, and if so, how?
+
+A simple rule for restoration: the partition must be consistent, and replication can differ. Typical scenarios are as follows:
+
+| Scenarios| Is Partition changed? |Is HA changed?|Support or not|Example (a*b means partition=a ha=b and size=ab)|
+|----------|----------|----------|----------|----------|
+| Clone an identical cluster| No| No | Yes|Source cluster: 3*2, Target cluster: 3*2|
+| Restore in a cluster with different partition| Yes| Yes or No | No|Source cluster: 3*2, Target cluster: 2*3 or 2*2|
+| Restore in a cluster with different HA| No| Yes | Yes|Source cluster: 3*3, Target cluster: 3*1|
diff --git a/k8s/docs/07-reference/cluster-status-of-tigergraph.md b/k8s/docs/07-reference/cluster-status-of-tigergraph.md
new file mode 100644
index 00000000..cb33c9b5
--- /dev/null
+++ b/k8s/docs/07-reference/cluster-status-of-tigergraph.md
@@ -0,0 +1,77 @@
+# Cluster status of TigerGraph on k8s
+
+## Get the status of TigerGraph cluster
+
+You can get the status of TigerGraph clusters in a specific namespace by running the following command:
+
+```bash
+# if you have installed kubectl tg command, use this one
+kubectl tg list -n $NAMESPACE
+
+# alternatively, you can use kubectl get command
+kubectl get tg -n $NAMESPACE
+```
+
+The output will be like this:
+
+```bash
+NAME            REPLICAS   CLUSTER-SIZE   CLUSTER-HA   CLUSTER-VERSION                              SERVICE-TYPE   CONDITION-TYPE   CONDITION-STATUS   AGE
+test-cluster0   3          3              2            docker.io/tigergraph/tigergraph-k8s:3.10.0   LoadBalancer   Normal           True               4d1h
+test-cluster1   3          3              1            docker.io/tigergraph/tigergraph-k8s:3.9.3    Ingress        InitializePost   False              15m
+test-cluster2   4          3              1            docker.io/tigergraph/tigergraph-k8s:3.9.3    NodePort       ExpandPost       Unknown            12h
+```
+
+You can also get the status of a specific cluster by running the following command:
+
+```bash
+# if you have installed kubectl tg command, use this one
+kubectl tg status -n $NAMESPACE -c $CLUSTER_NAME
+# alternatively, you can use kubectl describe command
+kubectl describe tg $CLUSTER_NAME -n $NAMESPACE
+```
+
+Check the `Status` field:
+
+```bash
+Status:
+  Cluster Size:  3
+  Conditions:
+    Last Transition Time:  2023-12-29T07:27:13Z
+    Message:               Cluster is in Normal condition type and status is True
+    Reason:                ClusterNormalTrue
+    Status:                True
+    Type:                  Normal
+```
+
+The field CONDITION-TYPE shows the current condition of the cluster. The field CONDITION-STATUS shows the status of the condition; the possible values are True, False, and Unknown. These two fields together indicate the current status of the cluster, which we write as (*CONDITION-TYPE,CONDITION-STATUS*). When the CONDITION-STATUS is True, the cluster is in a good status. When the CONDITION-STATUS is False, the cluster is in a bad status, which means some errors occurred. 
When the CONDITION-STATUS is Unknown, the cluster is doing some operations, such as initializing, expanding, shrinking, etc. It will then transfer to True if the operation is successful, or False if the operation failed. + +For example, when you create a new cluster, the status will be (*InitializeRoll,Unknown*). In this status all pods will be created and initialized. After all pods are ready, the status will be (*InitializeRoll,True*) and then transfer to (*InitializePost,Unknown*) immediately. In this status, an init-job will be created by operator to initialize TigerGraph system. If the init-job is successful, the status will be (*InitializePost,True*) and transfer to (*Normal,True*) immediately. If the init-job failed, the status will be (*InitializePost,False*). + +## All possible status of TigerGraph + +To make it clear, we list all possible status of TigerGraph cluster here, and describe the meaning of each status. + +| Condition Type | Possible Condition Status | Description | Note | +| ---------------|---------------------------|-------------|------| +| Normal | True | TigerGraph cluster is in ready state, it's allowed to do any cluster operations | The normal state doesn’t mean that all of TG services like RESTPP, GSE and so on are ready, even though Operator mark the cluster status to normal, client application still need to check the service status by itself. | +| InitializeRoll | Unknown,True | Create all pods and wait for all pods to be ready | It is allowed to update or correct the configurations(CPU, memory, cluster size and so on) in this status | +| InitializePost | Unknown,True,False | Create an init-job to initialize TigerGraph system | The service status of TG is uncertain until initialization is completed. | +| UpdateRoll | Unknown,True | TigerGraph cluster is in rolling update that indicates you update the CPU, Memory, and other TG pod configurations. | The RESTPP and GUI services are available during UpdateRoll status | +| UpgradeRoll | Unknown,True | TigerGraph cluster is in rolling update to pull new version image | The service status of TG is uncertain. | +| UpgradePost | Unknown,True,False | Create an upgrade-job to upgrade TigerGraph system | The service status of TG is uncertain until upgrade-job is completed. | +| ExpandRoll | Unknown,True | TigerGraph cluster is in rolling update to create more pods for expansion | The service status of TG is uncertain | +| ExpandPost | Unknown,True,False | Create an expand-job to expand TigerGraph cluster | The service status of TG is uncertain until expand-job is completed. | +| ExpandRollBack | Unknown,True | When expand-job failed, you can set the cluster size back to original one, then cluster will transfer to this status to remove unnecessary pods | | +| ShrinkPre | Unknown,True,False | Create a shrink-pre-job to shrink TigerGraph cluster | The service status of TG is uncertain until shrink-pre-job is completed | +| ShrinkRoll | Unknown,True | TigerGraph cluster is in rolling update to remove pods for shrinking | | +| ConfigUpdate | Unknown,True,False |Create a config-update-job to update configurations and license of TigerGraph system | All services will be restarted to apply new configurations. 
| +| PauseRoll | Unknown,True | Delete all TigerGraph pods and services and wait them to be terminated | In this status, it's not allowed to update size/ha/image/tigergraphConfig in TigerGraph CR | +| Paused | True | All TigerGraph pods and services are terminated | In this status, it's not allowed to update size/ha/image/tigergraphConfig in TigerGraph CR | +| ResumeRoll | Unknown,True | Create all pods and wait for all pods to be ready | You can update `.spec.pause` to true to pause the cluster again | +| HAUpdate | Unknown,True,False | Create a ha-update-job to update HA of TigerGraph system | Updating cluster size is not allowed when cluster is in HAUpdate | + +Meaning of condition status: + +* True: The cluster is in good status, the operation has been performed successfully. +* False: The cluster is in bad status, the operation is failed. You need to check logs of the failed pod to find the reason. +* Unknown: The cluster is doing some operations, such as initializing, expanding, shrinking, etc. It will then transfer to True if the operation is successful, or False if the operation failed. diff --git a/k8s/docs/07-reference/configure-tigergraph-cluster-cr-with-yaml-manifests.md b/k8s/docs/07-reference/configure-tigergraph-cluster-cr-with-yaml-manifests.md index 096316c0..01ecccb6 100644 --- a/k8s/docs/07-reference/configure-tigergraph-cluster-cr-with-yaml-manifests.md +++ b/k8s/docs/07-reference/configure-tigergraph-cluster-cr-with-yaml-manifests.md @@ -6,6 +6,28 @@ This document introduces how to configure the TG cluster using TigerGraph CR. It - Configure TigerGraph deployment +- [How to configure TG Cluster on K8s using TigerGraph CR](#how-to-configure-tg-cluster-on-k8s-using-tigergraph-cr) + - [Configure resources](#configure-resources) + - [Configure TG deployment](#configure-tg-deployment) + - [Cluster name](#cluster-name) + - [TigerGraph cluster version](#tigergraph-cluster-version) + - [TigerGraph cluster size and HA factor](#tigergraph-cluster-size-and-ha-factor) + - [TigerGraph Cluster license](#tigergraph-cluster-license) + - [Service account name of TigerGraph pod(Optional)](#service-account-name-of-tigergraph-podoptional) + - [Private ssh key name of TigerGraph Cluster](#private-ssh-key-name-of-tigergraph-cluster) + - [Storage volumes of TigerGraph Cluster](#storage-volumes-of-tigergraph-cluster) + - [Resource requests and limits of TigerGraph pod](#resource-requests-and-limits-of-tigergraph-pod) + - [External access service](#external-access-service) + - [Customized labels and annotations for external service](#customized-labels-and-annotations-for-external-service) + - [Container Customization of TigerGraph pods](#container-customization-of-tigergraph-pods) + - [NodeSelector, Affinity, and Toleration configuration](#nodeselector-affinity-and-toleration-configuration) + - [TigerGraph Configurations](#tigergraph-configurations) + - [Pod Labels and Annotations](#pod-labels-and-annotations) + - [Security Context of TigerGraph Containers](#security-context-of-tigergraph-containers) + - [Lifecycle Hooks of TigerGraph](#lifecycle-hooks-of-tigergraph) + - [PostInitAction](#postinitaction) + - [API reference of TigerGraphSpec](#api-reference-of-tigergraphspec) + ## Configure resources Before deploying a TG cluster, it is necessary to be familiar with the hardware and software requirements depending on your needs. For details, refer to [Hardware and Software Requirements](https://docs.tigergraph.com/tigergraph-server/current/installation/hw-and-sw-requirements). 
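+
+As a rough sketch (the CPU and memory figures below are illustrative, not sizing guidance), those hardware requirements are ultimately expressed through the `spec.resources` field of the TigerGraph CR, which is covered in more detail later in this document:
+
+```yaml
+spec:
+  resources:
+    requests:
+      cpu: "8"
+      memory: 16Gi
+    limits:
+      cpu: "8"
+      memory: 16Gi
+```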
@@ -22,14 +44,10 @@ kind: TigerGraph metadata: name: test-cluster spec: - image: docker.io/tginternal/tigergraph-k8s:3.9.3 + image: docker.io/tigergraph/tigergraph-k8s:3.9.3 imagePullPolicy: IfNotPresent - initJob: - image: docker.io/tginternal/tigergraph-k8s-init:0.0.9 - imagePullPolicy: IfNotPresent - initTGConfig: - ha: 2 - license: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + ha: 2 + license: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx listener: type: LoadBalancer privateKeyName: ssh-key-secret @@ -61,13 +79,11 @@ Besides, you also need to specify the TG version by changing `spec.initTGConfig. The TigerGraph cluster version can be configured by changing `spec.replicas` in the `TigerGraph` CR, -and the HA factor can be configured by changing `spec.initTGConfig.ha`, the default value of HA factor is 1. +and the HA factor can be configured by changing `spec.ha`. ### TigerGraph Cluster license -The TigerGraph cluster license is required for TigerGraph deployment, and it can be configured by changing `spec.initTGConfig.`license in the `TigerGraph` CR. - -A free license is available through this link [ftp://ftp.graphtiger.com/lic/license3.txt](ftp://ftp.graphtiger.com/lic/license3.txt), which has 14 days expiration date. +The TigerGraph cluster license is required for TigerGraph deployment, and it can be configured by changing `spec.license` in the TigerGraph CR. ### Service account name of TigerGraph pod(Optional) @@ -97,6 +113,8 @@ Then you can specify the value of `spec.privateKeyName` to the secret name you c Storage volumes configurations can be configured by changing `spec.storage` , there are two types of storage, `persistent-claim` and `ephemeral`. For production, you should use the `persistent-claim` type to store the data on persistent volumes. +Moreover, since Operator 0.1.0, you can mount multiple PVs for TigerGraph pods. 
+ - persistent-claim ```yaml @@ -118,6 +136,116 @@ spec: type: ephemeral ``` +- Mounting a dedicated PV for Kafka and TigerGraph logs + +```yaml +spec: + storage: + type: persistent-claim + volumeClaimTemplate: + resources: + requests: + storage: 100G + storageClassName: pd-standard + volumeMode: Filesystem + additionalStorages: + - name: tg-kafka + storageClassName: pd-ssd + storageSize: 10Gi + - name: tg-log + storageClassName: pd-standard + storageSize: 5Gi +``` + +- Mounting PVs for custom containers(Init, sidecar containers, and TigerGraph containers) + +```yaml +spec: + storage: + type: persistent-claim + volumeClaimTemplate: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100G + storageClassName: gp2 + volumeMode: Filesystem + additionalStorages: + - name: tg-sidecar + storageClassName: efs-sc + storageSize: 5Gi + accessMode: ReadWriteMany + volumeMode: Filesystem + - name: tg-backup + storageSize: 5Gi + mountPath: /home/tigergraph/backup + accessMode: ReadWriteOnce + volumeMode: Filesystem + initContainers: + - image: alpine:3.17.2 + name: init-container + args: + - /bin/sh + - -c + - echo hello + sidecarContainers: + - args: # sidecar will execute this + - /bin/sh + - -c + - | + while true; do + echo "$(date) INFO hello from main-container" >> /tg-sidecar/myapp.log ; + sleep 1; + done + image: alpine:3.17.2 + name: sidecar-container # name of sidecar + readinessProbe: # check if the sidecar is ready + exec: + command: + - sh + - -c + - if [[ -f /tg-sidecar/myapp.log ]];then exit 0; else exit 1;fi + initialDelaySeconds: 10 + periodSeconds: 5 + resources: + requests: # request resouces for sidecar + cpu: 500m + memory: 512Mi + limits: # limit resources + cpu: 500m + memory: 512Mi + env: # inject the environment you need + - name: CLUSTER_NAME + value: test-cluster + volumeMounts: + - mountPath: /tg-sidecar + name: tg-sidecar +``` + +- Mounting Existing PVs to Customize Volume Mounts of TigerGraph Containers + +```YAML +spec: + storage: + type: persistent-claim + volumeClaimTemplate: + resources: + requests: + storage: 100G + storageClassName: efs-sc + volumeMode: Filesystem + customVolumes: + - name: efs-storage + persistentVolumeClaim: + claimName: efs-claim + customVolumeMounts: + - name: efs-storage + mountPath: /efs-data +``` + +Details on how to mount multiple PVs for TigerGraph Pods, see [Multiple persistent volumes mounting](../03-deploy/multiple-persistent-volumes-mounting.md) + ### Resource requests and limits of TigerGraph pod The Resource requests and limits of TG Cluster pod can be configured by changing `spec.resources.requests` and `spec.resources.limits` in the `TigerGraph` CR. @@ -151,8 +279,7 @@ spec: spec: listener: type: NodePort - restNodePort: 30090 - studioNodePort: 30240 + nginxNodePort: 30240 ``` - Ingress @@ -160,9 +287,9 @@ spec: ```yaml spec: listener: + ingressClassName: INGRESS_CLASS_NAME type: Ingress - restHost: tigergraph-api.k8s.company.com - studioHost: tigergraph-studio.k8s.company.com + nginxHost: tigergraph-api.k8s.company.com secretName: k8s.company.com ``` @@ -180,21 +307,6 @@ spec: annotation-key: annotation-value ``` -### Initialize Job configuration of TigerGraph cluster - -It’s required to run a special job to initialize the TigerGraph cluster when deploying TigerGraph on K8s, you need to specify the image version of the Init Job, usually, the version is the same as the Operator version you installed. - -It can be configured by changing `spec.initjob` in the `TigerGraph` CR. 
imagePullPolicy and imagePullSecrets are optional configurations, you can omit them if you don’t need them. - -```yaml -spec: - initJob: - image: docker.io/tginternal/tigergraph-k8s-init:${OPERATOR_VERSION} - imagePullPolicy: IfNotPresent - imagePullSecrets: - - name: tigergraph-image-pull-secret -``` - ### Container Customization of TigerGraph pods TigerGraph CR support customizing the containers of TG pods, including the Init container, Sidecar container, and container volumes. To know more about this feature, you can refer to [InitContainers,SidecarContainers and CustomVolumes](../03-deploy/custom-containers.md) @@ -330,6 +442,64 @@ spec: topologyKey: topology.kubernetes.io/zone ``` +### TigerGraph Configurations + +TigerGraph configurations can be adjusted by modifying `spec.tigergraphConfig` in the `TigerGraph` Custom Resource (CR). These configurations will be applied to the TigerGraph (TG) cluster during initialization. If the cluster is already running, you can update the TG cluster configurations by altering `.spec.tigergraphConfig`. This will trigger the creation of a config-update job to apply the new configurations to the TG cluster. + +> [!NOTE] +> The configurations you provide must be compatible with the `gadmin config set` command. All values should be of string type, enclosed in double quotes (""). + +```yaml +spec: + tigergraphConfig: + # Values must be strings, enclosed in double quotes ("") + GSQL.UDF.Policy.Enable: "false" + Controller.BasicConfig.LogConfig.LogFileMaxDurationDay: "40" +``` + +### Pod Labels and Annotations + +You can customize the labels and annotations of the TigerGraph pods by modifying `spec.podLabels` and `spec.podAnnotations` in the `TigerGraph` CR. These labels and annotations will be applied to all TigerGraph pods. + +```yaml +spec: + podLabels: + key.tg.com: value + podAnnotations: + key.tg.com: value +``` + +### Security Context of TigerGraph Containers + +You can customize the security context of the TigerGraph containers by modifying `spec.securityContext` in the `TigerGraph` CR. These security contexts will be applied to all TigerGraph containers. For more information about security context, refer to [Set the security context for a Container](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container). + +```yaml +spec: + securityContext: + capabilities: + add: + - SYS_PTRACE + - SYSLOG + - SYS_ADMIN +``` + +### Lifecycle Hooks of TigerGraph + +You can customize the lifecycle hooks of TigerGraph by modifying `spec.lifecycle` in the `TigerGraph` CR. + +#### PostInitAction + +You can specify a bash script in `spec.lifecycle.postInitAction`, and the script will be put into the init-job and be executed in the first TigerGraph pod(whose suffix is `-0`) after the TigerGraph system is initialized. For example: + +```yaml +spec: + lifecycle: + postInitAction: | + echo "This is a post init action" >> /tmp/post-init-action.log +``` + +For more information about lifecycle hooks, refer to [Configure Lifecycle Hooks in TigerGraph CR](../03-deploy/lifecycle-of-tigergraph.md). 
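+
+Putting several of the options above together, an illustrative CR fragment (values chosen only for demonstration) that combines TigerGraph configurations, pod labels, a security context, and a post-init hook could look like this:
+
+```yaml
+spec:
+  tigergraphConfig:
+    GSQL.UDF.Policy.Enable: "false"
+  podLabels:
+    team.tg.com: demo
+  securityContext:
+    capabilities:
+      add:
+        - SYS_PTRACE
+  lifecycle:
+    postInitAction: |
+      echo "cluster initialized" >> /tmp/post-init-action.log
+```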
+ ## API reference of TigerGraphSpec TigerGraphSpec contains the details of TigerGraph members @@ -340,23 +510,29 @@ TigerGraphSpec contains the details of TigerGraph members | image | The desired TG docker image | | imagePullPolicy | (*Optional*)The image pull policy of TG docker image, default is IfNotPresent | | imagePullSecrets | (*Optional*)The own keys can access the private registry | -| initJob.image | The desired TG Init docker image | -| initJob.imagePullPolicy | (*Optional*)The image pull policy of TG docker image, default is IfNotPresent | -| initJob.imagePullSecrets | (*Optional*)The own keys can access the private registry | | serviceAccountName | (*Optional*)The service account name of pod which is used to acquire special permission | | privateKeyName | The secret name of private ssh key files | -| initTGConfig.ha | The replication factor of TG cluster | -| initTGConfig.license | The license of TG cluster | -| initTGConfig.version | The TG cluster version to initialize or upgrade | +| ha | The replication factor of TG cluster | +| license | The license of TG cluster | | listener.type | The type of external access service, which can be set to LoadBalancer, NodePort, and Ingress | -| listener.restNodePort | The rest service port which is required when setting listener.type to NodePort | -| listener.studioNodePort | The gui service port which is required when setting listener.type to NodePort | -| listener.restHost | The domain name of rest service which is required when setting listener.type to Ingress | -| listener.studioHost| The domain name of gui service which is required when setting listener.type to Ingress | +| listener.nginxNodePort | The nginx service port which is required when setting listener.type to NodePort | +| listener.ingressClassName | (Optional)The ingress class name of nginx service which can be set optionally when setting listener.type to Ingress | +| listener.nginxHost | The domain name of nginx service which is required when setting listener.type to Ingress | | listener.secretName | (*Optional*)The secretName is the name of the secret used to terminate TLS traffic on port 443 when setting listener.type to Ingress | | listener.labels | (*Optional*)The customized labels will be added to external service | | listener.annotations | (*Optional*)The customized annotations will be added to external service | | resources | [The compute resource requirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#resourcerequirements-v1-core) | +| storage | The persistent volumes for TigerGraph pods | +| storage.type | The type of persistent volume, which can be set to ephemeral or persistent-claim| +| storage.volumeClaimTemplate | The persistent volume claim template for TigerGraph main storage. 
[PersistentVolumeClaimSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#persistentvolumeclaimspec-v1-core)| +| storage.additionalStorages | (*Optional*)Additional storages for TigerGraph pods, it's an array list of StorageVolume| +| `StorageVolume`.name | Additional storage name| +| `StorageVolume`.storageClassName | (*Optional*)The `StorageClassName` of an additional storage| +| `StorageVolume`.storageSize | The storage size of an additional storage| +| `StorageVolume`.mountPath | (*Optional*)The mount path of TigerGraph container for an additional storage| +| `StorageVolume`.accessMode | (*Optional*)The access mode of an additional storage, which can be set to ReadWriteOnce, ReadOnlyMany, ReadWriteMany, or ReadWriteOncePod| +| `StorageVolume`.volumeMode | (*Optional*) The volume mode of an additional storage, which can be set to Filesystem or Block| +| customVolumeMounts | (*Optional*)The custom [volume mount](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#persistentvolumeclaimspec-v1-core:~:text=error%20was%20encountered.-,VolumeMount%20v1%20core,-Group) of TigerGraph container.| | initContainers | The [init containers](https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#Container) run in TigerGraph pods. | | sidecarContainers | (*Optional*)The [sidecar containers](https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#Container) run in TG pods | | customVolumes | (*Optional*)The custom [volumes](https://kubernetes.io/docs/concepts/storage/volumes/) used in init container and sidecar container | @@ -364,3 +540,7 @@ TigerGraphSpec contains the details of TigerGraph members | affinityConfiguration.nodeSelector | (*Optional*)The configuration of assigning pods to special nodes using [NodeSelector](https://kubernetes.io/docs/tasks/configure-pod-container/assign-pods-nodes/) | | affinityConfiguration.tolerations | (*Optional*)The [tolerations](https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/) configuration of TigerGraph pod | | affinityConfiguration.affinity | (*Optional*)The [affinity](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#inter-pod-affinity-and-anti-affinity) configuration of TigerGraph pod | +| podLabels | (*Optional*)The customized labels will be added to TigerGraph pods | +| podAnnotations | (*Optional*)The customized annotations will be added to TigerGraph pods | +| securityContext | (*Optional*)The [security context](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/) of TigerGraph containers | +| lifecycle.postInitAction | (*Optional*)The bash script will be executed in the first TigerGraph pod whose prefix is `-0` after the TigerGraph system is initialized | diff --git a/k8s/docs/07-reference/integrate-envoy-sidecar.md b/k8s/docs/07-reference/integrate-envoy-sidecar.md index df6148f4..45bc70e4 100644 --- a/k8s/docs/07-reference/integrate-envoy-sidecar.md +++ b/k8s/docs/07-reference/integrate-envoy-sidecar.md @@ -1,11 +1,8 @@ -

How to integrate the envoy sidecar with TG Pod

+# How to integrate the envoy sidecar with TG Pod Starting from Operator version 0.0.6, we support adding sidecar containers to the TG Pod. This guide is dedicated to the integration process of the envoy sidecar with the TG Pod. To proceed, please ensure that you have Operator version 0.0.6 or a newer version installed. Additionally, please note that this document does not delve into the intricacies of envoy, such as TLS configuration. Instead, its primary focus is to describe the configuration of envoy sidecar containers for accessing TG services. - - -Configuration of Envoy sidecar container -======================================== +## Configuration of Envoy sidecar container The initial step involves the creation of a ConfigMap resource and its subsequent mounting onto the pod as the Envoy's configuration. @@ -68,17 +65,15 @@ data: port_value: 14240 ``` -* Add listener to forward the requests to the API gateway of TG - - * `listener_1` is listening on port 12000 which is used for routing to the Nginx service, in `rout_config` part, we use cluster nginx\_service as the route. - -* Add cluster to configure the endpoint for the above listener - - * cluster `nginx_service` specifies the `endpoint` to address 127.0.0.1 and port 14240 where the NGINX service will listen. - +* Add listener to forward the requests to the API gateway of TG + + * `listener_1` is listening on port 12000 which is used for routing to the Nginx service, in `rout_config` part, we use cluster nginx\_service as the route. -Add `sidecarContainers` and `customVolumes` to the TigerGraph CR -================================================================ +* Add cluster to configure the endpoint for the above listener + + * cluster `nginx_service` specifies the `endpoint` to address 127.0.0.1 and port 14240 where the NGINX service will listen. + +## Add `sidecarContainers` and `customVolumes` to the TigerGraph CR ```yaml sidecarContainers: @@ -105,8 +100,7 @@ Add `sidecarContainers` and `customVolumes` to the TigerGraph CR name: sidecar-test-configmap ``` -Validation -========== +## Validation Finally, to ensure the proper functionality of the Envoy sidecar service and its access to the RESTPP and Metric services, we will establish a Kubernetes (K8s) Service. This service will facilitate the verification process. @@ -133,24 +127,21 @@ spec: targetPort: 12000 ``` -* RESTPP - +* RESTPP ```bash curl http://${LBS_EXTERNAL_IP}:12000/restpp/echo # it will return {"error":false, "message":"Hello GSQL"} if accessing successfully. ``` -* Metric - +* Metric ```bash # it will return the latest metrics of cpu and mem. curl http://${LBS_EXTERNAL_IP}:12000/informant/metrics/get/cpu-memory -d '{"ServiceDescriptor":{"ServiceName":"gse","Partition": 1,"Replica":1}}' ``` -* WEB API and console - +* WEB API and console ```bash # it will return {"error":false,"message":"pong","results":null} if accessing successfully @@ -158,4 +149,4 @@ curl http://${LBS_EXTERNAL_IP}:12000/api/ping # Web console # open the the url http://${LBS_EXTERNAL_IP}:12000 in Chrome or other browser. 
-``` \ No newline at end of file +``` diff --git a/k8s/docs/07-reference/labels-used-by-tg.md b/k8s/docs/07-reference/labels-used-by-tg.md index 75f58e03..42593a0d 100644 --- a/k8s/docs/07-reference/labels-used-by-tg.md +++ b/k8s/docs/07-reference/labels-used-by-tg.md @@ -1,7 +1,8 @@ # Labels used by TigerGraph Operator + TigerGraph utilizes specific labels for different purposes in Kubernetes: -### TigerGraph Cluster Pods +## TigerGraph Cluster Pods | Label | Usage | |----------------------------------------|---------------------------------------------------------------------| @@ -10,18 +11,18 @@ TigerGraph utilizes specific labels for different purposes in Kubernetes: | `tigergraph.com/gui-service=true` | Labeled on pods running the GUI service. | | `tigergraph.com/restpp-service=true` | Labeled on pods running the RESTPP service. | -### TigerGraph Job Pods +## TigerGraph Job Pods | Label | Usage | |-------------------------------------------------|------------------------------------------------------------------------------| | `tigergraph.com/cluster-name=CLUSTER_NAME` | Indicates which cluster the job is for. | -| `tigergraph.com/cluster-job={CLUSTER_NAME}-{JOB_TYPE}-job` | Specifies the type of job and the cluster it's associated with (JOB_TYPE: init, upgrade, expand, shrink-pre, shrink-post). | +| `tigergraph.com/cluster-job={CLUSTER_NAME}-{JOB_TYPE}-job` | Specifies the type of job and the cluster it's associated with (JOB_TYPE: init, upgrade, expand, shrink-pre, config-update, ha-update). | -### TigerGraph Backup/Restore Job Pods +## TigerGraph Backup/Restore Job Pods | Label | Usage | |--------------------------------------------------|------------------------------------------------------------------------------| | `tigergraph.com/backup-cluster=CLUSTER_NAME` | Labeled on pods running backup jobs for the specified cluster. | | `tigergraph.com/restore-cluster=CLUSTER_NAME` | Labeled on pods running restore jobs for the specified cluster. | -These labels help identify the purpose and affiliation of various pods within the Kubernetes environment, making it easier to manage and monitor different components of TigerGraph clusters, jobs, backups, and restores. \ No newline at end of file +These labels help identify the purpose and affiliation of various pods within the Kubernetes environment, making it easier to manage and monitor different components of TigerGraph clusters, jobs, backups, and restores. diff --git a/k8s/docs/07-reference/static-and-dynamic-persistent-volume-storage.md b/k8s/docs/07-reference/static-and-dynamic-persistent-volume-storage.md index 347cd830..f4c18547 100644 --- a/k8s/docs/07-reference/static-and-dynamic-persistent-volume-storage.md +++ b/k8s/docs/07-reference/static-and-dynamic-persistent-volume-storage.md @@ -1,7 +1,22 @@ -# How to use static & dynamic persistent volume storage +# How to use static and dynamic persistent volume storage This document describes how to deploy a TigerGraph on K8s with static or dynamic persistent volume storage. 
+- [How to use static and dynamic persistent volume storage](#how-to-use-static-and-dynamic-persistent-volume-storage) + - [GKE](#gke) + - [Static persistent volume storage on GKE](#static-persistent-volume-storage-on-gke) + - [Creating Persistent Volumes From Existing Google Compute Disks](#creating-persistent-volumes-from-existing-google-compute-disks) + - [Create PVs and PVCs for TigerGraph pods on GKE](#create-pvs-and-pvcs-for-tigergraph-pods-on-gke) + - [Dynamically persistent volume storage on GKE](#dynamically-persistent-volume-storage-on-gke) + - [EKS](#eks) + - [Static persistent volume storage on eks](#static-persistent-volume-storage-on-eks) + - [Creating ESB Persistent Volumes](#creating-esb-persistent-volumes) + - [Create PVs and PVCs for TigerGraph pods on EKS](#create-pvs-and-pvcs-for-tigergraph-pods-on-eks) + - [Dynamically persistent volume storage on EKS](#dynamically-persistent-volume-storage-on-eks) + - [Creating Persistent Volumes using the local file system of the local node](#creating-persistent-volumes-using-the-local-file-system-of-the-local-node) + - [Create a persistent volume with a local filesystem](#create-a-persistent-volume-with-a-local-filesystem) + - [Create TG cluster with storage class name pv-local](#create-tg-cluster-with-storage-class-name-pv-local) + ## GKE ### Static persistent volume storage on GKE @@ -11,78 +26,179 @@ You can follow these steps to set up and use static persistent volume storage fo 1. Provision a Persistent volume using a special storage class name. 2. Deploy TigerGraph with persistent volume. -### Creating Persistent Volumes From Existing Google Compute Disks +#### Creating Persistent Volumes From Existing Google Compute Disks - Create disk -Consider a scenario where you are creating a TigerGraph cluster comprising three nodes. To achieve this, you can create three compute disks named tg-pv-1, tg-pv-2, and tg-pv-3, each with a size of 10GB. + Consider a scenario where you are creating a TigerGraph cluster comprising three nodes. To achieve this, you can create three compute disks named tg-pv-1, tg-pv-2, and tg-pv-3, each with a size of 10GB. -```bash -gcloud compute disks create tg-pv-1 --zone=us-central1-a --size=10GB -gcloud compute disks create tg-pv-2 --zone=us-central1-a --size=10GB -gcloud compute disks create tg-pv-3 --zone=us-central1-a --size=10GB - -# delete gcd -gcloud compute disks delete tg-pv-1 --zone=us-central1-a -gcloud compute disks delete tg-pv-2 --zone=us-central1-a -gcloud compute disks delete tg-pv-3 --zone=us-central1-a -``` + ```bash + gcloud compute disks create tg-pv-1 --zone=us-central1-a --size=10GB + gcloud compute disks create tg-pv-2 --zone=us-central1-a --size=10GB + gcloud compute disks create tg-pv-3 --zone=us-central1-a --size=10GB -Now you have three disks available to be used as PV (Persistent Volume) in GKE. 
+ # delete gcd + gcloud compute disks delete tg-pv-1 --zone=us-central1-a + gcloud compute disks delete tg-pv-2 --zone=us-central1-a + gcloud compute disks delete tg-pv-3 --zone=us-central1-a + ``` -- Create static persistent pv +#### Create PVs and PVCs for TigerGraph pods on GKE -```java -apiVersion: v1 -kind: PersistentVolume -metadata: - name: tg-pv-storage1 -spec: - storageClassName: "tg-pv" - capacity: - storage: 10Gi - accessModes: - - ReadWriteOnce - gcePersistentDisk: - pdName: tg-pv-1 - fsType: ext4 ---- -apiVersion: v1 -kind: PersistentVolume -metadata: - name: tg-pv-storage2 -spec: - storageClassName: "tg-pv" - capacity: - storage: 10Gi - accessModes: - - ReadWriteOnce - gcePersistentDisk: - pdName: tg-pv-2 - fsType: ext4 ---- -apiVersion: v1 -kind: PersistentVolume -metadata: - name: tg-pv-storage3 -spec: - storageClassName: "tg-pv" - capacity: - storage: 10Gi - accessModes: - - ReadWriteOnce - gcePersistentDisk: - pdName: tg-pv-3 - fsType: ext4 -``` +Now you have three disks available to be used as PV (Persistent Volume) in GKE. + +There are two methods to associate the created PV with the corresponding PVC of the TigerGraph Pod. Please choose the one you prefer and be aware of the warning information. + +- Create static persistent volume; It will be bound to the specific PVC of TigerGraph pod by the Operator automatically. + +> [!IMPORTANT] +> After creating cluster successfully, please don’t delete PVCs when deleting cluster, otherwise, operator will not be able to create PVC and bind it to the existing PV when recreating cluster. More importantly, disk mount order errors may occur when reproducing associated PVCs and PVs. + + ```yaml + apiVersion: v1 + kind: PersistentVolume + metadata: + name: tg-pv-storage1 + spec: + storageClassName: "tg-pv" + capacity: + storage: 10Gi + accessModes: + - ReadWriteOnce + gcePersistentDisk: + pdName: tg-pv-1 + fsType: ext4 + --- + apiVersion: v1 + kind: PersistentVolume + metadata: + name: tg-pv-storage2 + spec: + storageClassName: "tg-pv" + capacity: + storage: 10Gi + accessModes: + - ReadWriteOnce + gcePersistentDisk: + pdName: tg-pv-2 + fsType: ext4 + --- + apiVersion: v1 + kind: PersistentVolume + metadata: + name: tg-pv-storage3 + spec: + storageClassName: "tg-pv" + capacity: + storage: 10Gi + accessModes: + - ReadWriteOnce + gcePersistentDisk: + pdName: tg-pv-3 + fsType: ext4 + ``` + +- Create static persistent volumes and persistent volume claims, and bind them together. The name of PersistentVolumeClaim must meet the following rule: `tg-data--` + +> [!IMPORTANT] +> The persistent volume claim of TigerGraph pod has a specific name, and you must ensure that the PercientVolume's calimRef has the correct namespace and name. 
+ + ```yaml + apiVersion: v1 + kind: PersistentVolume + metadata: + name: tg-pv-storage1 + spec: + storageClassName: "tg-pv" + capacity: + storage: 10Gi + accessModes: + - ReadWriteOnce + claimRef: + namespace: tigergraph + name: tg-data-test-pv-tg-cluster-0 + gcePersistentDisk: + pdName: tg-pv-1 + fsType: ext4 + --- + apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: tg-data-test-pv-tg-cluster-0 + spec: + storageClassName: "tg-pv" + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + --- + apiVersion: v1 + kind: PersistentVolume + metadata: + name: tg-pv-storage2 + spec: + storageClassName: "tg-pv" + capacity: + storage: 10Gi + accessModes: + - ReadWriteOnce + claimRef: + namespace: tigergraph + name: tg-data-test-pv-tg-cluster-1 + gcePersistentDisk: + pdName: tg-pv-2 + fsType: ext4 + --- + apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: tg-data-test-pv-tg-cluster-1 + spec: + storageClassName: "tg-pv" + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + --- + apiVersion: v1 + kind: PersistentVolume + metadata: + name: tg-pv-storage3 + spec: + storageClassName: "tg-pv" + capacity: + storage: 10Gi + accessModes: + - ReadWriteOnce + claimRef: + namespace: tigergraph + name: tg-data-test-pv-tg-cluster-2 + gcePersistentDisk: + pdName: tg-pv-3 + fsType: ext4 + --- + apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: tg-data-test-pv-tg-cluster-2 + spec: + storageClassName: "tg-pv" + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + ``` - Create TG cluster with storage class name tg-pv -```bash -kubectl tg create --namespace tigergraph --cluster-name test-pv-tg-cluster -k ssh-key-secret --license xxxxxx --size 3 --ha 2 --version 3.9.1 --storage-class tg-pv --cpu 2000m --memory 8G --storage-size 10G -``` + ```bash + kubectl tg create --namespace tigergraph --cluster-name test-pv-tg-cluster -k ssh-key-secret --license xxxxxx --size 3 --ha 2 --version 3.9.1 --storage-class tg-pv --cpu 2000m --memory 8G --storage-size 10G + ``` -### Dynamically persistent volume storage +### Dynamically persistent volume storage on GKE To enable and utilize dynamic persistent volume storage for Google Kubernetes Engine (GKE), follow these steps: @@ -105,28 +221,28 @@ By following these steps, you can efficiently configure and leverage dynamic per - Create a storage class -Save the following manifest as `storage-class.yaml` - -```java -apiVersion: storage.k8s.io/v1 -kind: StorageClass -metadata: - name: gold -provisioner: kubernetes.io/gce-pd -volumeBindingMode: Immediate -allowVolumeExpansion: true -reclaimPolicy: Delete -parameters: - type: pd-ssd - fstype: ext4 - replication-type: none -``` + Save the following manifest as `storage-class.yaml` + + ```yaml + apiVersion: storage.k8s.io/v1 + kind: StorageClass + metadata: + name: gold + provisioner: kubernetes.io/gce-pd + volumeBindingMode: Immediate + allowVolumeExpansion: true + reclaimPolicy: Delete + parameters: + type: pd-ssd + fstype: ext4 + replication-type: none + ``` - Create the storage class. -```bash -kubectl apply -f storage-class.yaml -``` + ```bash + kubectl apply -f storage-class.yaml + ``` A little explanation about the parameters. @@ -141,9 +257,9 @@ A little explanation about the parameters. 
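Before deploying, it can help to confirm that the `gold` storage class was created with the expected provisioner and that volume expansion is allowed (an optional check; the output format varies by Kubernetes version):

```bash
kubectl get storageclass gold
kubectl describe storageclass gold
```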
- Deploy TG with the specific Storage class name -```bash -kubectl tg create --namespace tigergraph --cluster-name dynamic-pv-tg-cluster -k ssh-key-secret --license xxxxxx --size 1 --ha 1 --version 3.9.1 --storage-class gold --cpu 4000m --memory 8G --storage-size 10G -``` + ```bash + kubectl tg create --namespace tigergraph --cluster-name dynamic-pv-tg-cluster -k ssh-key-secret --license xxxxxx --size 1 --ha 1 --version 3.9.1 --storage-class gold --cpu 4000m --memory 8G --storage-size 10G + ``` ## EKS @@ -154,141 +270,245 @@ You can follow these steps to set up and use static persistent volume storage fo 1. Provision a Persistent volume using a special storage class name. 2. Deploy TG with persistent volume. -### **Creating ESB Persistent Volumes** +#### Creating ESB Persistent Volumes - Create ESB volumes -Consider a scenario where you are creating a TigerGraph cluster comprising three nodes. To achieve this, you can create three compute disks named tg-pv-1, tg-pv-2, and tg-pv-3, each with a size of 10GB. - -```bash -$ aws ec2 create-volume --volume-type gp2 --size 10 --availability-zone us-west-1b -{ - "AvailabilityZone": "us-west-1b", - "CreateTime": "2023-05-04T09:00:21+00:00", - "Encrypted": false, - "Size": 10, - "SnapshotId": "", - "State": "creating", - "VolumeId": "vol-01b4da831ee293eb7", - "Iops": 100, - "Tags": [], - "VolumeType": "gp2", - "MultiAttachEnabled": false -} - -$ aws ec2 create-volume --volume-type gp2 --size 10 --availability-zone us-west-1b -{ - "AvailabilityZone": "us-west-1b", - "CreateTime": "2023-05-04T09:00:51+00:00", - "Encrypted": false, - "Size": 10, - "SnapshotId": "", - "State": "creating", - "VolumeId": "vol-0cf5cb04ce0b30eee", - "Iops": 100, - "Tags": [], - "VolumeType": "gp2", - "MultiAttachEnabled": false -} - -$ aws ec2 create-volume --volume-type gp2 --size 10 --availability-zone us-west-1b -{ - "AvailabilityZone": "us-west-1b", - "CreateTime": "2023-05-04T09:01:18+00:00", - "Encrypted": false, - "Size": 10, - "SnapshotId": "", - "State": "creating", - "VolumeId": "vol-056ddf237f6bfe122", - "Iops": 100, - "Tags": [], - "VolumeType": "gp2", - "MultiAttachEnabled": false -} - -# delete esb volume -aws ec2 delete-volume --volume-id vol-01b4da831ee293eb7 -aws ec2 delete-volume --volume-id vol-0cf5cb04ce0b30eee -aws ec2 delete-volume --volume-id vol-056ddf237f6bfe122 -``` - -Now there are three ESB volumes available to be used as PV in GKE. - -- Create static persistent pv - -```yaml -apiVersion: v1 -kind: PersistentVolume -metadata: - name: tg-pv-storage1 -spec: - capacity: - storage: 10Gi - accessModes: - - ReadWriteOnce - persistentVolumeReclaimPolicy: Retain - storageClassName: "tg-pv" - awsElasticBlockStore: - volumeID: vol-01b4da831ee293eb7 - fsType: ext4 ---- -apiVersion: v1 -kind: PersistentVolume -metadata: - name: tg-pv-storage2 -spec: - capacity: - storage: 10Gi - accessModes: - - ReadWriteOnce - persistentVolumeReclaimPolicy: Retain - storageClassName: "tg-pv" - awsElasticBlockStore: - volumeID: vol-0cf5cb04ce0b30eee - fsType: ext4 ---- -apiVersion: v1 -kind: PersistentVolume -metadata: - name: tg-pv-storage3 -spec: - capacity: - storage: 10Gi - accessModes: - - ReadWriteOnce - persistentVolumeReclaimPolicy: Retain - storageClassName: "tg-pv" - awsElasticBlockStore: - volumeID: vol-056ddf237f6bfe122 - fsType: ext4 -``` + Consider a scenario where you are creating a TigerGraph cluster comprising three nodes. To achieve this, you can create three compute disks named tg-pv-1, tg-pv-2, and tg-pv-3, each with a size of 10GB. 
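As an optional variation (a sketch, not part of the original guide), the three EBS volumes can be created in a loop and tagged with names mirroring the GKE example; the `Name` tags are purely for convenience, and the availability zone must match the nodes that will run the TigerGraph pods. The individual commands and their example output follow below.

```bash
for i in 1 2 3; do
  aws ec2 create-volume --volume-type gp2 --size 10 --availability-zone us-west-1b \
    --tag-specifications "ResourceType=volume,Tags=[{Key=Name,Value=tg-pv-${i}}]"
done
```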
+ + ```bash + $ aws ec2 create-volume --volume-type gp2 --size 10 --availability-zone us-west-1b + { + "AvailabilityZone": "us-west-1b", + "CreateTime": "2023-05-04T09:00:21+00:00", + "Encrypted": false, + "Size": 10, + "SnapshotId": "", + "State": "creating", + "VolumeId": "vol-01b4da831ee293eb7", + "Iops": 100, + "Tags": [], + "VolumeType": "gp2", + "MultiAttachEnabled": false + } + + $ aws ec2 create-volume --volume-type gp2 --size 10 --availability-zone us-west-1b + { + "AvailabilityZone": "us-west-1b", + "CreateTime": "2023-05-04T09:00:51+00:00", + "Encrypted": false, + "Size": 10, + "SnapshotId": "", + "State": "creating", + "VolumeId": "vol-0cf5cb04ce0b30eee", + "Iops": 100, + "Tags": [], + "VolumeType": "gp2", + "MultiAttachEnabled": false + } + + $ aws ec2 create-volume --volume-type gp2 --size 10 --availability-zone us-west-1b + { + "AvailabilityZone": "us-west-1b", + "CreateTime": "2023-05-04T09:01:18+00:00", + "Encrypted": false, + "Size": 10, + "SnapshotId": "", + "State": "creating", + "VolumeId": "vol-056ddf237f6bfe122", + "Iops": 100, + "Tags": [], + "VolumeType": "gp2", + "MultiAttachEnabled": false + } + + # delete esb volume + aws ec2 delete-volume --volume-id vol-01b4da831ee293eb7 + aws ec2 delete-volume --volume-id vol-0cf5cb04ce0b30eee + aws ec2 delete-volume --volume-id vol-056ddf237f6bfe122 + ``` + +#### Create PVs and PVCs for TigerGraph pods on EKS + +Now there are three ESB volumes available to be used as PV in EKS. + +There are two methods to associate the created PVs with the corresponding PVCs of the TigerGraph Pod. Please choose the one you prefer and be aware of the warning information. + +- Create static persistent volume; It will be bound to the specific PVC of TigerGraph pod by the Operator automatically. + +> [!IMPORTANT] +> After creating cluster successfully, please don’t delete PVCs when deleting cluster, otherwise, operator will not be able to create PVC and bind it to the existing PV when recreating cluster. More importantly, disk mount order errors may occur when reproducing associated PVCs and PVs. + + ```yaml + apiVersion: v1 + kind: PersistentVolume + metadata: + name: tg-pv-storage1 + spec: + capacity: + storage: 10Gi + accessModes: + - ReadWriteOnce + persistentVolumeReclaimPolicy: Retain + storageClassName: "tg-pv" + awsElasticBlockStore: + volumeID: vol-01b4da831ee293eb7 + fsType: ext4 + --- + apiVersion: v1 + kind: PersistentVolume + metadata: + name: tg-pv-storage2 + spec: + capacity: + storage: 10Gi + accessModes: + - ReadWriteOnce + persistentVolumeReclaimPolicy: Retain + storageClassName: "tg-pv" + awsElasticBlockStore: + volumeID: vol-0cf5cb04ce0b30eee + fsType: ext4 + --- + apiVersion: v1 + kind: PersistentVolume + metadata: + name: tg-pv-storage3 + spec: + capacity: + storage: 10Gi + accessModes: + - ReadWriteOnce + persistentVolumeReclaimPolicy: Retain + storageClassName: "tg-pv" + awsElasticBlockStore: + volumeID: vol-056ddf237f6bfe122 + fsType: ext4 + ``` + +- Create static persistent volumes and persistent volume claims, and bind them together. The name of PersistentVolumeClaim must meet the following rule: `tg-data--`. + +> [!IMPORTANT] +> The persistent volume claim of TigerGraph pod has a specific name, and you must ensure that the PercientVolume's calimRef has the correct namespace and name. 
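With this second method, after applying the manifests below you can optionally verify that each pre-created PersistentVolume ended up `Bound` to the claim named in its `claimRef` (a sketch; the resource names follow the example manifests):

```bash
kubectl get pv \
  -o custom-columns='NAME:.metadata.name,STATUS:.status.phase,CLAIM:.spec.claimRef.name'
```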
+ + ```yaml + apiVersion: v1 + kind: PersistentVolume + metadata: + name: tg-pv-storage1 + spec: + capacity: + storage: 10Gi + accessModes: + - ReadWriteOnce + claimRef: + namespace: tigergraph + name: tg-data-test-pv-tg-cluster-0 + persistentVolumeReclaimPolicy: Retain + storageClassName: "tg-pv" + awsElasticBlockStore: + volumeID: vol-01b4da831ee293eb7 + fsType: ext4 + --- + apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: tg-data-test-pv-tg-cluster-0 + spec: + storageClassName: "tg-pv" + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + --- + apiVersion: v1 + kind: PersistentVolume + metadata: + name: tg-pv-storage2 + spec: + capacity: + storage: 10Gi + accessModes: + - ReadWriteOnce + claimRef: + namespace: tigergraph + name: tg-data-test-pv-tg-cluster-1 + persistentVolumeReclaimPolicy: Retain + storageClassName: "tg-pv" + awsElasticBlockStore: + volumeID: vol-0cf5cb04ce0b30eee + fsType: ext4 + --- + apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: tg-data-test-pv-tg-cluster-1 + spec: + storageClassName: "tg-pv" + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + --- + apiVersion: v1 + kind: PersistentVolume + metadata: + name: tg-pv-storage3 + spec: + capacity: + storage: 10Gi + accessModes: + - ReadWriteOnce + claimRef: + namespace: tigergraph + name: tg-data-test-pv-tg-cluster-2 + persistentVolumeReclaimPolicy: Retain + storageClassName: "tg-pv" + awsElasticBlockStore: + volumeID: vol-056ddf237f6bfe122 + fsType: ext4 + --- + apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: tg-data-test-pv-tg-cluster-2 + spec: + storageClassName: "tg-pv" + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + ``` - Create TigerGraph cluster with storage class name tg-pv -The ESB volumes are located in zone us-west-1b, configuring the node affinity to ensure the TG pods are scheduled to the nodes of this zone. + The ESB volumes are located in zone us-west-1b, configuring the node affinity to ensure the TG pods are scheduled to the nodes of this zone. 
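Before adding the affinity rule, it can be worth confirming that the cluster actually has schedulable nodes in that zone; this assumes the standard `topology.kubernetes.io/zone` label, the same key used in the affinity file below:

```bash
kubectl get nodes -l topology.kubernetes.io/zone=us-west-1b
```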
-creating an affinity configuration file like this: + creating an affinity configuration file like this: -tg-affinity.yaml + tg-affinity.yaml -```yaml -affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: topology.kubernetes.io/zone - operator: In - values: - - us-west-1b -``` + ```yaml + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: topology.kubernetes.io/zone + operator: In + values: + - us-west-1b + ``` -```bash -kubectl tg create --cluster-name ${YOUR_CLUSTER_NAME} --private-key-secret ${YOUR_SSH_KEY_SECRET_NAME} --size 3 --ha 2 --version 3.9.1 --license ${LICENSE} --service-account-name ${SERVICE_ACCOUNT_NAME} \ - --storage-class pv-local --storage-size 10G --cpu 4000m --memory 8Gi --namespace ${YOUR_NAMESPACE} --affinity tg-affinity.yaml -``` + ```bash + kubectl tg create --cluster-name ${YOUR_CLUSTER_NAME} --private-key-secret ${YOUR_SSH_KEY_SECRET_NAME} --size 3 --ha 2 --version 3.9.1 --license ${LICENSE} --service-account-name ${SERVICE_ACCOUNT_NAME} \ + --storage-class pv-local --storage-size 10G --cpu 4000m --memory 8Gi --namespace ${YOUR_NAMESPACE} --affinity tg-affinity.yaml + ``` -- Dynamically persistent volume storage +### Dynamically persistent volume storage on EKS You can follow these steps to set up and use dynamic persistent volume storage for EKS: @@ -310,26 +530,26 @@ These storage class segregations are completely based on the project requirement - Create a storage class -Save the following manifest as `storage-class.yaml` - -```java -apiVersion: storage.k8s.io/v1 -kind: StorageClass -metadata: - name: gold -parameters: - type: gp3 - fsType: ext4 -provisioner: ebs.csi.aws.com -reclaimPolicy: Delete -volumeBindingMode: WaitForFirstConsumer -``` + Save the following manifest as `storage-class.yaml` + + ```yaml + apiVersion: storage.k8s.io/v1 + kind: StorageClass + metadata: + name: gold + parameters: + type: gp3 + fsType: ext4 + provisioner: ebs.csi.aws.com + reclaimPolicy: Delete + volumeBindingMode: WaitForFirstConsumer + ``` - Create the storage class. 
-```bash -kubectl apply -f storage-class.yaml -``` + ```bash + kubectl apply -f storage-class.yaml + ``` ## Creating Persistent Volumes using the local file system of the local node @@ -344,7 +564,7 @@ TigerGraph container will mount data from the path "/home/tigergraph/tigergraph/ You can set the storageClassName to pv-local, if you modify the name, you should use the same when creating TigerGraph cluster -```java +```yaml apiVersion: v1 kind: PersistentVolume metadata: diff --git a/k8s/docs/08-release-notes/README.md b/k8s/docs/08-release-notes/README.md index bd446564..fed59dca 100644 --- a/k8s/docs/08-release-notes/README.md +++ b/k8s/docs/08-release-notes/README.md @@ -4,10 +4,11 @@ Those document describes the new features, improvements, bugfixes for all of ope Please see the detailed documentation of each operator version release notes as follows: +- [Operator 0.1.0](./operator-0.1.0.md) - [Operator 0.0.9](./operator-0.0.9.md) - [Operator 0.0.7](./operator-0.0.7.md) - [Operator 0.0.6](./operator-0.0.6.md) - [Operator 0.0.5](./operator-0.0.5.md) - [Operator 0.0.4](./operator-0.0.4.md) - [Operator 0.0.3](./operator-0.0.3.md) -- [Operator 0.0.2](./operator-0.0.2.md) \ No newline at end of file +- [Operator 0.0.2](./operator-0.0.2.md) diff --git a/k8s/docs/08-release-notes/operator-0.1.0.md b/k8s/docs/08-release-notes/operator-0.1.0.md new file mode 100644 index 00000000..f716944a --- /dev/null +++ b/k8s/docs/08-release-notes/operator-0.1.0.md @@ -0,0 +1,135 @@ +# Operator 0.1.0 Release notes + +## Overview + +**Operator 0.1.0** is now available, designed to work seamlessly with **TigerGraph version 3.10.0**. + +> [!WARNING] +> Operator 0.1.0 introduces a breaking change to TigerGraph CRD, TigerGraphBackup, and TigerGraphRestore CRD. It is crucial to uninstall the previous Operator version, remove all old CRDs, and install the new version. Remember to retain the PVC of the existing cluster to recreate it after the upgrade. + +In this release, **Operator 0.1.0** brings significant enhancements: + +- Customizing and updating TigerGraph configurations via TigerGraph CR or kubectl-tg plugin +- Pausing and resuming TigerGraph cluster +- Customizing SecurityContext of TigerGraph Container +- Customizing labels and annotations of TigerGraph Pod +- Introducing lifecycle hooks for TigerGraph CR with the addition of postInitAction +- Independent modification of replication factor +- Mounting multiple PVC and PV for pods of TigerGraph +- Customizing volume mounts for TigerGraph container +- Customizing ingressClassName of ingress external service + +Operator 0.1.0 has refactored TigerGraph CRD, TigerGraphBackup, and TigerGraphRestore CRD, simplifying cluster provision and improving usability. These changes have two significant impacts: + +1. If you have deployed TigerGraph using the Operator and wish to upgrade to 0.1.0, carefully follow the upgrade documentation. [How to upgrade TigerGraph Kubernetes Operator](../04-manage/operator-upgrade.md) +2. For accessing TigerGraph services outside Kubernetes, only one external service is now used to access RESTPP, GUI, and Metrics services. Update related configurations if your client application depends on these services. + +### kubectl plugin installation + +To install the kubectl plugin for Operator 0.1.0, execute the following command: + +```bash +curl https://dl.tigergraph.com/k8s/0.1.0/kubectl-tg -o kubectl-tg +sudo install kubectl-tg /usr/local/bin/ +``` + +### Operator upgrading + +This new operator version upgrade brings breaking changes. 
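Before starting, it may help to record which TigerGraph custom resources and PVCs currently exist, since the old CRDs will be deleted and the PVCs must be preserved (an optional inventory step; the `tigergraph` namespace is an assumption):

```bash
kubectl get tigergraphs.graphdb.tigergraph.com --all-namespaces
kubectl get pvc -n tigergraph
```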
Refer to the documentation [How to upgrade TigerGraph Kubernetes Operator](../04-manage/operator-upgrade.md) for details. + +- Delete the existing TG cluster and retain the PVCs: + +```bash +# You should take note of the cluster size, HA and so on before you delete it, you'll use it when you recreate the cluster +# You can export the yaml resource file of TG cluster for the later restoring +kubectl tg export --cluster-name ${YOUR_CLUSTER_NAME} -n ${NAMESPACE_OF_CLUSTER} +kubectl tg delete --cluster-name ${YOUR_CLUSTER_NAME} -n ${NAMESPACE_OF_CLUSTER} +``` + +- Uninstall the old version of the Operator: + +```bash +kubectl tg uninstall -n ${NAMESPACE_OF_OPERATOR} +``` + +- Delete old versions of TG CRDs: + +```bash +kubectl delete crd tigergraphs.graphdb.tigergraph.com +kubectl delete crd tigergraphbackups.graphdb.tigergraph.com +kubectl delete crd tigergraphbackupschedules.graphdb.tigergraph.com +kubectl delete crd tigergraphrestores.graphdb.tigergraph.com +``` + +- Reinstall the new version of the Operator: + +```bash +kubectl tg init -n ${NAMESPACE_OF_OPERATOR} +``` + +- Recreate the TigerGraph cluster if necessary: + +Extract parameters from the backup YAML resource file generated in step 1, or modify the YAML resource file and apply it directly. + +```bash +# You can get the following parameters from the backup yaml resoure file in step 1 +kubectl tg create --cluster-name ${YOUR_CLUSTER_NAME} -n ${NAMESPACE_OF_CLUSTER} \ +--size ${CLUSTER_size} --ha ${CLUSTER_HA} --private-key-secret ${YOUR_PRIVATE_KEY_SECRET} \ +--version ${TG_VERSION} --storage-class ${YOUR_STORAGE_CLASS} --storage-size ${YOUR_STORAGE_SIZE} --cpu 3000m --memory 6Gi +``` + +## New features + +- Support customizing and updating TigerGraph configurations via TigerGraph CR or kubectl-tg plugin([TP-4166](https://graphsql.atlassian.net/browse/TP-4166) and [TP-4189](https://graphsql.atlassian.net/browse/TP-4189)) + +- Support pausing and resuming TigerGraph cluster ([TP-4263](https://graphsql.atlassian.net/browse/TP-4263)) + +- Support customizing SecurityContext of TigerGraph ([TP-4515](https://graphsql.atlassian.net/browse/TP-4515)) + +- Support customizing labels and annotations of TigerGraph Pod ([TP-4309](https://graphsql.atlassian.net/browse/TP-4309)) + +- Support Lifecycle hooks for TigerGraph CR: postInitAction ([TP-4308](https://graphsql.atlassian.net/browse/TP-4308)) + +- Support mounting multiple PVC and PV for pods of TG([TP-3590](https://graphsql.atlassian.net/browse/TP-3590)) + +- Support customized volume mounts for TG container([TP-4352](https://graphsql.atlassian.net/browse/TP-4352)) + +- Support configuring additional storage and custom volume mounts of the TG container in kubectl-tg plugin([TP-4363](https://graphsql.atlassian.net/browse/TP-4363)) + +- Support customizing ingressClassName of ingress external service([TP-4244](https://graphsql.atlassian.net/browse/TP-4244)) + +- Support customizing custom volume with a new option --custom-volume in kubectl-tg plugin([TP-4531](https://graphsql.atlassian.net/browse/TP-4531)) + +- Supports creating and updating clusters without external services via kubectl-tg([TP-4435](https://graphsql.atlassian.net/browse/TP-4435)) + +- Supports independent modification of replication factor via TigerGraph CR([TP-4443](https://graphsql.atlassian.net/browse/TP-4443)) and kubectl-tg plugin([TP-4448](https://graphsql.atlassian.net/browse/TP-4448)) + +## Improvements + +- Improve state transitions of TigerGraph CR ([TP-4211](https://graphsql.atlassian.net/browse/TP-4211)) + +- 
Remove the redundant configuration of Tigergraph CRD and refactor the status output([TP-3710](https://graphsql.atlassian.net/browse/TP-3710)) + +- Removing dynamic pod labels and RESTPP external service([TP-3623](https://graphsql.atlassian.net/browse/TP-3623)); kubectl-tg plugin updated accordingly([TP-3729](https://graphsql.atlassian.net/browse/TP-3729)) + +- Remove field spec.InitTGConfig and place its remaining fields into subfields of spec and Status([TP-4230](https://graphsql.atlassian.net/browse/TP-4230)) + +- Remove the InitJob field to improve usability([TP-3585](https://graphsql.atlassian.net/browse/TP-3585)) + +- Refactor and improve the status output of TG CR([TP-3740](https://graphsql.atlassian.net/browse/TP-3740)) + +- Improve TigerGraphBackup and TigerGraphRestore CRD([TP-3726](https://graphsql.atlassian.net/browse/TP-3726))) + +- Removing the pods of scale down automatically after executing shrinking successfully([TP-4245](https://graphsql.atlassian.net/browse/TP-4245)) + +- Invokes switch_version.sh to switch new version of TG to decouple the DB upgrade business logic([TP-4291](https://graphsql.atlassian.net/browse/TP-4291)) + +- Improve the failover process of expansion/shrinking for the new error code of ETCD([TP-4360](https://graphsql.atlassian.net/browse/TP-4360)) + +- Use `gadmin start all --local` to start local services([TP-4327](https://graphsql.atlassian.net/browse/TP-4327)) + +## Bugfixes + +- Fix the issue of readiness check when the license is expired([TP-4451](https://graphsql.atlassian.net/browse/TP-4451)) + +- Add retry logic when resetting services and apply new configuration during expansion/shrinking([TP-4588](https://graphsql.atlassian.net/browse/TP-4588) TigerGraph 3.10.0 and above required) diff --git a/k8s/docs/09-samples/backup-restore/backup-schedule-s3.yaml b/k8s/docs/09-samples/backup-restore/backup-schedule-s3.yaml index c8b3e6da..c6bfde6d 100644 --- a/k8s/docs/09-samples/backup-restore/backup-schedule-s3.yaml +++ b/k8s/docs/09-samples/backup-restore/backup-schedule-s3.yaml @@ -22,8 +22,7 @@ spec: s3Bucket: # specify the bucket you want to use bucketName: operator-backup - secretKey: - name: s3-secret + secretKeyName: s3-secret # Configure the name of backup files and the path storing temporary files backupConfig: tag: s3 diff --git a/k8s/docs/09-samples/backup-restore/backup-to-s3.yaml b/k8s/docs/09-samples/backup-restore/backup-to-s3.yaml index 0fd9366f..7df6e588 100644 --- a/k8s/docs/09-samples/backup-restore/backup-to-s3.yaml +++ b/k8s/docs/09-samples/backup-restore/backup-to-s3.yaml @@ -10,8 +10,7 @@ spec: # Specify the name of the S3 bucket you want to use bucketName: operator-backup # Specify the Secret containing the S3 access key and secret access key - secretKey: - name: aws-secret + secretKeyName: aws-secret # Configure the name of backup files and the path storing temporary files backupConfig: diff --git a/k8s/docs/09-samples/backup-restore/restore-from-s3.yaml b/k8s/docs/09-samples/backup-restore/restore-from-s3.yaml index 03355024..a8ed0bc1 100644 --- a/k8s/docs/09-samples/backup-restore/restore-from-s3.yaml +++ b/k8s/docs/09-samples/backup-restore/restore-from-s3.yaml @@ -14,7 +14,6 @@ spec: s3Bucket: # specify the bucket you want to use bucketName: operator-backup - secretKey: - name: aws-secret + secretKeyName: aws-secret # Specify the name of cluster clusterName: test-cluster \ No newline at end of file diff --git a/k8s/docs/09-samples/deploy/tigergraph-cluster.yaml b/k8s/docs/09-samples/deploy/tigergraph-cluster.yaml index 
5fd41525..fa800f4f 100644 --- a/k8s/docs/09-samples/deploy/tigergraph-cluster.yaml +++ b/k8s/docs/09-samples/deploy/tigergraph-cluster.yaml @@ -8,15 +8,8 @@ spec: imagePullPolicy: Always imagePullSecrets: - name: tigergraph-image-pull-secret - initJob: - image: docker.io/tigergraph/tigergraph-k8s-init:0.0.7 - imagePullPolicy: Always - imagePullSecrets: - - name: tigergraph-image-pull-secret - initTGConfig: - ha: 2 - license: xxxxxxxxxxxxxxxxx - version: 3.9.2 + ha: 2 + license: xxxxxxxxxxxxxxxxx listener: type: LoadBalancer privateKeyName: ssh-key-secret
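For reference, the updated sample above can be applied directly once the license placeholder is replaced; the file name and namespace here are assumptions, not part of the sample:

```bash
kubectl apply -f tigergraph-cluster.yaml -n tigergraph
kubectl get pods -n tigergraph -w
```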