From 5d5cb3eb318cbe3eb58d5313324cfee2845e83c3 Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Tue, 31 Dec 2024 10:37:55 +0800 Subject: [PATCH] docs --- docs/deployment/engine_on_kubernetes.md | 21 +++++++++++++++++++ .../engine/spark/SparkProcessBuilder.scala | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/docs/deployment/engine_on_kubernetes.md b/docs/deployment/engine_on_kubernetes.md index 2d46cda7cb0..c7cbb25fd51 100644 --- a/docs/deployment/engine_on_kubernetes.md +++ b/docs/deployment/engine_on_kubernetes.md @@ -54,6 +54,27 @@ directory from growing indefinitely. Since Kyuubi v1.11.0, you can configure `sp placeholders `{{YEAR}}`, `{{MONTH}}` and `{{DAY}}`, and enable `kyuubi.kubernetes.spark.autoCreateFileUploadPath.enabled` to let Kyuubi server create the directory with 777 permission automatically before submitting the Spark application. +Note that Spark creates a sub-directory `s"spark-upload-${UUID.randomUUID()}"` under the `spark.kubernetes.file.upload.path` +for each upload, so the administrator still needs to clean up the staging directory periodically. + +For example, the user can set the following configurations in `kyuubi-defaults.conf` to enable monthly rolling support +for `spark.kubernetes.file.upload.path`: + +``` +kyuubi.kubernetes.spark.autoCreateFileUploadPath.enabled=true +spark.kubernetes.file.upload.path=hdfs://hadoop-cluster/spark-upload-{{YEAR}}{{MONTH}} +``` + +and the staging files would look like: + +``` +hdfs://hadoop-cluster/spark-upload-202412/spark-upload-f2b71340-dc1d-4940-89e2-c5fc31614eb4 +hdfs://hadoop-cluster/spark-upload-202412/spark-upload-173a8653-4d3e-48c0-b8ab-b7f92ae582d6 +hdfs://hadoop-cluster/spark-upload-202501/spark-upload-3b22710f-a4a0-40bb-a3a8-16e481038a63 +``` + +then the administrator can safely delete `hdfs://hadoop-cluster/spark-upload-202412` after 2025-01-01. 
+ ### Docker Image Spark ships a `./bin/docker-image-tool.sh` script to build and publish the Docker images for running Spark applications on Kubernetes. diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala index b9b029c249b..552c495523a 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/spark/SparkProcessBuilder.scala @@ -338,7 +338,7 @@ class SparkProcessBuilder( def isK8sClusterMode: Boolean = { clusterManager().exists(cm => cm.toLowerCase(Locale.ROOT).startsWith("k8s")) && - deployMode().exists(_.toLowerCase(Locale.ROOT) == "cluster") + deployMode().exists(_.toLowerCase(Locale.ROOT) == "cluster") } def kubernetesContext(): Option[String] = {