From 9a4c251b2fd334131b9ac5944c610919b92a0f98 Mon Sep 17 00:00:00 2001 From: vsoch Date: Fri, 3 Jan 2025 08:15:46 -0700 Subject: [PATCH] fluxion: add service to provide scheduler This build is a bit tricky, because we want to share packages for jgf/jobspecs along with protos but do not want to require flux-sched libs locally to build charts, etc. The trick is to store them alongside the fluxion build with underscore directories (so they are ignored on the local machine) and then move those directories to be found and used in the Dockerfile. It is imperfect, but an OK solution for now. Signed-off-by: vsoch --- .github/LICENSE.fluence | 202 +++++ Dockerfile | 2 +- Makefile | 153 +--- README.md | 6 +- build/fluxion/Dockerfile | 46 ++ build/fluxion/cmd/_fluxion/main.go | 80 ++ build/fluxion/pkg/_fluxion/fluxion.go | 140 ++++ build/fluxion/pkg/_fluxion/utils/types.go | 15 + build/fluxion/pkg/_fluxion/utils/utils.go | 319 ++++++++ chart/templates/configmap.yaml | 136 ++++ chart/templates/deployment.yaml | 33 +- chart/values-template.yaml | 24 + chart/values.yaml | 39 +- cmd/{ => manager}/main.go | 0 config/manager/kustomization.yaml | 3 + config/manager/manager.yaml | 44 +- {examples/dist => dist}/fluxqueue-dev.yaml | 0 {examples/dist => dist}/fluxqueue.yaml | 0 go.mod | 1 + go.sum | 2 + hack/quick-build-kind.sh | 3 +- pkg/defaults/defaults.go | 5 + pkg/fluxion-grpc/fluxion.pb.go | 888 +++++++++++++++++++++ pkg/fluxion-grpc/fluxion.proto | 85 ++ pkg/fluxion-grpc/fluxion_grpc.pb.go | 143 ++++ pkg/jgf/jgf.go | 265 ++++++ pkg/jgf/jgf_test.go | 77 ++ pkg/jgf/types.go | 147 ++++ pkg/jobspec/jobspec.go | 129 +++ pkg/jobspec/types.go | 53 ++ pkg/service-grpc/service.pb.go | 354 ++++++++ pkg/service-grpc/service.proto | 32 + pkg/service-grpc/service_grpc.pb.go | 181 +++++ pkg/service/service.go | 61 ++ 34 files changed, 3538 insertions(+), 130 deletions(-) create mode 100644 .github/LICENSE.fluence create mode 100644 build/fluxion/Dockerfile create mode 100644 
build/fluxion/cmd/_fluxion/main.go create mode 100644 build/fluxion/pkg/_fluxion/fluxion.go create mode 100644 build/fluxion/pkg/_fluxion/utils/types.go create mode 100644 build/fluxion/pkg/_fluxion/utils/utils.go create mode 100644 chart/templates/configmap.yaml create mode 100644 chart/values-template.yaml rename cmd/{ => manager}/main.go (100%) rename {examples/dist => dist}/fluxqueue-dev.yaml (100%) rename {examples/dist => dist}/fluxqueue.yaml (100%) create mode 100644 pkg/defaults/defaults.go create mode 100644 pkg/fluxion-grpc/fluxion.pb.go create mode 100644 pkg/fluxion-grpc/fluxion.proto create mode 100644 pkg/fluxion-grpc/fluxion_grpc.pb.go create mode 100644 pkg/jgf/jgf.go create mode 100644 pkg/jgf/jgf_test.go create mode 100644 pkg/jgf/types.go create mode 100644 pkg/jobspec/jobspec.go create mode 100644 pkg/jobspec/types.go create mode 100644 pkg/service-grpc/service.pb.go create mode 100644 pkg/service-grpc/service.proto create mode 100644 pkg/service-grpc/service_grpc.pb.go create mode 100644 pkg/service/service.go diff --git a/.github/LICENSE.fluence b/.github/LICENSE.fluence new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/.github/LICENSE.fluence @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/Dockerfile b/Dockerfile index a48973e..368df33 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,7 +12,7 @@ COPY go.sum go.sum RUN go mod download # Copy the go source -COPY cmd/main.go cmd/main.go +COPY cmd/manager/main.go cmd/main.go COPY api/ api/ COPY internal/controller/ internal/controller/ diff --git a/Makefile b/Makefile index fef72bd..28c904f 100644 --- a/Makefile +++ b/Makefile @@ -4,47 +4,7 @@ # - use the VERSION as arg of the bundle target (e.g make bundle VERSION=0.0.2) # - use environment variables to overwrite this value (e.g export VERSION=0.0.2) VERSION ?= 0.0.1 - -# CHANNELS define the bundle channels used in the bundle. -# Add a new line here if you would like to change its default config. (E.g CHANNELS = "candidate,fast,stable") -# To re-generate a bundle for other specific channels without changing the standard setup, you can: -# - use the CHANNELS as arg of the bundle target (e.g make bundle CHANNELS=candidate,fast,stable) -# - use environment variables to overwrite this value (e.g export CHANNELS="candidate,fast,stable") -ifneq ($(origin CHANNELS), undefined) -BUNDLE_CHANNELS := --channels=$(CHANNELS) -endif - -# DEFAULT_CHANNEL defines the default channel used in the bundle. -# Add a new line here if you would like to change its default config. 
(E.g DEFAULT_CHANNEL = "stable") -# To re-generate a bundle for any other default channel without changing the default setup, you can: -# - use the DEFAULT_CHANNEL as arg of the bundle target (e.g make bundle DEFAULT_CHANNEL=stable) -# - use environment variables to overwrite this value (e.g export DEFAULT_CHANNEL="stable") -ifneq ($(origin DEFAULT_CHANNEL), undefined) -BUNDLE_DEFAULT_CHANNEL := --default-channel=$(DEFAULT_CHANNEL) -endif -BUNDLE_METADATA_OPTS ?= $(BUNDLE_CHANNELS) $(BUNDLE_DEFAULT_CHANNEL) - -# IMAGE_TAG_BASE defines the docker.io namespace and part of the image name for remote images. -# This variable is used to construct full image tags for bundle and catalog images. -# -# For example, running 'make bundle-build bundle-push catalog-build catalog-push' will build and push both -# converged-computing.org/fluxqueue-bundle:$VERSION and converged-computing.org/fluxqueue-catalog:$VERSION. -IMAGE_TAG_BASE ?= converged-computing.org/fluxqueue - -# BUNDLE_IMG defines the image:tag used for the bundle. -# You can use it as an arg. (E.g make bundle-build BUNDLE_IMG=/:) -BUNDLE_IMG ?= $(IMAGE_TAG_BASE)-bundle:v$(VERSION) - -# BUNDLE_GEN_FLAGS are the flags passed to the operator-sdk generate bundle command -BUNDLE_GEN_FLAGS ?= -q --overwrite --version $(VERSION) $(BUNDLE_METADATA_OPTS) - -# USE_IMAGE_DIGESTS defines if images are resolved via tags or digests -# You can enable this value if you would like to use SHA Based Digests -# To enable set flag to true -USE_IMAGE_DIGESTS ?= false -ifeq ($(USE_IMAGE_DIGESTS), true) - BUNDLE_GEN_FLAGS += --use-image-digests -endif +RELEASE_VERSION?=v$(shell date +%Y%m%d)-$(shell git describe --tags --match "v*") # Set the Operator SDK version to use. By default, what is installed on the system is used. # This is useful for CI or a project to utilize a specific version of the operator-sdk toolkit. 
@@ -55,6 +15,14 @@ REGISTRY ?= ghcr.io/converged-computing IMG ?= $(REGISTRY)/fluxqueue:latest DEVIMG ?= $(REGISTRY)/fluxqueue:test POSTGRES_IMAGE ?= $(REGISTRY)/fluxqueue-postgres:latest +FLUXION_IMAGE ?= $(REGISTRY)/fluxnetes-scheduler:latest + +# Build for fluxion (scheduler) +FLUX_SCHED_ROOT ?= /opt/flux-sched +INSTALL_PREFIX ?= /usr +LIB_PREFIX ?= /usr/lib +COMMONENVVAR=GOOS=$(shell uname -s | tr A-Z a-z) +BUILDENVVAR=CGO_CFLAGS="-I${FLUX_SCHED_ROOT} -I${FLUX_SCHED_ROOT}/resource/reapi/bindings/c" CGO_LDFLAGS="-L${LIB_PREFIX} -L${LIB_PREFIX}/flux -L${FLUX_SCHED_ROOT}/resource/reapi/bindings -lreapi_cli -lflux-idset -lstdc++ -ljansson -lhwloc -lflux-hostlist -lboost_graph -lyaml-cpp" # ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary. ENVTEST_K8S_VERSION = 1.30.0 @@ -136,11 +104,18 @@ lint-fix: golangci-lint ## Run golangci-lint linter and perform fixes .PHONY: build build: manifests generate fmt vet ## Build manager binary. - go build -o bin/manager cmd/main.go + go build -o bin/manager cmd/manager/main.go + +# This should only be run in the docker build - the fluxion package has flux dependencies +# that typically aren't on the host. +# The go.build is removed at build time to enable the package +.PHONY: fluxion +fluxion: + $(COMMONENVVAR) $(BUILDENVVAR) go build -ldflags '-w' -o bin/fluxion-service cmd/fluxion/main.go .PHONY: run run: manifests generate fmt vet ## Run a controller from your host. - go run ./cmd/main.go + go run ./cmd/manager/main.go # If you wish to build the manager image targeting other platforms you can use the --platform flag. # (i.e. docker build --platform linux/arm64). However, you must enable docker buildKit for it. @@ -149,9 +124,15 @@ run: manifests generate fmt vet ## Run a controller from your host. docker-build: ## Build docker image with the manager. $(CONTAINER_TOOL) build -t ${IMG} . 
+.PHONY: build-fluxion
+build-fluxion:
+	docker build -f ./build/fluxion/Dockerfile --build-arg ARCH="amd64" --build-arg RELEASE_VERSION="$(RELEASE_VERSION)" -t $(FLUXION_IMAGE) .
+
+build-postgres:
+	docker build -f build/postgres/Dockerfile -t ${POSTGRES_IMAGE} .
 .PHONY: build-all
-build-all: docker-build build-postgres
+build-all: docker-build build-postgres build-fluxion
 .PHONY: docker-push
 docker-push: ## Push docker image with the manager.
@@ -174,11 +155,16 @@ docker-buildx: ## Build and push docker image for the manager for cross-platform
-	$(CONTAINER_TOOL) buildx rm fluxqueue-builder
 	rm Dockerfile.cross
-.PHONY: build-installer
-build-installer: manifests generate kustomize ## Generate a consolidated YAML with CRDs and deployment.
-	mkdir -p dist
-	cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG}
-	$(KUSTOMIZE) build config/default > dist/install.yaml
+.PHONY: protoc
+protoc: $(LOCALBIN)
+	GOBIN=$(LOCALBIN) go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.28
+	GOBIN=$(LOCALBIN) go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@v1.2
+
+# You can use make protoc to download proto
+.PHONY: proto
+proto: protoc
+	PATH=$(LOCALBIN):${PATH} protoc --go_out=. --go_opt=paths=source_relative --go-grpc_out=. --go-grpc_opt=paths=source_relative pkg/fluxion-grpc/fluxion.proto
+	PATH=$(LOCALBIN):${PATH} protoc --go_out=. --go_opt=paths=source_relative --go-grpc_out=. --go-grpc_opt=paths=source_relative pkg/service-grpc/service.proto
 ##@ Deployment
@@ -202,10 +188,7 @@ deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in
 .PHONY: build-config
 build-config: manifests kustomize ## Deploy controller to the K8s cluster specified in ~/.kube/config.
 	cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG}
-	$(KUSTOMIZE) build config/default > examples/dist/fluxqueue.yaml
-
-.PHONY build-postgres:
-	docker build -f build/postgres/Dockerfile -t ${POSTGRES_IMAGE} .
+ $(KUSTOMIZE) build config/default > dist/fluxqueue.yaml # Build a test image, push to the registry at test, and apply the build-config .PHONY: test-deploy @@ -213,12 +196,12 @@ test-deploy: manifests kustomize build-postgres docker build --no-cache -t ${DEVIMG} . docker push ${DEVIMG} cd config/manager && $(KUSTOMIZE) edit set image controller=${DEVIMG} - $(KUSTOMIZE) build config/default > examples/dist/fluxqueue-dev.yaml + $(KUSTOMIZE) build config/default > dist/fluxqueue-dev.yaml .PHONY: test-deploy-recreate test-deploy-recreate: test-deploy - kubectl delete -f ./examples/dist/fluxqueue-dev.yaml || echo "Already deleted" - kubectl apply -f ./examples/dist/fluxqueue-dev.yaml + kubectl delete -f ./dist/fluxqueue-dev.yaml || echo "Already deleted" + kubectl apply -f ./dist/fluxqueue-dev.yaml .PHONY: undeploy undeploy: kustomize ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion. @@ -299,50 +282,6 @@ OPERATOR_SDK = $(shell which operator-sdk) endif endif -.PHONY: bundle -bundle: manifests kustomize operator-sdk ## Generate bundle manifests and metadata, then validate generated files. - $(OPERATOR_SDK) generate kustomize manifests -q - cd config/manager && $(KUSTOMIZE) edit set image controller=$(IMG) - $(KUSTOMIZE) build config/manifests | $(OPERATOR_SDK) generate bundle $(BUNDLE_GEN_FLAGS) - $(OPERATOR_SDK) bundle validate ./bundle - -.PHONY: bundle-build -bundle-build: ## Build the bundle image. - docker build -f bundle.Dockerfile -t $(BUNDLE_IMG) . - -.PHONY: bundle-push -bundle-push: ## Push the bundle image. - $(MAKE) docker-push IMG=$(BUNDLE_IMG) - -.PHONY: opm -OPM = $(LOCALBIN)/opm -opm: ## Download opm locally if necessary. 
-ifeq (,$(wildcard $(OPM))) -ifeq (,$(shell which opm 2>/dev/null)) - @{ \ - set -e ;\ - mkdir -p $(dir $(OPM)) ;\ - OS=$(shell go env GOOS) && ARCH=$(shell go env GOARCH) && \ - curl -sSLo $(OPM) https://github.com/operator-framework/operator-registry/releases/download/v1.23.0/$${OS}-$${ARCH}-opm ;\ - chmod +x $(OPM) ;\ - } -else -OPM = $(shell which opm) -endif -endif - -# A comma-separated list of bundle images (e.g. make catalog-build BUNDLE_IMGS=example.com/operator-bundle:v0.1.0,example.com/operator-bundle:v0.2.0). -# These images MUST exist in a registry and be pull-able. -BUNDLE_IMGS ?= $(BUNDLE_IMG) - -# The image tag given to the resulting catalog image (e.g. make catalog-build CATALOG_IMG=example.com/operator-catalog:v0.2.0). -CATALOG_IMG ?= $(IMAGE_TAG_BASE)-catalog:v$(VERSION) - -# Set CATALOG_BASE_IMG to an existing catalog image tag to add $BUNDLE_IMGS to that image. -ifneq ($(origin CATALOG_BASE_IMG), undefined) -FROM_INDEX_OPT := --from-index $(CATALOG_BASE_IMG) -endif - .PHONY: helmify helmify: $(HELMIFY) ## Download helmify locally if necessary. $(HELMIFY): $(LOCALBIN) @@ -350,18 +289,8 @@ $(HELMIFY): $(LOCALBIN) helm: manifests kustomize helmify $(KUSTOMIZE) build config/default | $(HELMIFY) + cat ./chart/values-template.yaml ./chart/values.yaml > ./chart/tmp-values.yaml + mv ./chart/tmp-values.yaml ./chart/values.yaml .PHONY: pre-push pre-push: generate api build-config helm - -# Build a catalog image by adding bundle images to an empty catalog using the operator package manager tool, 'opm'. -# This recipe invokes 'opm' in 'semver' bundle add mode. For more information on add modes, see: -# https://github.com/operator-framework/community-operators/blob/7f1438c/docs/packaging-operator.md#updating-your-existing-operator -.PHONY: catalog-build -catalog-build: opm ## Build a catalog image. - $(OPM) index add --container-tool docker --mode semver --tag $(CATALOG_IMG) --bundles $(BUNDLE_IMGS) $(FROM_INDEX_OPT) - -# Push the catalog image. 
-.PHONY: catalog-push -catalog-push: ## Push a catalog image. - $(MAKE) docker-push IMG=$(CATALOG_IMG) diff --git a/README.md b/README.md index a115dcf..c396fd2 100644 --- a/README.md +++ b/README.md @@ -21,11 +21,11 @@ See the [docs](docs) for some detail on design choices. Fluxqueue builds three primary containers: - - `ghcr.io/converged-computing/fluxqueue`: contains a custom kube-scheduler build with fluxqueue as the primary scheduler. - - `ghcr.io/converged-computing/fluxqueue-sidecar`: provides the fluxion service, queue for pods and groups, and a second service that will expose a kubectl command for inspection of state. + - `ghcr.io/converged-computing/fluxqueue`: contains the webhook and operator with a flux queue for pods and groups that interacts with fluxion + - `ghcr.io/converged-computing/fluxqueue-scheduler`: provides the fluxion service - `ghcr.io/converged-computing/fluxqueue-postgres`: holds the worker queue and provisional queue tables -Instead of doing an out of tree scheduler plugin, for this project I am adding directly to Kubernetes and building. I'm curious to see if this will be easier or harder to maintain than an out of tree plugin, which seems to break frequently as the upstream for scheduler-plugins changes. +Not yet developed yet is the custom scheduler plugin, which needs to go somewhere! 
## Deploy diff --git a/build/fluxion/Dockerfile b/build/fluxion/Dockerfile new file mode 100644 index 0000000..3d07e48 --- /dev/null +++ b/build/fluxion/Dockerfile @@ -0,0 +1,46 @@ +FROM fluxrm/flux-sched:jammy AS builder + +# This builds the fluxion scheduler service + +USER root +ENV DEBIAN_FRONTEND=noninteractive +ENV GO_VERSION=1.21.9 + +RUN apt-get update && apt-get clean -y && apt -y autoremove + +# Install go +RUN wget https://go.dev/dl/go${GO_VERSION}.linux-amd64.tar.gz && tar -xvf go${GO_VERSION}.linux-amd64.tar.gz && \ + mv go /usr/local && rm go${GO_VERSION}.linux-amd64.tar.gz + +# ENV GOROOT=/usr/local/go +# ENV GOPATH=/go +ENV PATH=/usr/local/go/bin:$PATH +RUN flux keygen +RUN git clone https://github.com/flux-framework/flux-sched.git /opt/flux-sched + +# Go dependencies for protobuf +RUN apt -y update && apt -y upgrade && apt install --no-install-recommends -y protobuf-compiler curl && \ + go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.26 && \ + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@v1.1 + +# These need to be on the LD_LIBRARY_PATH for the server to find at runtime +ENV LD_LIBRARY_PATH=/usr/lib:/usr/lib/flux +WORKDIR /code +COPY . ./ + +# These package imports are kept separately from fluxqueue since they require import +# of fluxion-go, which requires flux, and is unlikely to work on a host +RUN mv ./build/fluxion/cmd/_fluxion ./cmd/fluxion && \ + mv ./build/fluxion/pkg/_fluxion ./pkg/fluxion && \ + go mod tidy && \ + go mod vendor && \ + make fluxion FLUX_SCHED_ROOT=/opt/flux-sched + +# minimize build! 
+FROM fluxrm/flux-sched:jammy
+COPY --from=builder /code/bin/fluxion-service /bin/fluxion-service
+COPY --from=builder /usr/lib/flux/ /usr/lib/flux
+COPY --from=builder /usr/lib/libflux* /usr/lib/
+
+USER root
+ENV LD_LIBRARY_PATH=/usr/local/lib:/usr/lib:/usr/lib/flux
\ No newline at end of file
diff --git a/build/fluxion/cmd/_fluxion/main.go b/build/fluxion/cmd/_fluxion/main.go
new file mode 100644
index 0000000..ba3bb9e
--- /dev/null
+++ b/build/fluxion/cmd/_fluxion/main.go
@@ -0,0 +1,80 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"net"
+	"strings"
+	"time"
+
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/keepalive"
+
+	// This package is hidden in the root with go.build since it requires fluxion-go
+	// And cannot be built easily outside of the container
+	"github.com/converged-computing/fluxqueue/pkg/fluxion"
+	pb "github.com/converged-computing/fluxqueue/pkg/fluxion-grpc"
+	"github.com/converged-computing/fluxqueue/pkg/service"
+	svcPb "github.com/converged-computing/fluxqueue/pkg/service-grpc"
+)
+
+const (
+	defaultPort           = ":4242"
+	enableExternalService = false
+)
+
+var responsechan chan string
+
+func main() {
+	fmt.Println("This is the fluxion grpc server")
+	policy := flag.String("policy", "", "Match policy")
+	label := flag.String("label", "", "Label name for fluxnetes dedicated nodes")
+	grpcPort := flag.String("port", defaultPort, "Port for grpc service")
+	enableServicePlugin := flag.Bool("external-service", enableExternalService, "Flag to enable the external service (defaults to false)")
+
+	flag.Parse()
+
+	// Ensure our port starts with :
+	port := *grpcPort
+	if !strings.HasPrefix(port, ":") {
+		port = fmt.Sprintf(":%s", port)
+	}
+
+	// Fluxion GRPC
+	flux := fluxion.Fluxion{}
+	flux.InitFluxion(*policy, *label)
+
+	lis, err := net.Listen("tcp", port)
+	if err != nil {
+		fmt.Printf("[GRPCServer] failed to listen: %v\n", err)
+	}
+
+	responsechan = make(chan string)
+	server := grpc.NewServer(
grpc.KeepaliveParams(keepalive.ServerParameters{ + MaxConnectionIdle: 5 * time.Minute, + }), + ) + pb.RegisterFluxionServiceServer(server, &flux) + + // External plugin (Kubectl) GRPC + // This will eventually be an external GRPC module that can + // be shared by fluxnetes (flux-k8s) and fluxnetes-kubectl + // We give it a handle to Flux to get the state of groups + // and job Ids. The direct interaction with Fluxion + // happens through the other service handle + if *enableServicePlugin { + plugin := service.ExternalService{} + plugin.Init() + svcPb.RegisterExternalPluginServiceServer(server, &plugin) + } + + fmt.Printf("[GRPCServer] gRPC Listening on %s\n", lis.Addr().String()) + err = server.Serve(lis) + if err != nil { + fmt.Printf("[GRPCServer] failed to serve: %v\n", err) + } + + flux.Close() + fmt.Printf("[GRPCServer] Exiting\n") +} diff --git a/build/fluxion/pkg/_fluxion/fluxion.go b/build/fluxion/pkg/_fluxion/fluxion.go new file mode 100644 index 0000000..c142885 --- /dev/null +++ b/build/fluxion/pkg/_fluxion/fluxion.go @@ -0,0 +1,140 @@ +package fluxion + +import ( + "os" + + "github.com/converged-computing/fluxqueue/pkg/defaults" + pb "github.com/converged-computing/fluxqueue/pkg/fluxion-grpc" + utils "github.com/converged-computing/fluxqueue/pkg/fluxion/utils" + "github.com/converged-computing/fluxqueue/pkg/jobspec" + "github.com/flux-framework/fluxion-go/pkg/fluxcli" + klog "k8s.io/klog/v2" + + "context" + "errors" +) + +type Fluxion struct { + cli *fluxcli.ReapiClient + pb.UnimplementedFluxionServiceServer +} + +// InitFluxion creates a new client to interaction with the fluxion API (via go bindings) +func (fluxion *Fluxion) InitFluxion(policy, label string) { + fluxion.cli = fluxcli.NewReapiClient() + + klog.Infof("[fluxqueue] Created flux resource client %s", fluxion.cli) + err := utils.CreateInClusterJGF(defaults.KubernetesJsonGraphFormat, label) + if err != nil { + return + } + + // This file needs to be written for GetResources to read later + 
jgf, err := os.ReadFile(defaults.KubernetesJsonGraphFormat) + if err != nil { + klog.Error("Error reading JGF") + return + } + + p := "{}" + if policy != "" { + p = string("{\"matcher_policy\": \"" + policy + "\"}") + klog.Infof("[fluxqueue] match policy: %s", p) + } + fluxion.cli.InitContext(string(jgf), p) +} + +// Destroys properly closes (destroys) the fluxion client handle +func (fluxion *Fluxion) Close() { + fluxion.cli.Destroy() +} + +// Cancel wraps the Cancel function of the fluxion go bindings +func (fluxion *Fluxion) Cancel( + ctx context.Context, + in *pb.CancelRequest, +) (*pb.CancelResponse, error) { + + klog.Infof("[fluxqueue] received cancel request %v\n", in) + err := fluxion.cli.Cancel(int64(in.FluxID), in.NoExistOK) + if err != nil { + return nil, err + } + + // Why would we have an error code here if we check above? + // This (I think) should be an error code for the specific job + dr := &pb.CancelResponse{FluxID: in.FluxID} + klog.Infof("[fluxqueue] sending cancel response %v\n", dr) + klog.Infof("[fluxqueue] cancel errors so far: %s\n", fluxion.cli.GetErrMsg()) + + reserved, at, overhead, mode, fluxerr := fluxion.cli.Info(int64(in.FluxID)) + klog.Infof("\n\t----Job Info output---") + klog.Infof("jobid: %d\nreserved: %t\nat: %d\noverhead: %f\nmode: %s\nerror: %d\n", in.FluxID, reserved, at, overhead, mode, fluxerr) + + klog.Infof("[GRPCServer] Sending Cancel response %v\n", dr) + return dr, nil +} + +// Match wraps the MatchAllocate function of the fluxion go bindings +// If a match is not possible, we return an empty response with allocated false +// This should only return an error if there is some issue with Fluxion +// or the task of matching. 
+func (fluxion *Fluxion) Match(ctx context.Context, in *pb.MatchRequest) (*pb.MatchResponse, error) {
+
+	emptyResponse := &pb.MatchResponse{}
+
+	// Prepare an empty match response (that can still be serialized)
+	klog.Infof("[fluxqueue] Received Match request %v\n", in)
+
+	// Generate the jobspec, array of bytes converted to string
+	spec, err := jobspec.CreateJobSpecYaml(in.Podspec, in.Count)
+	if err != nil {
+		return emptyResponse, err
+	}
+
+	// Ask flux to match allocate, either with or without a reservation
+	reserved, allocated, at, overhead, jobid, fluxerr := fluxion.cli.MatchAllocate(in.Reserve, string(spec))
+	utils.PrintOutput(reserved, allocated, at, overhead, jobid, fluxerr)
+
+	// Be explicit about errors (or not)
+	// These errors are related to matching, not whether it is possible or not,
+	// and should not happen.
+	errorMessages := fluxion.cli.GetErrMsg()
+	if errorMessages == "" {
+		klog.Info("[fluxqueue] There are no errors")
+	} else {
+		klog.Infof("[fluxqueue] Match errors so far %s", errorMessages)
+	}
+	if fluxerr != nil {
+		klog.Errorf("[fluxqueue] Match Flux err is %v", fluxerr)
+		return emptyResponse, errors.New("[fluxqueue] Error in ReapiCliMatchAllocate")
+	}
+
+	// It's OK if we can't allocate, it means we can reserve and let the client
+	// handle this information how they see fit. If we can allocate, we return
+	// the nodes.
+ nodelist := []*pb.NodeAlloc{} + haveAllocation := allocated != "" + + if haveAllocation { + // Pass the job name (the group) for inspection/ordering later + nodetasks := utils.ParseAllocResult(allocated, in.JobName) + nodelist = make([]*pb.NodeAlloc, len(nodetasks)) + for i, result := range nodetasks { + nodelist[i] = &pb.NodeAlloc{ + NodeID: result.Basename, + Tasks: int32(result.CoreCount) / in.Podspec.Cpu, + } + } + } + + mr := &pb.MatchResponse{ + Nodelist: nodelist, + FluxID: uint64(jobid), + Reserved: reserved, + ReservedAt: at, + Allocated: haveAllocation, + } + klog.Infof("[fluxqueue] Match response %v \n", mr) + return mr, nil +} diff --git a/build/fluxion/pkg/_fluxion/utils/types.go b/build/fluxion/pkg/_fluxion/utils/types.go new file mode 100644 index 0000000..a525a6a --- /dev/null +++ b/build/fluxion/pkg/_fluxion/utils/types.go @@ -0,0 +1,15 @@ +package utils + +// PodSpec is a temporary holder for the protobuf +// variant that it will be converted to. We could +// remove it, but since we need to refactor to use +// an entire group I'm leaving for now +type PodSpec struct { + Id string + Container string + Cpu int32 + Memory int64 + Gpu int64 + Storage int64 + Labels []string +} diff --git a/build/fluxion/pkg/_fluxion/utils/utils.go b/build/fluxion/pkg/_fluxion/utils/utils.go new file mode 100644 index 0000000..e524e96 --- /dev/null +++ b/build/fluxion/pkg/_fluxion/utils/utils.go @@ -0,0 +1,319 @@ +package utils + +import ( + "context" + "fmt" + + klog "k8s.io/klog/v2" + + "encoding/json" + + "github.com/converged-computing/fluxqueue/pkg/jgf" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + resourcehelper "k8s.io/kubectl/pkg/util/resource" +) + +var ( + controlPlaneLabel = "node-role.kubernetes.io/control-plane" + defaultClusterName = "k8scluster" +) + +// RegisterExisting uses the in 
cluster API to get existing pods +// This is actually the same as computeTotalRequests but I wanted to compare the two +// It is currently not being used. The main difference is that below, we are essentially +// rounding the cpu to the smaller unit (logically for the graph) but losing some +// granularity, if we think "milli" values have feet. +func RegisterExisting(clientset *kubernetes.Clientset, ctx context.Context) (map[string]PodSpec, error) { + + // We are using PodSpec as a holder for a *summary* of cpu/memory being used + // by the node, it is a summation across pods we find on each one + nodes := map[string]PodSpec{} + + // get pods in all the namespaces by omitting namespace + // Or specify namespace to get pods in particular namespace + pods, err := clientset.CoreV1().Pods("").List(ctx, metav1.ListOptions{}) + if err != nil { + klog.Infof("Error listing pods: %s\n", err) + return nodes, err + } + klog.Infof("Found %d existing pods in the cluster\n", len(pods.Items)) + + // Create a new PodSpec for each + for _, pod := range pods.Items { + + // Add the node to our lookup if we don't have it yet + _, ok := nodes[pod.Spec.NodeName] + if !ok { + nodes[pod.Spec.NodeName] = PodSpec{} + } + ps := nodes[pod.Spec.NodeName] + + for _, container := range pod.Spec.Containers { + specRequests := container.Resources.Requests + ps.Cpu += int32(specRequests.Cpu().Value()) + ps.Memory += specRequests.Memory().Value() + ps.Storage += specRequests.StorageEphemeral().Value() + + specLimits := container.Resources.Limits + gpuSpec := specLimits["nvidia.com/gpu"] + ps.Gpu += gpuSpec.Value() + } + nodes[pod.Spec.NodeName] = ps + } + return nodes, nil +} + +// CreateInClusterJGF creates the Json Graph Format from the Kubernetes API +func CreateInClusterJGF(filename, skipLabel string) error { + ctx := context.Background() + config, err := rest.InClusterConfig() + if err != nil { + fmt.Println("Error getting InClusterConfig") + return err + } + clientset, err := 
kubernetes.NewForConfig(config) + if err != nil { + fmt.Printf("Error getting ClientSet: %s", err) + return err + } + nodes, err := clientset.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) + if err != nil { + fmt.Printf("Error listing nodes: %s", err) + return err + } + + // Create a Flux Json Graph Format (JGF) with all cluster nodes + fluxgraph := jgf.NewFluxJGF() + + // Initialize the cluster. The top level of the graph is the cluster + // This assumes fluxion is only serving one cluster. + // previous comments indicate that we choose between the level + // of a rack and a subnet. A rack doesn't make sense (the nodes could + // be on multiple racks) so subnet is likely the right abstraction + clusterNode, err := fluxgraph.InitCluster(defaultClusterName) + if err != nil { + return err + } + fmt.Println("Number nodes ", len(nodes.Items)) + + // TODO for follow up / next PR: + // Metrics / summary should be an attribute of the JGF outer flux graph + // Resources should come in from entire group (and not repres. 
pod) + var totalAllocCpu int64 + totalAllocCpu = 0 + + // Keep a lookup of subnet nodes in case we see one twice + // We don't want to create a new entity for it in the graph + subnetLookup := map[string]jgf.Node{} + var subnetCounter int64 = 0 + + for nodeCount, node := range nodes.Items { + + // We should not be scheduling to the control plane + _, ok := node.Labels[controlPlaneLabel] + if ok { + fmt.Println("Skipping control plane node ", node.GetName()) + continue + } + + // Anything labeled with "skipLabel" meaning it is present, + // should be skipped + if skipLabel != "" { + _, ok := node.Labels[skipLabel] + if ok { + fmt.Printf("Skipping node %s\n", node.GetName()) + continue + } + } + + if node.Spec.Unschedulable { + fmt.Printf("Skipping node %s, unschedulable\n", node.GetName()) + continue + } + + fieldselector, err := fields.ParseSelector("spec.nodeName=" + node.GetName() + ",status.phase!=" + string(corev1.PodSucceeded) + ",status.phase!=" + string(corev1.PodFailed)) + if err != nil { + return err + } + pods, err := clientset.CoreV1().Pods("").List(ctx, metav1.ListOptions{ + FieldSelector: fieldselector.String(), + }) + if err != nil { + return err + } + + // Have we seen this subnet node before? 
+ subnetName := node.Labels["topology.kubernetes.io/zone"] + subnetNode, exists := subnetLookup[subnetName] + if !exists { + // Build the subnet according to topology.kubernetes.io/zone label + subnetNode = fluxgraph.MakeSubnet(subnetName, subnetCounter) + subnetCounter += 1 + + // This is one example of bidirectional, I won't document in + // all following occurrences but this is what the function does + // [cluster] -> contains -> [subnet] + // [subnet] -> in -> [cluster] + fluxgraph.MakeBidirectionalEdge(clusterNode.Id, subnetNode.Id) + } + + // These are requests for existing pods, for cpu and memory + reqs := computeTotalRequests(pods) + cpuReqs := reqs[corev1.ResourceCPU] + memReqs := reqs[corev1.ResourceMemory] + + // Actual values that we have available (minus requests) + totalCpu := node.Status.Allocatable.Cpu().MilliValue() + totalMem := node.Status.Allocatable.Memory().Value() + + // Values accounting for requests + availCpu := int64((totalCpu - cpuReqs.MilliValue()) / 1000) + availMem := totalMem - memReqs.Value() + + // Show existing to compare to + fmt.Printf("\n📦️ %s\n", node.GetName()) + fmt.Printf(" allocated cpu: %d\n", cpuReqs.Value()) + fmt.Printf(" allocated mem: %d\n", memReqs.Value()) + fmt.Printf(" available cpu: %d\n", availCpu) + fmt.Printf(" running pods: %d\n", len(pods.Items)) + + // keep track of overall total + totalAllocCpu += availCpu + fmt.Printf(" available mem: %d\n", availMem) + gpuAllocatable, hasGpuAllocatable := node.Status.Allocatable["nvidia.com/gpu"] + + // TODO possibly look at pod resources vs. 
node.Status.Allocatable + // Make the compute node, which is a child of the subnet + // The parameters here are the node name, and the parent path + computeNode := fluxgraph.MakeNode(node.Name, subnetNode.Metadata.Name, int64(nodeCount)) + + // [subnet] -> contains -> [compute node] + fluxgraph.MakeBidirectionalEdge(subnetNode.Id, computeNode.Id) + + // Here we are adding GPU resources under nodes + if hasGpuAllocatable { + fmt.Println("GPU Resource quantity ", gpuAllocatable.Value()) + for index := 0; index < int(gpuAllocatable.Value()); index++ { + + // The subpath (from and not including root) is the subnet -> node + subpath := fmt.Sprintf("%s/%s", subnetNode.Metadata.Name, computeNode.Metadata.Name) + + // TODO: can this size be greater than 1? + gpuNode := fluxgraph.MakeGPU(jgf.NvidiaGPU, subpath, 1, int64(index)) + + // [compute] -> contains -> [gpu] + fluxgraph.MakeBidirectionalEdge(computeNode.Id, gpuNode.Id) + } + + } + + // Here is where we are adding cores + for index := 0; index < int(availCpu); index++ { + subpath := fmt.Sprintf("%s/%s", subnetNode.Metadata.Name, computeNode.Metadata.Name) + coreNode := fluxgraph.MakeCore(jgf.CoreType, subpath, int64(index)) + fluxgraph.MakeBidirectionalEdge(computeNode.Id, coreNode.Id) + } + + // Here is where we are adding memory + fractionMem := availMem >> 30 + for i := 0; i < int(fractionMem); i++ { + subpath := fmt.Sprintf("%s/%s", subnetNode.Metadata.Name, computeNode.Metadata.Name) + memoryNode := fluxgraph.MakeMemory(jgf.MemoryType, subpath, 1<<10, int64(i)) + fluxgraph.MakeBidirectionalEdge(computeNode.Id, memoryNode.Id) + } + } + fmt.Printf("\nCan request at most %d exclusive cpu", totalAllocCpu) + + // Get the jgf back as bytes, and we will return string + err = fluxgraph.WriteJGF(filename) + if err != nil { + return err + } + return nil +} + +// computeTotalRequests sums up the pod requests for the list. We do not consider limits. 
+func computeTotalRequests(podList *corev1.PodList) map[corev1.ResourceName]resource.Quantity { + total := map[corev1.ResourceName]resource.Quantity{} + for _, pod := range podList.Items { + podReqs, _ := resourcehelper.PodRequestsAndLimits(&pod) + for podReqName, podReqValue := range podReqs { + if v, ok := total[podReqName]; !ok { + total[podReqName] = podReqValue + } else { + v.Add(podReqValue) + total[podReqName] = v + } + } + } + return total +} + +type allocation struct { + Type string + Basename string + CoreCount int +} + +// ParseAllocResult takes an allocated (string) and parses into a list of allocation +// We include the pod namespace/name for debugging later +func ParseAllocResult(allocated, groupName string) []allocation { + var dat map[string]interface{} + result := []allocation{} + + // Keep track of total core count across allocated + corecount := 0 + + // This should not happen - the string we get back should parse. + if err := json.Unmarshal([]byte(allocated), &dat); err != nil { + panic(err) + } + // Parse graph and nodes into interfaces + // TODO look at github.com/mitchellh/mapstructure + // that might make this easier + nodes := dat["graph"] + str1 := nodes.(map[string]interface{}) + str2 := str1["nodes"].([]interface{}) + + for _, item := range str2 { + str1 = item.(map[string]interface{}) + metadata := str1["metadata"].(map[string]interface{}) + if metadata["type"].(string) == jgf.CoreType { + corecount = corecount + 1 + } + if metadata["type"].(string) == jgf.NodeType { + result = append(result, allocation{ + Type: metadata["type"].(string), + Basename: metadata["basename"].(string), + CoreCount: corecount, + }) + + // Reset the corecount once we've added to a node + corecount = 0 + } + } + fmt.Printf("Final node result for %s\n", groupName) + for i, alloc := range result { + fmt.Printf("Node %d: %s\n", i, alloc.Basename) + fmt.Printf(" Type: %s\n Basename: %s\n CoreCount: %d\n", + alloc.Type, alloc.Basename, alloc.CoreCount) + + } + 
return result
+}
+
+// Utility functions
+func PrintOutput(reserved bool, allocated string, at int64, overhead float64, jobid uint64, fluxerr error) {
+	fmt.Println("\n\t----Match Allocate output---")
+	fmt.Printf("jobid: %d\nreserved: %t\nallocated: %s\nat: %d\noverhead: %f\n", jobid, reserved, allocated, at, overhead)
+
+	// Only print error if we had one
+	if fluxerr != nil {
+		fmt.Printf("error: %s\n", fluxerr)
+	}
+}
diff --git a/chart/templates/configmap.yaml b/chart/templates/configmap.yaml
new file mode 100644
index 0000000..c25bf8d
--- /dev/null
+++ b/chart/templates/configmap.yaml
@@ -0,0 +1,136 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: scheduler-config
+  namespace: {{ .Release.Namespace }}
+data:
+  scheduler-config.yaml: |
+    apiVersion: kubescheduler.config.k8s.io/v1
+    kind: KubeSchedulerConfiguration
+    leaderElection:
+      leaderElect: false
+    profiles:
+    - schedulerName: Fluxion
+      plugins:
+        queueSort:
+          enabled:
+          - name: Fluxion
+        preBind:
+          disabled:
+          - name: Fluxion
+        filter:
+          disabled:
+          - name: NodePorts
+          - name: VolumeRestrictions
+          - name: EBSLimits
+          - name: GCEPDLimits
+          - name: NodeVolumeLimits
+          - name: AzureDiskLimits
+          - name: VolumeZone
+          - name: PodTopologySpread
+          - name: InterPodAffinity
+          - name: NodeAffinity
+          - name: NodeUnschedulable
+          - name: NodeName
+          - name: TaintToleration
+          - name: DefaultPreemption
+          - name: NodeResourcesBalancedAllocation
+          - name: ImageLocality
+        reserve:
+          disabled:
+          - name: NodePorts
+          - name: VolumeRestrictions
+          - name: EBSLimits
+          - name: GCEPDLimits
+          - name: NodeVolumeLimits
+          - name: AzureDiskLimits
+          - name: VolumeZone
+          - name: PodTopologySpread
+          - name: InterPodAffinity
+          - name: NodeAffinity
+          - name: NodeUnschedulable
+          - name: NodeName
+          - name: TaintToleration
+          - name: DefaultPreemption
+          - name: NodeResourcesBalancedAllocation
+          - name: ImageLocality
+        score:
+          disabled:
+          - name: NodePorts
+          - name: VolumeRestrictions
+          - name: EBSLimits
+          - name: GCEPDLimits
+          - name: 
NodeVolumeLimits
+          - name: AzureDiskLimits
+          - name: VolumeZone
+          - name: PodTopologySpread
+          - name: InterPodAffinity
+          - name: NodeAffinity
+          - name: NodeUnschedulable
+          - name: NodeName
+          - name: TaintToleration
+          - name: DefaultPreemption
+          - name: NodeResourcesBalancedAllocation
+          - name: ImageLocality
+        preScore:
+          disabled:
+          - name: NodePorts
+          - name: VolumeRestrictions
+          - name: EBSLimits
+          - name: GCEPDLimits
+          - name: NodeVolumeLimits
+          - name: AzureDiskLimits
+          - name: VolumeZone
+          - name: PodTopologySpread
+          - name: InterPodAffinity
+          - name: NodeAffinity
+          - name: NodeUnschedulable
+          - name: NodeName
+          - name: TaintToleration
+          - name: DefaultPreemption
+          - name: NodeResourcesBalancedAllocation
+          - name: ImageLocality
+        postFilter:
+          disabled:
+          - name: NodePorts
+          - name: VolumeRestrictions
+          - name: EBSLimits
+          - name: GCEPDLimits
+          - name: NodeVolumeLimits
+          - name: AzureDiskLimits
+          - name: VolumeZone
+          - name: PodTopologySpread
+          - name: InterPodAffinity
+          - name: NodeAffinity
+          - name: NodeUnschedulable
+          - name: NodeName
+          - name: TaintToleration
+          - name: DefaultPreemption
+          - name: NodeResourcesBalancedAllocation
+          - name: ImageLocality
+        preFilter:
+          disabled:
+          - name: NodePorts
+          - name: VolumeRestrictions
+          - name: EBSLimits
+          - name: GCEPDLimits
+          - name: NodeVolumeLimits
+          - name: AzureDiskLimits
+          - name: VolumeZone
+          - name: PodTopologySpread
+          - name: InterPodAffinity
+          - name: NodeAffinity
+          - name: NodeUnschedulable
+          - name: NodeName
+          - name: TaintToleration
+          - name: DefaultPreemption
+          - name: NodeResourcesBalancedAllocation
+          - name: ImageLocality
+        multiPoint:
+          disabled:
+          - name: CapacityScheduling
+          - name: NodeResourceTopologyMatch
+          - name: NodeResourcesAllocatable
+          - name: PrioritySort
+          - name: Coscheduling
+
diff --git a/chart/templates/deployment.yaml b/chart/templates/deployment.yaml
index 60e04f1..22f46ff 100644
--- a/chart/templates/deployment.yaml
+++ b/chart/templates/deployment.yaml
@@ -24,6 +24,16 @@ spec:
command:
         - /manager
         env:
+        - name: DATABASE_URL
+          value: {{ quote .Values.controllerManager.manager.env.databaseUrl }}
+        - name: PGHOST
+          value: {{ quote .Values.controllerManager.manager.env.pghost }}
+        - name: PGDATABASE
+          value: {{ quote .Values.controllerManager.manager.env.pgdatabase }}
+        - name: PGPORT
+          value: {{ quote .Values.controllerManager.manager.env.pgport }}
+        - name: PGPASSWORD
+          value: {{ quote .Values.controllerManager.manager.env.pgpassword }}
         - name: KUBERNETES_CLUSTER_DOMAIN
           value: {{ quote .Values.kubernetesClusterDomain }}
         image: {{ .Values.controllerManager.manager.image.repository }}:{{ .Values.controllerManager.manager.image.tag
@@ -41,11 +51,12 @@ spec:
           name: webhook-server
           protocol: TCP
         readinessProbe:
-          httpGet:
-            path: /readyz
-            port: 8081
-          initialDelaySeconds: 5
-          periodSeconds: 10
+          exec:
+            command:
+            - sh
+            - -c
+            - |
+              status=$(curl -ks https://localhost:8081/readyz); if [ "$status" != "ok" ]; then exit 1; fi
+              pg_isready -d postgres -h postgres -p 5432 -U postgres;
         resources: {{- toYaml .Values.controllerManager.manager.resources | nindent 10 }}
         securityContext: {{- toYaml .Values.controllerManager.manager.containerSecurityContext
@@ -54,6 +65,18 @@ spec:
         - mountPath: /tmp/k8s-webhook-server/serving-certs
           name: cert
           readOnly: true
+      - command:
+        - /bin/fluxion-service
+        - --policy=lonode
+        - --port=4242
+        env:
+        - name: KUBERNETES_CLUSTER_DOMAIN
+          value: {{ quote .Values.kubernetesClusterDomain }}
+        image: {{ .Values.controllerManager.fluxion.image.repository }}:{{ .Values.controllerManager.fluxion.image.tag
+          | default .Chart.AppVersion }}
+        imagePullPolicy: {{ .Values.controllerManager.fluxion.imagePullPolicy }}
+        name: fluxion
+        resources: {}
       securityContext: {{- toYaml .Values.controllerManager.podSecurityContext | nindent 8 }}
       serviceAccountName: {{ include "chart.fullname" .
}}-controller-manager diff --git a/chart/values-template.yaml b/chart/values-template.yaml new file mode 100644 index 0000000..bc5d9dd --- /dev/null +++ b/chart/values-template.yaml @@ -0,0 +1,24 @@ +# Note that this is no longer built from the scheduler-plugins repo - it's built +# directly from Kubernetes (in tree) +# scheduler: +# name: fluxnetes +# image: ghcr.io/flux-framework/fluxnetes:latest +# replicaCount: 1 +# pullPolicy: Always +# leaderElect: false + +postgres: + image: ghcr.io/flux-framework/fluxqueue-postgres:latest + pullPolicy: Always + +# LoadVariationRiskBalancing and TargetLoadPacking are not enabled by default +# as they need extra RBAC privileges on metrics.k8s.io. + +#enableCertManager: true +#kubernetesClusterDomain: cluster.local +#webhookService: +# ports: +# - port: 9443 +# protocol: TCP +# targetPort: 9443 +# type: ClusterIP diff --git a/chart/values.yaml b/chart/values.yaml index d6a59b7..01c42ba 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -1,4 +1,33 @@ +# Note that this is no longer built from the scheduler-plugins repo - it's built +# directly from Kubernetes (in tree) +# scheduler: +# name: fluxnetes +# image: ghcr.io/flux-framework/fluxnetes:latest +# replicaCount: 1 +# pullPolicy: Always +# leaderElect: false + +postgres: + image: ghcr.io/flux-framework/fluxqueue-postgres:latest + pullPolicy: Always + +# LoadVariationRiskBalancing and TargetLoadPacking are not enabled by default +# as they need extra RBAC privileges on metrics.k8s.io. 
+ +#enableCertManager: true +#kubernetesClusterDomain: cluster.local +#webhookService: +# ports: +# - port: 9443 +# protocol: TCP +# targetPort: 9443 +# type: ClusterIP controllerManager: + fluxion: + image: + repository: ghcr.io/converged-computing/fluxqueue-scheduler + tag: latest + imagePullPolicy: IfNotPresent manager: args: - --metrics-bind-address=:8443 @@ -9,10 +38,16 @@ controllerManager: capabilities: drop: - ALL + env: + databaseUrl: postgres://postgres:postgres@postgres:5432/postgres + pgdatabase: postgres + pghost: postgres + pgpassword: postgres + pgport: "5432" image: repository: ghcr.io/converged-computing/fluxqueue - tag: test - imagePullPolicy: Always + tag: latest + imagePullPolicy: IfNotPresent resources: limits: cpu: 500m diff --git a/cmd/main.go b/cmd/manager/main.go similarity index 100% rename from cmd/main.go rename to cmd/manager/main.go diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 4f34c95..dc9def7 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -6,3 +6,6 @@ images: - name: controller newName: ghcr.io/converged-computing/fluxqueue newTag: latest +- name: scheduler + newName: ghcr.io/converged-computing/fluxqueue-scheduler + newTag: latest diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 7605259..def6258 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -64,8 +64,19 @@ spec: - --leader-elect - --health-probe-bind-address=:8081 image: controller:latest - imagePullPolicy: Always + imagePullPolicy: IfNotPresent name: manager + env: + - name: DATABASE_URL + value: postgres://postgres:postgres@postgres:5432/postgres + - name: PGHOST + value: postgres + - name: PGDATABASE + value: postgres + - name: PGPORT + value: "5432" + - name: PGPASSWORD + value: postgres securityContext: allowPrivilegeEscalation: false capabilities: @@ -77,12 +88,21 @@ spec: port: 8081 initialDelaySeconds: 15 periodSeconds: 20 + # The 
operator queue will need to talk to the database
         readinessProbe:
-          httpGet:
-            path: /readyz
-            port: 8081
-          initialDelaySeconds: 5
-          periodSeconds: 10
+          exec:
+            command:
+            - "sh"
+            - "-c"
+            - >
+              status=$(curl -ks https://localhost:8081/readyz); if [ "$status" != "ok" ]; then exit 1; fi
+              pg_isready -d postgres -h postgres -p 5432 -U postgres;
+        #readinessProbe:
+        #  httpGet:
+        #    path: /readyz
+        #    port: 8081
+        #  initialDelaySeconds: 5
+        #  periodSeconds: 10
         # TODO(user): Configure the resources accordingly based on the project requirements.
         # More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
         resources:
@@ -92,5 +112,17 @@ spec:
         requests:
           cpu: 10m
           memory: 64Mi
+      - image: scheduler:latest
+        imagePullPolicy: IfNotPresent
+        command:
+        - /bin/fluxion-service
+        - --policy=lonode
+        - --port=4242
+        # - --external-service
+        name: fluxion
+        # These are exposed for the kubectl plugin
+        # ports:
+        #  - containerPort: xxxx
+        #    hostPort: xxxx
       serviceAccountName: controller-manager
       terminationGracePeriodSeconds: 10
diff --git a/examples/dist/fluxqueue-dev.yaml b/dist/fluxqueue-dev.yaml
similarity index 100%
rename from examples/dist/fluxqueue-dev.yaml
rename to dist/fluxqueue-dev.yaml
diff --git a/examples/dist/fluxqueue.yaml b/dist/fluxqueue.yaml
similarity index 100%
rename from examples/dist/fluxqueue.yaml
rename to dist/fluxqueue.yaml
diff --git a/go.mod b/go.mod
index 5a5ace3..39c856f 100644
--- a/go.mod
+++ b/go.mod
@@ -21,6 +21,7 @@ require (
 	github.com/emicklei/go-restful/v3 v3.11.0 // indirect
 	github.com/evanphx/json-patch/v5 v5.9.0 // indirect
 	github.com/felixge/httpsnoop v1.0.3 // indirect
+	github.com/flux-framework/fluxion-go v0.40.0 // indirect
 	github.com/fsnotify/fsnotify v1.7.0 // indirect
 	github.com/go-logr/logr v1.4.1 // indirect
 	github.com/go-logr/stdr v1.2.2 // indirect
diff --git a/go.sum b/go.sum
index 9ce9c0d..eb43c73 100644
--- a/go.sum
+++ b/go.sum
@@ -23,6 +23,8 @@ github.com/evanphx/json-patch/v5 v5.9.0 
h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0 github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk= github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/flux-framework/fluxion-go v0.40.0 h1:mF2/Uu5ODPn3DQyigJTYoN8XyjZrrYCoRpk9GNtg8S4= +github.com/flux-framework/fluxion-go v0.40.0/go.mod h1:jA5+kOSLxchFzixzYEvMAGjkXB5yszO/HxUwdhX/5/U= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= diff --git a/hack/quick-build-kind.sh b/hack/quick-build-kind.sh index f39697b..1aad6e4 100755 --- a/hack/quick-build-kind.sh +++ b/hack/quick-build-kind.sh @@ -1,6 +1,6 @@ #!/bin/bash -REGISTRY="${1:-ghcr.io/vsoch}" +REGISTRY="${1:-ghcr.io/converged-computing}" HERE=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) ROOT=$(dirname ${HERE}) @@ -17,6 +17,7 @@ make build-all REGISTRY=${REGISTRY} # SCHEDULER_IMAGE=fluxqueue # SIDECAR_IMAGE= # kind load docker-image ${REGISTRY}/fluxnetes-sidecar:latest kind load docker-image ${REGISTRY}/fluxqueue:latest kind load docker-image ${REGISTRY}/fluxqueue-postgres:latest +kind load docker-image ${REGISTRY}/fluxqueue-scheduler:latest # And then install using the charts. 
The pull policy ensures we use the loaded ones # --set scheduler.image=${REGISTRY}/fluxnetes:latest \ diff --git a/pkg/defaults/defaults.go b/pkg/defaults/defaults.go new file mode 100644 index 0000000..f4fc8f2 --- /dev/null +++ b/pkg/defaults/defaults.go @@ -0,0 +1,5 @@ +package defaults + +var ( + KubernetesJsonGraphFormat = "/home/data/jgf/kubecluster.json" +) diff --git a/pkg/fluxion-grpc/fluxion.pb.go b/pkg/fluxion-grpc/fluxion.pb.go new file mode 100644 index 0000000..4ec5b55 --- /dev/null +++ b/pkg/fluxion-grpc/fluxion.pb.go @@ -0,0 +1,888 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.28.1 +// protoc v3.20.3 +// source: pkg/fluxion-grpc/fluxion.proto + +package fluxion_grpc + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. 
+ _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type PodSpec struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"` + Container string `protobuf:"bytes,2,opt,name=container,proto3" json:"container,omitempty"` + Cpu int32 `protobuf:"varint,3,opt,name=cpu,proto3" json:"cpu,omitempty"` + Memory int64 `protobuf:"varint,4,opt,name=memory,proto3" json:"memory,omitempty"` + Gpu int64 `protobuf:"varint,5,opt,name=gpu,proto3" json:"gpu,omitempty"` + Storage int64 `protobuf:"varint,6,opt,name=storage,proto3" json:"storage,omitempty"` + Labels []string `protobuf:"bytes,7,rep,name=labels,proto3" json:"labels,omitempty"` +} + +func (x *PodSpec) Reset() { + *x = PodSpec{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_fluxion_grpc_fluxion_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *PodSpec) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*PodSpec) ProtoMessage() {} + +func (x *PodSpec) ProtoReflect() protoreflect.Message { + mi := &file_pkg_fluxion_grpc_fluxion_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use PodSpec.ProtoReflect.Descriptor instead. 
+func (*PodSpec) Descriptor() ([]byte, []int) { + return file_pkg_fluxion_grpc_fluxion_proto_rawDescGZIP(), []int{0} +} + +func (x *PodSpec) GetId() string { + if x != nil { + return x.Id + } + return "" +} + +func (x *PodSpec) GetContainer() string { + if x != nil { + return x.Container + } + return "" +} + +func (x *PodSpec) GetCpu() int32 { + if x != nil { + return x.Cpu + } + return 0 +} + +func (x *PodSpec) GetMemory() int64 { + if x != nil { + return x.Memory + } + return 0 +} + +func (x *PodSpec) GetGpu() int64 { + if x != nil { + return x.Gpu + } + return 0 +} + +func (x *PodSpec) GetStorage() int64 { + if x != nil { + return x.Storage + } + return 0 +} + +func (x *PodSpec) GetLabels() []string { + if x != nil { + return x.Labels + } + return nil +} + +// The Match request message (allocate, allocate_orelse_reserve) +// TODO: this currently takes a podspec, and we multiply by a count +// we should ideally support having a list of different pods +type MatchRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Podspec *PodSpec `protobuf:"bytes,1,opt,name=podspec,proto3" json:"podspec,omitempty"` + Count int32 `protobuf:"varint,3,opt,name=count,proto3" json:"count,omitempty"` + Reserve bool `protobuf:"varint,4,opt,name=reserve,proto3" json:"reserve,omitempty"` + JobName string `protobuf:"bytes,5,opt,name=jobName,proto3" json:"jobName,omitempty"` +} + +func (x *MatchRequest) Reset() { + *x = MatchRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_fluxion_grpc_fluxion_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *MatchRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*MatchRequest) ProtoMessage() {} + +func (x *MatchRequest) ProtoReflect() protoreflect.Message { + mi := &file_pkg_fluxion_grpc_fluxion_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := 
protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use MatchRequest.ProtoReflect.Descriptor instead. +func (*MatchRequest) Descriptor() ([]byte, []int) { + return file_pkg_fluxion_grpc_fluxion_proto_rawDescGZIP(), []int{1} +} + +func (x *MatchRequest) GetPodspec() *PodSpec { + if x != nil { + return x.Podspec + } + return nil +} + +func (x *MatchRequest) GetCount() int32 { + if x != nil { + return x.Count + } + return 0 +} + +func (x *MatchRequest) GetReserve() bool { + if x != nil { + return x.Reserve + } + return false +} + +func (x *MatchRequest) GetJobName() string { + if x != nil { + return x.JobName + } + return "" +} + +// The Nodes/Cluster Update Status +type NodeAlloc struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + NodeID string `protobuf:"bytes,1,opt,name=nodeID,proto3" json:"nodeID,omitempty"` + Tasks int32 `protobuf:"varint,2,opt,name=tasks,proto3" json:"tasks,omitempty"` +} + +func (x *NodeAlloc) Reset() { + *x = NodeAlloc{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_fluxion_grpc_fluxion_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *NodeAlloc) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*NodeAlloc) ProtoMessage() {} + +func (x *NodeAlloc) ProtoReflect() protoreflect.Message { + mi := &file_pkg_fluxion_grpc_fluxion_proto_msgTypes[2] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use NodeAlloc.ProtoReflect.Descriptor instead. 
+func (*NodeAlloc) Descriptor() ([]byte, []int) { + return file_pkg_fluxion_grpc_fluxion_proto_rawDescGZIP(), []int{2} +} + +func (x *NodeAlloc) GetNodeID() string { + if x != nil { + return x.NodeID + } + return "" +} + +func (x *NodeAlloc) GetTasks() int32 { + if x != nil { + return x.Tasks + } + return 0 +} + +// The Match response message +type MatchResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + FluxID uint64 `protobuf:"varint,1,opt,name=fluxID,proto3" json:"fluxID,omitempty"` + Nodelist []*NodeAlloc `protobuf:"bytes,2,rep,name=nodelist,proto3" json:"nodelist,omitempty"` + Reserved bool `protobuf:"varint,3,opt,name=reserved,proto3" json:"reserved,omitempty"` + ReservedAt int64 `protobuf:"varint,4,opt,name=reserved_at,json=reservedAt,proto3" json:"reserved_at,omitempty"` + // Only needed if we want stats or similar + // float overhead = 5; + // boolean to indicate allocated or not + Allocated bool `protobuf:"varint,5,opt,name=allocated,proto3" json:"allocated,omitempty"` +} + +func (x *MatchResponse) Reset() { + *x = MatchResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_fluxion_grpc_fluxion_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *MatchResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*MatchResponse) ProtoMessage() {} + +func (x *MatchResponse) ProtoReflect() protoreflect.Message { + mi := &file_pkg_fluxion_grpc_fluxion_proto_msgTypes[3] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use MatchResponse.ProtoReflect.Descriptor instead. 
+func (*MatchResponse) Descriptor() ([]byte, []int) { + return file_pkg_fluxion_grpc_fluxion_proto_rawDescGZIP(), []int{3} +} + +func (x *MatchResponse) GetFluxID() uint64 { + if x != nil { + return x.FluxID + } + return 0 +} + +func (x *MatchResponse) GetNodelist() []*NodeAlloc { + if x != nil { + return x.Nodelist + } + return nil +} + +func (x *MatchResponse) GetReserved() bool { + if x != nil { + return x.Reserved + } + return false +} + +func (x *MatchResponse) GetReservedAt() int64 { + if x != nil { + return x.ReservedAt + } + return 0 +} + +func (x *MatchResponse) GetAllocated() bool { + if x != nil { + return x.Allocated + } + return false +} + +type CancelRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + FluxID uint64 `protobuf:"varint,1,opt,name=fluxID,proto3" json:"fluxID,omitempty"` + // It's ok if it doesn't exist (don't issue an error) + NoExistOK bool `protobuf:"varint,2,opt,name=NoExistOK,proto3" json:"NoExistOK,omitempty"` +} + +func (x *CancelRequest) Reset() { + *x = CancelRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_fluxion_grpc_fluxion_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CancelRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CancelRequest) ProtoMessage() {} + +func (x *CancelRequest) ProtoReflect() protoreflect.Message { + mi := &file_pkg_fluxion_grpc_fluxion_proto_msgTypes[4] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CancelRequest.ProtoReflect.Descriptor instead. 
+func (*CancelRequest) Descriptor() ([]byte, []int) { + return file_pkg_fluxion_grpc_fluxion_proto_rawDescGZIP(), []int{4} +} + +func (x *CancelRequest) GetFluxID() uint64 { + if x != nil { + return x.FluxID + } + return 0 +} + +func (x *CancelRequest) GetNoExistOK() bool { + if x != nil { + return x.NoExistOK + } + return false +} + +// The Match response message +type CancelResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + FluxID uint64 `protobuf:"varint,1,opt,name=fluxID,proto3" json:"fluxID,omitempty"` + Error int32 `protobuf:"varint,2,opt,name=error,proto3" json:"error,omitempty"` +} + +func (x *CancelResponse) Reset() { + *x = CancelResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_fluxion_grpc_fluxion_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *CancelResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*CancelResponse) ProtoMessage() {} + +func (x *CancelResponse) ProtoReflect() protoreflect.Message { + mi := &file_pkg_fluxion_grpc_fluxion_proto_msgTypes[5] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use CancelResponse.ProtoReflect.Descriptor instead. 
+func (*CancelResponse) Descriptor() ([]byte, []int) { + return file_pkg_fluxion_grpc_fluxion_proto_rawDescGZIP(), []int{5} +} + +func (x *CancelResponse) GetFluxID() uint64 { + if x != nil { + return x.FluxID + } + return 0 +} + +func (x *CancelResponse) GetError() int32 { + if x != nil { + return x.Error + } + return 0 +} + +// The Nodes/Cluster Update Status +type NodeStatus struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + CpuAvail int32 `protobuf:"varint,1,opt,name=cpuAvail,proto3" json:"cpuAvail,omitempty"` + GpuAvail int32 `protobuf:"varint,2,opt,name=gpuAvail,proto3" json:"gpuAvail,omitempty"` + StorageAvail int64 `protobuf:"varint,3,opt,name=storageAvail,proto3" json:"storageAvail,omitempty"` + MemoryAvail int64 `protobuf:"varint,4,opt,name=memoryAvail,proto3" json:"memoryAvail,omitempty"` + AllowedPods int64 `protobuf:"varint,5,opt,name=allowedPods,proto3" json:"allowedPods,omitempty"` + NodeIP string `protobuf:"bytes,6,opt,name=nodeIP,proto3" json:"nodeIP,omitempty"` + Replication int32 `protobuf:"varint,7,opt,name=replication,proto3" json:"replication,omitempty"` +} + +func (x *NodeStatus) Reset() { + *x = NodeStatus{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_fluxion_grpc_fluxion_proto_msgTypes[6] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *NodeStatus) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*NodeStatus) ProtoMessage() {} + +func (x *NodeStatus) ProtoReflect() protoreflect.Message { + mi := &file_pkg_fluxion_grpc_fluxion_proto_msgTypes[6] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use NodeStatus.ProtoReflect.Descriptor instead. 
+func (*NodeStatus) Descriptor() ([]byte, []int) { + return file_pkg_fluxion_grpc_fluxion_proto_rawDescGZIP(), []int{6} +} + +func (x *NodeStatus) GetCpuAvail() int32 { + if x != nil { + return x.CpuAvail + } + return 0 +} + +func (x *NodeStatus) GetGpuAvail() int32 { + if x != nil { + return x.GpuAvail + } + return 0 +} + +func (x *NodeStatus) GetStorageAvail() int64 { + if x != nil { + return x.StorageAvail + } + return 0 +} + +func (x *NodeStatus) GetMemoryAvail() int64 { + if x != nil { + return x.MemoryAvail + } + return 0 +} + +func (x *NodeStatus) GetAllowedPods() int64 { + if x != nil { + return x.AllowedPods + } + return 0 +} + +func (x *NodeStatus) GetNodeIP() string { + if x != nil { + return x.NodeIP + } + return "" +} + +func (x *NodeStatus) GetReplication() int32 { + if x != nil { + return x.Replication + } + return 0 +} + +// The JGF response message +type JGFRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Jgf string `protobuf:"bytes,1,opt,name=jgf,proto3" json:"jgf,omitempty"` +} + +func (x *JGFRequest) Reset() { + *x = JGFRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_fluxion_grpc_fluxion_proto_msgTypes[7] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *JGFRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*JGFRequest) ProtoMessage() {} + +func (x *JGFRequest) ProtoReflect() protoreflect.Message { + mi := &file_pkg_fluxion_grpc_fluxion_proto_msgTypes[7] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use JGFRequest.ProtoReflect.Descriptor instead. 
+func (*JGFRequest) Descriptor() ([]byte, []int) { + return file_pkg_fluxion_grpc_fluxion_proto_rawDescGZIP(), []int{7} +} + +func (x *JGFRequest) GetJgf() string { + if x != nil { + return x.Jgf + } + return "" +} + +// The JGF response message +type JGFResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Jgf string `protobuf:"bytes,1,opt,name=jgf,proto3" json:"jgf,omitempty"` +} + +func (x *JGFResponse) Reset() { + *x = JGFResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_fluxion_grpc_fluxion_proto_msgTypes[8] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *JGFResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*JGFResponse) ProtoMessage() {} + +func (x *JGFResponse) ProtoReflect() protoreflect.Message { + mi := &file_pkg_fluxion_grpc_fluxion_proto_msgTypes[8] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use JGFResponse.ProtoReflect.Descriptor instead. 
+func (*JGFResponse) Descriptor() ([]byte, []int) { + return file_pkg_fluxion_grpc_fluxion_proto_rawDescGZIP(), []int{8} +} + +func (x *JGFResponse) GetJgf() string { + if x != nil { + return x.Jgf + } + return "" +} + +var File_pkg_fluxion_grpc_fluxion_proto protoreflect.FileDescriptor + +var file_pkg_fluxion_grpc_fluxion_proto_rawDesc = []byte{ + 0x0a, 0x1e, 0x70, 0x6b, 0x67, 0x2f, 0x66, 0x6c, 0x75, 0x78, 0x69, 0x6f, 0x6e, 0x2d, 0x67, 0x72, + 0x70, 0x63, 0x2f, 0x66, 0x6c, 0x75, 0x78, 0x69, 0x6f, 0x6e, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x12, 0x07, 0x66, 0x6c, 0x75, 0x78, 0x69, 0x6f, 0x6e, 0x22, 0xa5, 0x01, 0x0a, 0x07, 0x50, 0x6f, + 0x64, 0x53, 0x70, 0x65, 0x63, 0x12, 0x0e, 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x02, 0x69, 0x64, 0x12, 0x1c, 0x0a, 0x09, 0x63, 0x6f, 0x6e, 0x74, 0x61, 0x69, 0x6e, + 0x65, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x63, 0x6f, 0x6e, 0x74, 0x61, 0x69, + 0x6e, 0x65, 0x72, 0x12, 0x10, 0x0a, 0x03, 0x63, 0x70, 0x75, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x03, 0x63, 0x70, 0x75, 0x12, 0x16, 0x0a, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x18, + 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x06, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x12, 0x10, 0x0a, + 0x03, 0x67, 0x70, 0x75, 0x18, 0x05, 0x20, 0x01, 0x28, 0x03, 0x52, 0x03, 0x67, 0x70, 0x75, 0x12, + 0x18, 0x0a, 0x07, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x03, + 0x52, 0x07, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x61, 0x62, + 0x65, 0x6c, 0x73, 0x18, 0x07, 0x20, 0x03, 0x28, 0x09, 0x52, 0x06, 0x6c, 0x61, 0x62, 0x65, 0x6c, + 0x73, 0x22, 0x84, 0x01, 0x0a, 0x0c, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x52, 0x65, 0x71, 0x75, 0x65, + 0x73, 0x74, 0x12, 0x2a, 0x0a, 0x07, 0x70, 0x6f, 0x64, 0x73, 0x70, 0x65, 0x63, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x0b, 0x32, 0x10, 0x2e, 0x66, 0x6c, 0x75, 0x78, 0x69, 0x6f, 0x6e, 0x2e, 0x50, 0x6f, + 0x64, 0x53, 0x70, 0x65, 0x63, 0x52, 0x07, 0x70, 0x6f, 0x64, 0x73, 0x70, 0x65, 
0x63, 0x12, 0x14, + 0x0a, 0x05, 0x63, 0x6f, 0x75, 0x6e, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x63, + 0x6f, 0x75, 0x6e, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x72, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x18, + 0x04, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x72, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x12, 0x18, + 0x0a, 0x07, 0x6a, 0x6f, 0x62, 0x4e, 0x61, 0x6d, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x07, 0x6a, 0x6f, 0x62, 0x4e, 0x61, 0x6d, 0x65, 0x22, 0x39, 0x0a, 0x09, 0x4e, 0x6f, 0x64, 0x65, + 0x41, 0x6c, 0x6c, 0x6f, 0x63, 0x12, 0x16, 0x0a, 0x06, 0x6e, 0x6f, 0x64, 0x65, 0x49, 0x44, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x6e, 0x6f, 0x64, 0x65, 0x49, 0x44, 0x12, 0x14, 0x0a, + 0x05, 0x74, 0x61, 0x73, 0x6b, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x74, 0x61, + 0x73, 0x6b, 0x73, 0x22, 0xb2, 0x01, 0x0a, 0x0d, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x52, 0x65, 0x73, + 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x66, 0x6c, 0x75, 0x78, 0x49, 0x44, 0x18, + 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x06, 0x66, 0x6c, 0x75, 0x78, 0x49, 0x44, 0x12, 0x2e, 0x0a, + 0x08, 0x6e, 0x6f, 0x64, 0x65, 0x6c, 0x69, 0x73, 0x74, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, + 0x12, 0x2e, 0x66, 0x6c, 0x75, 0x78, 0x69, 0x6f, 0x6e, 0x2e, 0x4e, 0x6f, 0x64, 0x65, 0x41, 0x6c, + 0x6c, 0x6f, 0x63, 0x52, 0x08, 0x6e, 0x6f, 0x64, 0x65, 0x6c, 0x69, 0x73, 0x74, 0x12, 0x1a, 0x0a, + 0x08, 0x72, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x64, 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, + 0x08, 0x72, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x64, 0x12, 0x1f, 0x0a, 0x0b, 0x72, 0x65, 0x73, + 0x65, 0x72, 0x76, 0x65, 0x64, 0x5f, 0x61, 0x74, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0a, + 0x72, 0x65, 0x73, 0x65, 0x72, 0x76, 0x65, 0x64, 0x41, 0x74, 0x12, 0x1c, 0x0a, 0x09, 0x61, 0x6c, + 0x6c, 0x6f, 0x63, 0x61, 0x74, 0x65, 0x64, 0x18, 0x05, 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x61, + 0x6c, 0x6c, 0x6f, 0x63, 0x61, 0x74, 0x65, 0x64, 0x22, 0x45, 0x0a, 0x0d, 0x43, 0x61, 0x6e, 0x63, + 0x65, 0x6c, 0x52, 
0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x66, 0x6c, 0x75, + 0x78, 0x49, 0x44, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x06, 0x66, 0x6c, 0x75, 0x78, 0x49, + 0x44, 0x12, 0x1c, 0x0a, 0x09, 0x4e, 0x6f, 0x45, 0x78, 0x69, 0x73, 0x74, 0x4f, 0x4b, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x08, 0x52, 0x09, 0x4e, 0x6f, 0x45, 0x78, 0x69, 0x73, 0x74, 0x4f, 0x4b, 0x22, + 0x3e, 0x0a, 0x0e, 0x43, 0x61, 0x6e, 0x63, 0x65, 0x6c, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, + 0x65, 0x12, 0x16, 0x0a, 0x06, 0x66, 0x6c, 0x75, 0x78, 0x49, 0x44, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x06, 0x66, 0x6c, 0x75, 0x78, 0x49, 0x44, 0x12, 0x14, 0x0a, 0x05, 0x65, 0x72, 0x72, + 0x6f, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x22, + 0xe6, 0x01, 0x0a, 0x0a, 0x4e, 0x6f, 0x64, 0x65, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x1a, + 0x0a, 0x08, 0x63, 0x70, 0x75, 0x41, 0x76, 0x61, 0x69, 0x6c, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x08, 0x63, 0x70, 0x75, 0x41, 0x76, 0x61, 0x69, 0x6c, 0x12, 0x1a, 0x0a, 0x08, 0x67, 0x70, + 0x75, 0x41, 0x76, 0x61, 0x69, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x67, 0x70, + 0x75, 0x41, 0x76, 0x61, 0x69, 0x6c, 0x12, 0x22, 0x0a, 0x0c, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, + 0x65, 0x41, 0x76, 0x61, 0x69, 0x6c, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x0c, 0x73, 0x74, + 0x6f, 0x72, 0x61, 0x67, 0x65, 0x41, 0x76, 0x61, 0x69, 0x6c, 0x12, 0x20, 0x0a, 0x0b, 0x6d, 0x65, + 0x6d, 0x6f, 0x72, 0x79, 0x41, 0x76, 0x61, 0x69, 0x6c, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, + 0x0b, 0x6d, 0x65, 0x6d, 0x6f, 0x72, 0x79, 0x41, 0x76, 0x61, 0x69, 0x6c, 0x12, 0x20, 0x0a, 0x0b, + 0x61, 0x6c, 0x6c, 0x6f, 0x77, 0x65, 0x64, 0x50, 0x6f, 0x64, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, + 0x03, 0x52, 0x0b, 0x61, 0x6c, 0x6c, 0x6f, 0x77, 0x65, 0x64, 0x50, 0x6f, 0x64, 0x73, 0x12, 0x16, + 0x0a, 0x06, 0x6e, 0x6f, 0x64, 0x65, 0x49, 0x50, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, + 0x6e, 0x6f, 0x64, 0x65, 0x49, 0x50, 0x12, 0x20, 0x0a, 
0x0b, 0x72, 0x65, 0x70, 0x6c, 0x69, 0x63, + 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x07, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0b, 0x72, 0x65, 0x70, + 0x6c, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0x1e, 0x0a, 0x0a, 0x4a, 0x47, 0x46, 0x52, + 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x10, 0x0a, 0x03, 0x6a, 0x67, 0x66, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x03, 0x6a, 0x67, 0x66, 0x22, 0x1f, 0x0a, 0x0b, 0x4a, 0x47, 0x46, 0x52, + 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x10, 0x0a, 0x03, 0x6a, 0x67, 0x66, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6a, 0x67, 0x66, 0x32, 0x87, 0x01, 0x0a, 0x0e, 0x46, 0x6c, + 0x75, 0x78, 0x69, 0x6f, 0x6e, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x38, 0x0a, 0x05, + 0x4d, 0x61, 0x74, 0x63, 0x68, 0x12, 0x15, 0x2e, 0x66, 0x6c, 0x75, 0x78, 0x69, 0x6f, 0x6e, 0x2e, + 0x4d, 0x61, 0x74, 0x63, 0x68, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x16, 0x2e, 0x66, + 0x6c, 0x75, 0x78, 0x69, 0x6f, 0x6e, 0x2e, 0x4d, 0x61, 0x74, 0x63, 0x68, 0x52, 0x65, 0x73, 0x70, + 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x3b, 0x0a, 0x06, 0x43, 0x61, 0x6e, 0x63, 0x65, 0x6c, + 0x12, 0x16, 0x2e, 0x66, 0x6c, 0x75, 0x78, 0x69, 0x6f, 0x6e, 0x2e, 0x43, 0x61, 0x6e, 0x63, 0x65, + 0x6c, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x17, 0x2e, 0x66, 0x6c, 0x75, 0x78, 0x69, + 0x6f, 0x6e, 0x2e, 0x43, 0x61, 0x6e, 0x63, 0x65, 0x6c, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, + 0x65, 0x22, 0x00, 0x42, 0x3b, 0x5a, 0x39, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, + 0x6d, 0x2f, 0x63, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x67, 0x65, 0x64, 0x2d, 0x63, 0x6f, 0x6d, 0x70, + 0x75, 0x74, 0x69, 0x6e, 0x67, 0x2f, 0x66, 0x6c, 0x75, 0x78, 0x6e, 0x65, 0x74, 0x65, 0x73, 0x2f, + 0x70, 0x6b, 0x67, 0x2f, 0x66, 0x6c, 0x75, 0x78, 0x69, 0x6f, 0x6e, 0x2d, 0x67, 0x72, 0x70, 0x63, + 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_pkg_fluxion_grpc_fluxion_proto_rawDescOnce sync.Once + file_pkg_fluxion_grpc_fluxion_proto_rawDescData = 
file_pkg_fluxion_grpc_fluxion_proto_rawDesc +) + +func file_pkg_fluxion_grpc_fluxion_proto_rawDescGZIP() []byte { + file_pkg_fluxion_grpc_fluxion_proto_rawDescOnce.Do(func() { + file_pkg_fluxion_grpc_fluxion_proto_rawDescData = protoimpl.X.CompressGZIP(file_pkg_fluxion_grpc_fluxion_proto_rawDescData) + }) + return file_pkg_fluxion_grpc_fluxion_proto_rawDescData +} + +var file_pkg_fluxion_grpc_fluxion_proto_msgTypes = make([]protoimpl.MessageInfo, 9) +var file_pkg_fluxion_grpc_fluxion_proto_goTypes = []interface{}{ + (*PodSpec)(nil), // 0: fluxion.PodSpec + (*MatchRequest)(nil), // 1: fluxion.MatchRequest + (*NodeAlloc)(nil), // 2: fluxion.NodeAlloc + (*MatchResponse)(nil), // 3: fluxion.MatchResponse + (*CancelRequest)(nil), // 4: fluxion.CancelRequest + (*CancelResponse)(nil), // 5: fluxion.CancelResponse + (*NodeStatus)(nil), // 6: fluxion.NodeStatus + (*JGFRequest)(nil), // 7: fluxion.JGFRequest + (*JGFResponse)(nil), // 8: fluxion.JGFResponse +} +var file_pkg_fluxion_grpc_fluxion_proto_depIdxs = []int32{ + 0, // 0: fluxion.MatchRequest.podspec:type_name -> fluxion.PodSpec + 2, // 1: fluxion.MatchResponse.nodelist:type_name -> fluxion.NodeAlloc + 1, // 2: fluxion.FluxionService.Match:input_type -> fluxion.MatchRequest + 4, // 3: fluxion.FluxionService.Cancel:input_type -> fluxion.CancelRequest + 3, // 4: fluxion.FluxionService.Match:output_type -> fluxion.MatchResponse + 5, // 5: fluxion.FluxionService.Cancel:output_type -> fluxion.CancelResponse + 4, // [4:6] is the sub-list for method output_type + 2, // [2:4] is the sub-list for method input_type + 2, // [2:2] is the sub-list for extension type_name + 2, // [2:2] is the sub-list for extension extendee + 0, // [0:2] is the sub-list for field type_name +} + +func init() { file_pkg_fluxion_grpc_fluxion_proto_init() } +func file_pkg_fluxion_grpc_fluxion_proto_init() { + if File_pkg_fluxion_grpc_fluxion_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + 
file_pkg_fluxion_grpc_fluxion_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*PodSpec); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_pkg_fluxion_grpc_fluxion_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*MatchRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_pkg_fluxion_grpc_fluxion_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*NodeAlloc); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_pkg_fluxion_grpc_fluxion_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*MatchResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_pkg_fluxion_grpc_fluxion_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CancelRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_pkg_fluxion_grpc_fluxion_proto_msgTypes[5].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*CancelResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_pkg_fluxion_grpc_fluxion_proto_msgTypes[6].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*NodeStatus); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_pkg_fluxion_grpc_fluxion_proto_msgTypes[7].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*JGFRequest); i { + 
case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_pkg_fluxion_grpc_fluxion_proto_msgTypes[8].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*JGFResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_pkg_fluxion_grpc_fluxion_proto_rawDesc, + NumEnums: 0, + NumMessages: 9, + NumExtensions: 0, + NumServices: 1, + }, + GoTypes: file_pkg_fluxion_grpc_fluxion_proto_goTypes, + DependencyIndexes: file_pkg_fluxion_grpc_fluxion_proto_depIdxs, + MessageInfos: file_pkg_fluxion_grpc_fluxion_proto_msgTypes, + }.Build() + File_pkg_fluxion_grpc_fluxion_proto = out.File + file_pkg_fluxion_grpc_fluxion_proto_rawDesc = nil + file_pkg_fluxion_grpc_fluxion_proto_goTypes = nil + file_pkg_fluxion_grpc_fluxion_proto_depIdxs = nil +} diff --git a/pkg/fluxion-grpc/fluxion.proto b/pkg/fluxion-grpc/fluxion.proto new file mode 100644 index 0000000..579eaef --- /dev/null +++ b/pkg/fluxion-grpc/fluxion.proto @@ -0,0 +1,85 @@ +syntax = "proto3"; +option go_package = "github.com/converged-computing/fluxqueue/pkg/fluxion-grpc"; + +package fluxion; + +// Service definition for Fluxion Service +service FluxionService { + // Sends a Match command + rpc Match(MatchRequest) returns (MatchResponse) {} + rpc Cancel(CancelRequest) returns (CancelResponse) {} +} + +message PodSpec { + string id = 1; + string container = 2; + int32 cpu = 3; + int64 memory = 4; + int64 gpu = 5; + int64 storage = 6; + repeated string labels = 7; +} + +// The Match request message (allocate, allocate_orelse_reserve) +// TODO: this currently takes a podspec, and we multiply by a count +// we should ideally support having a list of different pods +message MatchRequest { + PodSpec podspec 
= 1; + int32 count = 3; + bool reserve = 4; + string jobName = 5; +} + +// The Nodes/Cluster Update Status +message NodeAlloc { + string nodeID = 1; + int32 tasks = 2; +} + +// The Match response message +message MatchResponse { + uint64 fluxID = 1; + repeated NodeAlloc nodelist = 2; + bool reserved = 3; + int64 reserved_at = 4; + // Only needed if we want stats or similar + // float overhead = 5; + // boolean to indicate allocated or not + bool allocated = 5; +} + +message CancelRequest { + uint64 fluxID = 1; + // It's ok if it doesn't exist (don't issue an error) + bool NoExistOK = 2; +} + +// The Match response message +message CancelResponse { + uint64 fluxID = 1; + int32 error = 2; +} + + + +// The Nodes/Cluster Update Status +message NodeStatus { + int32 cpuAvail = 1; + int32 gpuAvail = 2; + int64 storageAvail = 3; + int64 memoryAvail = 4; + int64 allowedPods = 5; + string nodeIP = 6; + int32 replication = 7; +} + +// The JGF response message +message JGFRequest { + string jgf = 1; +} + + +// The JGF response message +message JGFResponse { + string jgf = 1; +} diff --git a/pkg/fluxion-grpc/fluxion_grpc.pb.go b/pkg/fluxion-grpc/fluxion_grpc.pb.go new file mode 100644 index 0000000..fbaffcc --- /dev/null +++ b/pkg/fluxion-grpc/fluxion_grpc.pb.go @@ -0,0 +1,143 @@ +// Code generated by protoc-gen-go-grpc. DO NOT EDIT. +// versions: +// - protoc-gen-go-grpc v1.2.0 +// - protoc v3.20.3 +// source: pkg/fluxion-grpc/fluxion.proto + +package fluxion_grpc + +import ( + context "context" + grpc "google.golang.org/grpc" + codes "google.golang.org/grpc/codes" + status "google.golang.org/grpc/status" +) + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +// Requires gRPC-Go v1.32.0 or later. +const _ = grpc.SupportPackageIsVersion7 + +// FluxionServiceClient is the client API for FluxionService service. 
+// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. +type FluxionServiceClient interface { + // Sends a Match command + Match(ctx context.Context, in *MatchRequest, opts ...grpc.CallOption) (*MatchResponse, error) + Cancel(ctx context.Context, in *CancelRequest, opts ...grpc.CallOption) (*CancelResponse, error) +} + +type fluxionServiceClient struct { + cc grpc.ClientConnInterface +} + +func NewFluxionServiceClient(cc grpc.ClientConnInterface) FluxionServiceClient { + return &fluxionServiceClient{cc} +} + +func (c *fluxionServiceClient) Match(ctx context.Context, in *MatchRequest, opts ...grpc.CallOption) (*MatchResponse, error) { + out := new(MatchResponse) + err := c.cc.Invoke(ctx, "/fluxion.FluxionService/Match", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *fluxionServiceClient) Cancel(ctx context.Context, in *CancelRequest, opts ...grpc.CallOption) (*CancelResponse, error) { + out := new(CancelResponse) + err := c.cc.Invoke(ctx, "/fluxion.FluxionService/Cancel", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +// FluxionServiceServer is the server API for FluxionService service. +// All implementations must embed UnimplementedFluxionServiceServer +// for forward compatibility +type FluxionServiceServer interface { + // Sends a Match command + Match(context.Context, *MatchRequest) (*MatchResponse, error) + Cancel(context.Context, *CancelRequest) (*CancelResponse, error) + mustEmbedUnimplementedFluxionServiceServer() +} + +// UnimplementedFluxionServiceServer must be embedded to have forward compatible implementations. 
+type UnimplementedFluxionServiceServer struct { +} + +func (UnimplementedFluxionServiceServer) Match(context.Context, *MatchRequest) (*MatchResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method Match not implemented") +} +func (UnimplementedFluxionServiceServer) Cancel(context.Context, *CancelRequest) (*CancelResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method Cancel not implemented") +} +func (UnimplementedFluxionServiceServer) mustEmbedUnimplementedFluxionServiceServer() {} + +// UnsafeFluxionServiceServer may be embedded to opt out of forward compatibility for this service. +// Use of this interface is not recommended, as added methods to FluxionServiceServer will +// result in compilation errors. +type UnsafeFluxionServiceServer interface { + mustEmbedUnimplementedFluxionServiceServer() +} + +func RegisterFluxionServiceServer(s grpc.ServiceRegistrar, srv FluxionServiceServer) { + s.RegisterService(&FluxionService_ServiceDesc, srv) +} + +func _FluxionService_Match_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(MatchRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(FluxionServiceServer).Match(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/fluxion.FluxionService/Match", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(FluxionServiceServer).Match(ctx, req.(*MatchRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _FluxionService_Cancel_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(CancelRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(FluxionServiceServer).Cancel(ctx, in) + } + info := &grpc.UnaryServerInfo{ + 
Server: srv, + FullMethod: "/fluxion.FluxionService/Cancel", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(FluxionServiceServer).Cancel(ctx, req.(*CancelRequest)) + } + return interceptor(ctx, in, info, handler) +} + +// FluxionService_ServiceDesc is the grpc.ServiceDesc for FluxionService service. +// It's only intended for direct use with grpc.RegisterService, +// and not to be introspected or modified (even as a copy) +var FluxionService_ServiceDesc = grpc.ServiceDesc{ + ServiceName: "fluxion.FluxionService", + HandlerType: (*FluxionServiceServer)(nil), + Methods: []grpc.MethodDesc{ + { + MethodName: "Match", + Handler: _FluxionService_Match_Handler, + }, + { + MethodName: "Cancel", + Handler: _FluxionService_Cancel_Handler, + }, + }, + Streams: []grpc.StreamDesc{}, + Metadata: "pkg/fluxion-grpc/fluxion.proto", +} diff --git a/pkg/jgf/jgf.go b/pkg/jgf/jgf.go new file mode 100644 index 0000000..bdc93f0 --- /dev/null +++ b/pkg/jgf/jgf.go @@ -0,0 +1,265 @@ +/* +Copyright © 2021 IBM Corporation + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +package jgf + +import ( + "encoding/json" + "fmt" + "log" + "os" + filepath "path" +) + +var ( + // Defaults for nodes + defaultExclusive = false + defaultRank = int64(-1) + defaultSize = int64(1) + defaultUnit = "" + + // Relations + ContainsRelation = "contains" + InRelation = "in" + + // Vertex (node) types + // These are public to be used in the utils package + ClusterType = "cluster" + NodeType = "node" + CoreType = "core" + VirtualCoreType = "vcore" + RackType = "rack" + SocketType = "socket" + SubnetType = "subnet" + MemoryType = "memory" + NvidiaGPU = "nvidiagpu" + GPUType = "gpu" + + // Paths + containmentKey = "containment" +) + +// NewFluxJGF creates and returns a new Flux Json Graph Format object +func NewFluxJGF() FluxJGF { + + // Create a new cluster, and count the top level as a resource + // The index 0 (of the element count) is the cluster + counters := map[string]int64{"cluster": int64(1)} + return FluxJGF{ + Graph: graph{}, + NodeMap: make(map[string]Node), + + // Counters and lookup for resources + Resources: ResourceCounter{counts: counters}, + } +} + +// ToJson returns a Json string of the graph +func (g *FluxJGF) ToJson() (string, error) { + toprint, err := json.MarshalIndent(g.Graph, "", "\t") + return string(toprint), err +} + +// GetNodePath returns the node containment path +func getNodePath(root, subpath string) string { + var path string + if subpath == "" { + path = fmt.Sprintf("/%s", root) + } else { + path = fmt.Sprintf("/%s/%s", root, subpath) + } + return filepath.Clean(path) +} + +// getContainmentPath returns a new map with containment metadata +func (g *FluxJGF) getContainmentPath(subpath string) map[string]string { + return map[string]string{containmentKey: getNodePath(g.Resources.RootName, subpath)} +} + +// MakeBidirectionalEdge makes an edge for a parent and child +func (g *FluxJGF) MakeBidirectionalEdge(parent, child string) { + g.MakeEdge(parent, child, ContainsRelation) + g.MakeEdge(child, parent, InRelation) +} + 
+// MakeEdge creates an edge for the JGF +func (g *FluxJGF) MakeEdge(source string, target string, contains string) { + newedge := edge{ + Source: source, + Target: target, + Metadata: edgeMetadata{Subsystem: containmentKey}, + } + g.Graph.Edges = append(g.Graph.Edges, newedge) +} + +// MakeSubnet creates a subnet for the graph +// The name is typically the ip address +func (g *FluxJGF) MakeSubnet(name string, index int64) Node { + + // Get a resource counter for the subnet + resource := g.Resources.getCounter(name, SubnetType) + resource.Index = index + subpath := resource.NameWithIndex() + return g.makeNewNode(resource, subpath, defaultUnit, defaultSize) +} + +// makeNewNode is a shared function to make a new node from a resource spec +// subpath is the subpath to add to the graph root, e.g., / +// Since there is some variability to this structure, it is assembled by +// the calling function +func (g *FluxJGF) makeNewNode( + resource ResourceCount, + subpath, unit string, + size int64) Node { + + // A subnet comes directly under the cluster, which is the parent + newNode := Node{ + + // Global identifier in graph, as a string + Id: resource.StringElementId(), + Metadata: nodeMetadata{ + Type: resource.Type, + + // The original name without an index + Basename: resource.Name, + + // The name with an index + Name: resource.NameWithIndex(), + + // Integer resource index + Id: resource.Index, + + // Integer global element index + Uniq_id: resource.ElementId, + Rank: defaultRank, + Exclusive: defaultExclusive, + Unit: unit, + Size: size, + + // subnet is one above root graph, so just need it's name + Paths: g.getContainmentPath(subpath), + }, + } + + // Add the new node to the graph + g.Graph.Nodes = append(g.Graph.Nodes, newNode) + g.NodeMap[newNode.Id] = newNode + return newNode +} + +// MakeNode creates a new node for the graph +func (g *FluxJGF) MakeNode(name, subpath string, index int64) Node { + + // Get a resource counter for the node, which is under the subnet 
+ resource := g.Resources.getCounter(name, NodeType) + resource.Index = index + + // Here the full containment path will be: + // // + subpath = fmt.Sprintf("%s/%s", subpath, resource.NameWithIndex()) + return g.makeNewNode(resource, subpath, defaultUnit, defaultSize) +} + +// MakeCore creates a core for the graph +func (g *FluxJGF) MakeCore(name, subpath string, index int64) Node { + + // A core is located at the subnet->node->core + resource := g.Resources.getCounter(name, CoreType) + resource.Index = index + + // Here the full containment path will be: + // /// + subpath = fmt.Sprintf("%s/%s", subpath, resource.NameWithIndex()) + return g.makeNewNode(resource, subpath, defaultUnit, defaultSize) +} + +// MakeMemory creates memory for the graph +// Flux doesn't understand memory? Not sure if this is doing anything +func (g *FluxJGF) MakeMemory( + name, subpath string, + size, index int64) Node { + + // unit is assumed to be MB + unit := "MB" + + // A core is located at the subnet->node->core + resource := g.Resources.getCounter(name, MemoryType) + resource.Index = index + + // Here the full containment path will be: + // /// + subpath = fmt.Sprintf("%s/%s", subpath, resource.NameWithIndex()) + return g.makeNewNode(resource, subpath, unit, size) +} + +// MakeGPU makes a gpu for the graph +func (g *FluxJGF) MakeGPU(name, subpath string, size, index int64) Node { + + // Get a resource counter for the gpu, which is under the subnet->node->gpu + resource := g.Resources.getCounter(name, GPUType) + resource.Index = index + + // Here the full containment path will be: + // // + subpath = fmt.Sprintf("%s/%s", subpath, resource.NameWithIndex()) + return g.makeNewNode(resource, subpath, defaultUnit, size) +} + +// InitCluster creates a new cluster, primarily the root "cluster" node +func (g *FluxJGF) InitCluster(name string) (Node, error) { + if g.Resources.Elements > 0 { + return Node{}, fmt.Errorf("init can only be called for a new cluster") + } + + // The cluster name is 
the index (always 0) with the original name + g.Resources.RootName = fmt.Sprintf("%s0", name) + resource := g.Resources.getCounter(name, ClusterType) + return g.makeNewNode(resource, "", defaultUnit, defaultSize), nil +} + +// WriteJGF writes the JGF to file +// We need to do this to ensure GetResources can be called to return the graph +func (g *FluxJGF) WriteJGF(path string) error { + encodedJGF, err := g.ToBytes() + if err != nil { + return err + } + + f, err := os.Create(path) + if err != nil { + log.Fatalf("[JGF] Couldn't create JGF file!!\n") + return err + } + defer f.Close() + + _, err = f.Write(encodedJGF) + if err != nil { + log.Fatalf("[JGF] Couldn't write JGF file!!\n") + return err + } + return nil +} + +// ToString returns the JGF as bytes +func (g *FluxJGF) ToBytes() ([]byte, error) { + encodedJGF, err := json.MarshalIndent(g, "", " ") + + // This is only provided as a meaningful error message, otherwise + // we could just return the above + if err != nil { + log.Fatalf("[JGF] json.Marshal failed with '%s'\n", err) + } + return encodedJGF, nil +} diff --git a/pkg/jgf/jgf_test.go b/pkg/jgf/jgf_test.go new file mode 100644 index 0000000..173bfcc --- /dev/null +++ b/pkg/jgf/jgf_test.go @@ -0,0 +1,77 @@ +package jgf + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestNewFluxJGF(t *testing.T) { + + // Create a new FluxGraph, assert that it is empty + fluxgraph := NewFluxJGF() + assert.Equal(t, len(fluxgraph.Graph.Nodes), 0) + assert.Equal(t, fluxgraph.Resources.Elements, int64(0)) + assert.Equal(t, len(fluxgraph.NodeMap), 0) + + out, err := fluxgraph.ToJson() + assert.Nil(t, err) + fmt.Println() + fmt.Println("== Empty graph:") + fmt.Println(out) + + // Init the cluster (make the root node) + clusterNode, err := fluxgraph.InitCluster("keebler") + assert.Nil(t, err) + + out, err = fluxgraph.ToJson() + assert.Nil(t, err) + fmt.Println() + fmt.Println("== Graph with Cluster Root:") + fmt.Println(out) + + // Add subnets to 
it + subnetNodeA := fluxgraph.MakeSubnet("east", 0) + subnetNodeB := fluxgraph.MakeSubnet("west", 1) + fluxgraph.MakeBidirectionalEdge(clusterNode.Id, subnetNodeA.Id) + fluxgraph.MakeBidirectionalEdge(clusterNode.Id, subnetNodeB.Id) + + out, err = fluxgraph.ToJson() + assert.Nil(t, err) + fmt.Println() + fmt.Println("== Graph with Two Subnets:") + fmt.Println(out) + + // Add some nodes! + computeNodeA := fluxgraph.MakeNode("node", subnetNodeA.Metadata.Type, 0) + computeNodeB := fluxgraph.MakeNode("node", subnetNodeB.Metadata.Type, 1) + fluxgraph.MakeBidirectionalEdge(subnetNodeA.Id, computeNodeA.Id) + fluxgraph.MakeBidirectionalEdge(subnetNodeB.Id, computeNodeB.Id) + + out, err = fluxgraph.ToJson() + assert.Nil(t, err) + fmt.Println() + fmt.Println("== Graph with Two Subnets, Each with a node:") + fmt.Println(out) + + // Add a GPU to one, and cores to the other + subpath := fmt.Sprintf("%s/%s", subnetNodeA.Metadata.Type, computeNodeA.Metadata.Type) + gpuNodeA := fluxgraph.MakeGPU(NvidiaGPU, subpath, 1, 0) + fluxgraph.MakeBidirectionalEdge(computeNodeA.Id, gpuNodeA.Id) + + subpath = fmt.Sprintf("%s/%s", subnetNodeB.Metadata.Type, computeNodeB.Metadata.Type) + coreNode := fluxgraph.MakeCore(CoreType, subpath, 0) + fluxgraph.MakeBidirectionalEdge(computeNodeB.Id, coreNode.Id) + + // Finally, add some memory to the second compute node + memoryNode := fluxgraph.MakeMemory(MemoryType, subpath, 1<<10, 0) + fluxgraph.MakeBidirectionalEdge(computeNodeA.Id, memoryNode.Id) + + out, err = fluxgraph.ToJson() + assert.Nil(t, err) + fmt.Println() + fmt.Println("== Graph with Two Subnets, Two Nodes, with GPU/Core/Memory:") + fmt.Println(out) + +} diff --git a/pkg/jgf/types.go b/pkg/jgf/types.go new file mode 100644 index 0000000..79f5946 --- /dev/null +++ b/pkg/jgf/types.go @@ -0,0 +1,147 @@ +/* +Copyright © 2021 IBM Corporation + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package jgf + +import "fmt" + +type Node struct { + Id string `json:"id"` + Label string `json:"label,omitempty"` + Metadata nodeMetadata `json:"metadata,omitempty"` +} + +type edge struct { + Source string `json:"source"` + Relation string `json:"relation,omitempty"` + Target string `json:"target"` + Directed bool `json:"directed,omitempty"` + Metadata edgeMetadata `json:"metadata"` +} + +type edgeMetadata struct { + Subsystem string `json:"subsystem"` +} + +type nodeMetadata struct { + Type string `json:"type"` + Basename string `json:"basename"` + Name string `json:"name"` + Id int64 `json:"id"` + Uniq_id int64 `json:"uniq_id"` + Rank int64 `json:"rank,omitempty"` + Exclusive bool `json:"exclusive"` + Unit string `json:"unit"` + Size int64 `json:"size"` + Paths map[string]string `json:"paths,omitempty"` + Properties map[string]string `json:"properties,omitempty"` +} + +type graph struct { + Nodes []Node `json:"nodes"` + Edges []edge `json:"edges"` + // Metadata metadata `json:"metadata,omitempty"` + Directed bool `json:"directed,omitempty"` +} + +type FluxJGF struct { + Graph graph `json:"graph"` + NodeMap map[string]Node `json:"-"` + + // Counters for specific resource types (e.g., rack, node) + Resources ResourceCounter `json:"-"` +} + +// ResourceCounter keeps track of indices for each resource type +type ResourceCounter struct { + + // count of elements by resource type + counts map[string]int64 + + // Total elements in the graph + Elements int64 + + // Name or path of root + RootName string +} + +// ResourceCount provides complete metadata to populate a new 
node +// This object is returned by the resourceCounter for a node to use +// to quickly derive values, etc. +type ResourceCount struct { + + // Name of the resource (e.g., "red") + Name string + + // Name of the resource type (e.g., "node") + Type string + + // Element ID, in the context of total elements in the graph + ElementId int64 + + // Index or count for the resource in question + Index int64 +} + +// Return the resource name + resource +// This is scoped to the resource and not global for all the +// elements in the graph +func (r *ResourceCount) NameWithIndex() string { + return fmt.Sprintf("%s%d", r.Name, r.Index) +} + +// StringElementId is the global index as a string +func (r *ResourceCount) StringElementId() string { + return fmt.Sprintf("%d", r.ElementId) +} + +// StringResourceIndex is the string variant of the resource index +func (r *ResourceCount) StringResourceIndex() string { + return fmt.Sprintf("%d", r.Index) +} + +// NextIndex returns the next global index and adds 1 to the count +func (r *ResourceCounter) NextIndex() int64 { + nextIndex := r.Elements + r.Elements = nextIndex + 1 + return nextIndex +} + +// NextIndex returns the next resource index and adds 1 to the count +func (r *ResourceCounter) NextResourceIndex(resourceType string) int64 { + nextIndex, ok := r.counts[resourceType] + if !ok { + nextIndex = int64(0) + } + r.counts[resourceType] = nextIndex + 1 + return nextIndex +} + +// getCounter returns the counter context for a specific resource type +func (r *ResourceCounter) getCounter( + resourceName string, + resourceType string, +) ResourceCount { + resourceCount := ResourceCount{ + Index: r.NextResourceIndex(resourceName), + Type: resourceType, + Name: resourceName, + ElementId: r.NextIndex(), + } + + // Update the count for the next element (global) and resource count + return resourceCount +} diff --git a/pkg/jobspec/jobspec.go b/pkg/jobspec/jobspec.go new file mode 100644 index 0000000..ed26c79 --- /dev/null +++ 
b/pkg/jobspec/jobspec.go @@ -0,0 +1,129 @@ +/* +Copyright © 2021 IBM Corporation + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +package jobspec + +import ( + "fmt" + "log" + + pb "github.com/converged-computing/fluxqueue/pkg/fluxion-grpc" + "gopkg.in/yaml.v2" +) + +/* + +Structure of the PodSpec that needs to be generated, for reference +Ps: &pb.PodSpec{ + Id: pod_jobspec.ID, + Container: pod_jobspec.Containers[0].Image, + MilliCPU: pod_jobspec.MilliCPU[0], + Memory: pod_jobspec.Memory[0], + Gpu: pod_jobspec.Gpu[0], + Storage: pod_jobspec.Storage[0], + }, +*/ + +// CreateJobSpecYaml writes the protobuf jobspec into a yaml file +func CreateJobSpecYaml(spec *pb.PodSpec, count int32) ([]byte, error) { + + command := []string{spec.Container} + fmt.Println("Labels ", spec.Labels, " ", len(spec.Labels)) + + js := JobSpec{ + Version: Version{Version: 9999}, + Attributes: Attribute{System{Duration: 3600}}, + + // The name of the task likely needs to correspond with the pod + // Since we can't easily change the proto file, for now it is + // storing the pod namespaced name. + Tasks: []Task{ + { + Command: command, + Slot: "default", + Counts: Count{PerSlot: 1}, + }, + }, + } + + // Assemble resources! 
+ socketResources := createSocketResources(spec) + js.Version.Resources = createResources(spec, socketResources, count) + + // Write bytes to file + yamlbytes, err := yaml.Marshal(&js) + if err != nil { + log.Fatalf("[JobSpec] yaml.Marshal failed with '%s'\n", err) + return yamlbytes, err + } + return yamlbytes, nil +} + +// createSocketResources creates the socket resources for the JobSpec +func createSocketResources(spec *pb.PodSpec) []Resource { + + socketResources := []Resource{ + { + Type: "core", Count: int64(spec.Cpu), + }, + } + + // TODO double check what we are converting from -> to + if spec.Memory > 0 { + toMB := spec.Memory >> 20 + socketResources = append(socketResources, Resource{Type: "memory", Count: toMB}) + } + + if spec.Gpu > 0 { + socketResources = append(socketResources, Resource{Type: "gpu", Count: spec.Gpu}) + } + return socketResources +} + +// createResources assembles the list of JobSpec resources +func createResources(spec *pb.PodSpec, socketResources []Resource, count int32) []Resource { + + slotResource := []Resource{ + { + Type: "slot", + Count: int64(count), + Label: "default", + With: socketResources, + }, + } + + // Presence of the zone label means we need to add a subnet + if len(spec.Labels) > 0 { + for _, label := range spec.Labels { + if label == "zone" { + nodeResource := []Resource{ + { + Type: "subnet", + Count: 1, + With: []Resource{ + { + Type: "node", + Count: 1, + With: slotResource, + }, + }, + }, + } + return nodeResource + } + } + } + return slotResource +} diff --git a/pkg/jobspec/types.go b/pkg/jobspec/types.go new file mode 100644 index 0000000..8d6d06f --- /dev/null +++ b/pkg/jobspec/types.go @@ -0,0 +1,53 @@ +/* +Copyright © 2021 IBM Corporation + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package jobspec + +type Version struct { + Version int + Resources []Resource `yaml:"resources,omitempty"` +} + +type Resource struct { + Type string `yaml:"type"` + Count int64 `yaml:"count"` + Label string `yaml:"label,omitempty"` + With []Resource `yaml:"with,omitempty"` +} + +type System struct { + Duration int64 `yaml:"duration,omitempty"` +} + +type Attribute struct { + SystemAttr System `yaml:"system,omitempty"` +} + +type Count struct { + PerSlot int64 `yaml:"per_slot,omitempty"` +} + +type Task struct { + Command []string `yaml:"command,flow"` + Slot string `yaml:"slot"` + Counts Count `yaml:"count"` +} + +type JobSpec struct { + Version Version `yaml:"version,inline"` + Attributes Attribute `yaml:"attributes,omitempty"` + Tasks []Task `yaml:"tasks,omitempty"` +} diff --git a/pkg/service-grpc/service.pb.go b/pkg/service-grpc/service.pb.go new file mode 100644 index 0000000..59c567b --- /dev/null +++ b/pkg/service-grpc/service.pb.go @@ -0,0 +1,354 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.28.1 +// protoc v3.20.3 +// source: pkg/service-grpc/service.proto + +package service_grpc + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. 
+ _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// GroupRequest for a group +type GroupRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Group string `protobuf:"bytes,1,opt,name=group,proto3" json:"group,omitempty"` +} + +func (x *GroupRequest) Reset() { + *x = GroupRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_service_grpc_service_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GroupRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GroupRequest) ProtoMessage() {} + +func (x *GroupRequest) ProtoReflect() protoreflect.Message { + mi := &file_pkg_service_grpc_service_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GroupRequest.ProtoReflect.Descriptor instead. 
+func (*GroupRequest) Descriptor() ([]byte, []int) { + return file_pkg_service_grpc_service_proto_rawDescGZIP(), []int{0} +} + +func (x *GroupRequest) GetGroup() string { + if x != nil { + return x.Group + } + return "" +} + +// GroupResponse +type GroupResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + Size int64 `protobuf:"varint,2,opt,name=size,proto3" json:"size,omitempty"` +} + +func (x *GroupResponse) Reset() { + *x = GroupResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_service_grpc_service_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GroupResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GroupResponse) ProtoMessage() {} + +func (x *GroupResponse) ProtoReflect() protoreflect.Message { + mi := &file_pkg_service_grpc_service_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GroupResponse.ProtoReflect.Descriptor instead. 
+func (*GroupResponse) Descriptor() ([]byte, []int) { + return file_pkg_service_grpc_service_proto_rawDescGZIP(), []int{1} +} + +func (x *GroupResponse) GetName() string { + if x != nil { + return x.Name + } + return "" +} + +func (x *GroupResponse) GetSize() int64 { + if x != nil { + return x.Size + } + return 0 +} + +type ResourceRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields +} + +func (x *ResourceRequest) Reset() { + *x = ResourceRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_service_grpc_service_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *ResourceRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ResourceRequest) ProtoMessage() {} + +func (x *ResourceRequest) ProtoReflect() protoreflect.Message { + mi := &file_pkg_service_grpc_service_proto_msgTypes[2] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ResourceRequest.ProtoReflect.Descriptor instead. 
+func (*ResourceRequest) Descriptor() ([]byte, []int) { + return file_pkg_service_grpc_service_proto_rawDescGZIP(), []int{2} +} + +type ResourceResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Graph string `protobuf:"bytes,1,opt,name=graph,proto3" json:"graph,omitempty"` +} + +func (x *ResourceResponse) Reset() { + *x = ResourceResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_pkg_service_grpc_service_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *ResourceResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ResourceResponse) ProtoMessage() {} + +func (x *ResourceResponse) ProtoReflect() protoreflect.Message { + mi := &file_pkg_service_grpc_service_proto_msgTypes[3] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ResourceResponse.ProtoReflect.Descriptor instead. 
+func (*ResourceResponse) Descriptor() ([]byte, []int) { + return file_pkg_service_grpc_service_proto_rawDescGZIP(), []int{3} +} + +func (x *ResourceResponse) GetGraph() string { + if x != nil { + return x.Graph + } + return "" +} + +var File_pkg_service_grpc_service_proto protoreflect.FileDescriptor + +var file_pkg_service_grpc_service_proto_rawDesc = []byte{ + 0x0a, 0x1e, 0x70, 0x6b, 0x67, 0x2f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2d, 0x67, 0x72, + 0x70, 0x63, 0x2f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x12, 0x07, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x22, 0x24, 0x0a, 0x0c, 0x47, 0x72, 0x6f, + 0x75, 0x70, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x14, 0x0a, 0x05, 0x67, 0x72, 0x6f, + 0x75, 0x70, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x22, + 0x37, 0x0a, 0x0d, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, + 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x03, 0x52, 0x04, 0x73, 0x69, 0x7a, 0x65, 0x22, 0x11, 0x0a, 0x0f, 0x52, 0x65, 0x73, 0x6f, + 0x75, 0x72, 0x63, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x22, 0x28, 0x0a, 0x10, 0x52, + 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, + 0x14, 0x0a, 0x05, 0x67, 0x72, 0x61, 0x70, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, + 0x67, 0x72, 0x61, 0x70, 0x68, 0x32, 0xda, 0x01, 0x0a, 0x15, 0x45, 0x78, 0x74, 0x65, 0x72, 0x6e, + 0x61, 0x6c, 0x50, 0x6c, 0x75, 0x67, 0x69, 0x6e, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, + 0x45, 0x0a, 0x0c, 0x47, 0x65, 0x74, 0x52, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x73, 0x12, + 0x18, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x52, 0x65, 0x73, 0x6f, 0x75, 0x72, + 0x63, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x19, 
0x2e, 0x73, 0x65, 0x72, 0x76, + 0x69, 0x63, 0x65, 0x2e, 0x52, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x52, 0x65, 0x73, 0x70, + 0x6f, 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x3d, 0x0a, 0x0a, 0x4c, 0x69, 0x73, 0x74, 0x47, 0x72, + 0x6f, 0x75, 0x70, 0x73, 0x12, 0x15, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x47, + 0x72, 0x6f, 0x75, 0x70, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x16, 0x2e, 0x73, 0x65, + 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x52, 0x65, 0x73, 0x70, 0x6f, + 0x6e, 0x73, 0x65, 0x22, 0x00, 0x12, 0x3b, 0x0a, 0x08, 0x47, 0x65, 0x74, 0x47, 0x72, 0x6f, 0x75, + 0x70, 0x12, 0x15, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2e, 0x47, 0x72, 0x6f, 0x75, + 0x70, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x16, 0x2e, 0x73, 0x65, 0x72, 0x76, 0x69, + 0x63, 0x65, 0x2e, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, + 0x22, 0x00, 0x42, 0x3b, 0x5a, 0x39, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, + 0x2f, 0x63, 0x6f, 0x6e, 0x76, 0x65, 0x72, 0x67, 0x65, 0x64, 0x2d, 0x63, 0x6f, 0x6d, 0x70, 0x75, + 0x74, 0x69, 0x6e, 0x67, 0x2f, 0x66, 0x6c, 0x75, 0x78, 0x6e, 0x65, 0x74, 0x65, 0x73, 0x2f, 0x70, + 0x6b, 0x67, 0x2f, 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x2d, 0x67, 0x72, 0x70, 0x63, 0x62, + 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_pkg_service_grpc_service_proto_rawDescOnce sync.Once + file_pkg_service_grpc_service_proto_rawDescData = file_pkg_service_grpc_service_proto_rawDesc +) + +func file_pkg_service_grpc_service_proto_rawDescGZIP() []byte { + file_pkg_service_grpc_service_proto_rawDescOnce.Do(func() { + file_pkg_service_grpc_service_proto_rawDescData = protoimpl.X.CompressGZIP(file_pkg_service_grpc_service_proto_rawDescData) + }) + return file_pkg_service_grpc_service_proto_rawDescData +} + +var file_pkg_service_grpc_service_proto_msgTypes = make([]protoimpl.MessageInfo, 4) +var file_pkg_service_grpc_service_proto_goTypes = []interface{}{ + 
(*GroupRequest)(nil), // 0: service.GroupRequest + (*GroupResponse)(nil), // 1: service.GroupResponse + (*ResourceRequest)(nil), // 2: service.ResourceRequest + (*ResourceResponse)(nil), // 3: service.ResourceResponse +} +var file_pkg_service_grpc_service_proto_depIdxs = []int32{ + 2, // 0: service.ExternalPluginService.GetResources:input_type -> service.ResourceRequest + 0, // 1: service.ExternalPluginService.ListGroups:input_type -> service.GroupRequest + 0, // 2: service.ExternalPluginService.GetGroup:input_type -> service.GroupRequest + 3, // 3: service.ExternalPluginService.GetResources:output_type -> service.ResourceResponse + 1, // 4: service.ExternalPluginService.ListGroups:output_type -> service.GroupResponse + 1, // 5: service.ExternalPluginService.GetGroup:output_type -> service.GroupResponse + 3, // [3:6] is the sub-list for method output_type + 0, // [0:3] is the sub-list for method input_type + 0, // [0:0] is the sub-list for extension type_name + 0, // [0:0] is the sub-list for extension extendee + 0, // [0:0] is the sub-list for field type_name +} + +func init() { file_pkg_service_grpc_service_proto_init() } +func file_pkg_service_grpc_service_proto_init() { + if File_pkg_service_grpc_service_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_pkg_service_grpc_service_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GroupRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_pkg_service_grpc_service_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GroupResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_pkg_service_grpc_service_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*ResourceRequest); i { + case 0: + return &v.state + case 
1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_pkg_service_grpc_service_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*ResourceResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_pkg_service_grpc_service_proto_rawDesc, + NumEnums: 0, + NumMessages: 4, + NumExtensions: 0, + NumServices: 1, + }, + GoTypes: file_pkg_service_grpc_service_proto_goTypes, + DependencyIndexes: file_pkg_service_grpc_service_proto_depIdxs, + MessageInfos: file_pkg_service_grpc_service_proto_msgTypes, + }.Build() + File_pkg_service_grpc_service_proto = out.File + file_pkg_service_grpc_service_proto_rawDesc = nil + file_pkg_service_grpc_service_proto_goTypes = nil + file_pkg_service_grpc_service_proto_depIdxs = nil +} diff --git a/pkg/service-grpc/service.proto b/pkg/service-grpc/service.proto new file mode 100644 index 0000000..1d22357 --- /dev/null +++ b/pkg/service-grpc/service.proto @@ -0,0 +1,32 @@ +syntax = "proto3"; +option go_package = "github.com/converged-computing/fluxqueue/pkg/service-grpc"; + +package service; + + +// Service definition for an external plugin like kubectl +service ExternalPluginService { + + // This is supported via a shared file in the container + rpc GetResources(ResourceRequest) returns (ResourceResponse) {} + + // Note we currently cannot support getting group metadata, need to add handle to get info, etc. 
+ rpc ListGroups(GroupRequest) returns (GroupResponse) {} + rpc GetGroup(GroupRequest) returns (GroupResponse) {} +} + +// GroupRequest for a group +message GroupRequest { + string group = 1; +} + +// GroupResponse +message GroupResponse { + string name = 1; + int64 size = 2; +} + +message ResourceRequest {} +message ResourceResponse { + string graph = 1; +} \ No newline at end of file diff --git a/pkg/service-grpc/service_grpc.pb.go b/pkg/service-grpc/service_grpc.pb.go new file mode 100644 index 0000000..5234446 --- /dev/null +++ b/pkg/service-grpc/service_grpc.pb.go @@ -0,0 +1,181 @@ +// Code generated by protoc-gen-go-grpc. DO NOT EDIT. +// versions: +// - protoc-gen-go-grpc v1.2.0 +// - protoc v3.20.3 +// source: pkg/service-grpc/service.proto + +package service_grpc + +import ( + context "context" + grpc "google.golang.org/grpc" + codes "google.golang.org/grpc/codes" + status "google.golang.org/grpc/status" +) + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +// Requires gRPC-Go v1.32.0 or later. +const _ = grpc.SupportPackageIsVersion7 + +// ExternalPluginServiceClient is the client API for ExternalPluginService service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. +type ExternalPluginServiceClient interface { + // This is supported via a shared file in the container + GetResources(ctx context.Context, in *ResourceRequest, opts ...grpc.CallOption) (*ResourceResponse, error) + // Note we currently cannot support getting group metadata, need to add handle to get info, etc. 
+ ListGroups(ctx context.Context, in *GroupRequest, opts ...grpc.CallOption) (*GroupResponse, error) + GetGroup(ctx context.Context, in *GroupRequest, opts ...grpc.CallOption) (*GroupResponse, error) +} + +type externalPluginServiceClient struct { + cc grpc.ClientConnInterface +} + +func NewExternalPluginServiceClient(cc grpc.ClientConnInterface) ExternalPluginServiceClient { + return &externalPluginServiceClient{cc} +} + +func (c *externalPluginServiceClient) GetResources(ctx context.Context, in *ResourceRequest, opts ...grpc.CallOption) (*ResourceResponse, error) { + out := new(ResourceResponse) + err := c.cc.Invoke(ctx, "/service.ExternalPluginService/GetResources", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *externalPluginServiceClient) ListGroups(ctx context.Context, in *GroupRequest, opts ...grpc.CallOption) (*GroupResponse, error) { + out := new(GroupResponse) + err := c.cc.Invoke(ctx, "/service.ExternalPluginService/ListGroups", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *externalPluginServiceClient) GetGroup(ctx context.Context, in *GroupRequest, opts ...grpc.CallOption) (*GroupResponse, error) { + out := new(GroupResponse) + err := c.cc.Invoke(ctx, "/service.ExternalPluginService/GetGroup", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +// ExternalPluginServiceServer is the server API for ExternalPluginService service. +// All implementations must embed UnimplementedExternalPluginServiceServer +// for forward compatibility +type ExternalPluginServiceServer interface { + // This is supported via a shared file in the container + GetResources(context.Context, *ResourceRequest) (*ResourceResponse, error) + // Note we currently cannot support getting group metadata, need to add handle to get info, etc. 
+ ListGroups(context.Context, *GroupRequest) (*GroupResponse, error) + GetGroup(context.Context, *GroupRequest) (*GroupResponse, error) + mustEmbedUnimplementedExternalPluginServiceServer() +} + +// UnimplementedExternalPluginServiceServer must be embedded to have forward compatible implementations. +type UnimplementedExternalPluginServiceServer struct { +} + +func (UnimplementedExternalPluginServiceServer) GetResources(context.Context, *ResourceRequest) (*ResourceResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method GetResources not implemented") +} +func (UnimplementedExternalPluginServiceServer) ListGroups(context.Context, *GroupRequest) (*GroupResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method ListGroups not implemented") +} +func (UnimplementedExternalPluginServiceServer) GetGroup(context.Context, *GroupRequest) (*GroupResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method GetGroup not implemented") +} +func (UnimplementedExternalPluginServiceServer) mustEmbedUnimplementedExternalPluginServiceServer() {} + +// UnsafeExternalPluginServiceServer may be embedded to opt out of forward compatibility for this service. +// Use of this interface is not recommended, as added methods to ExternalPluginServiceServer will +// result in compilation errors. 
+type UnsafeExternalPluginServiceServer interface { + mustEmbedUnimplementedExternalPluginServiceServer() +} + +func RegisterExternalPluginServiceServer(s grpc.ServiceRegistrar, srv ExternalPluginServiceServer) { + s.RegisterService(&ExternalPluginService_ServiceDesc, srv) +} + +func _ExternalPluginService_GetResources_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(ResourceRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(ExternalPluginServiceServer).GetResources(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/service.ExternalPluginService/GetResources", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(ExternalPluginServiceServer).GetResources(ctx, req.(*ResourceRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _ExternalPluginService_ListGroups_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(GroupRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(ExternalPluginServiceServer).ListGroups(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/service.ExternalPluginService/ListGroups", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(ExternalPluginServiceServer).ListGroups(ctx, req.(*GroupRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _ExternalPluginService_GetGroup_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(GroupRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(ExternalPluginServiceServer).GetGroup(ctx, in) + } 
+ info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/service.ExternalPluginService/GetGroup", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(ExternalPluginServiceServer).GetGroup(ctx, req.(*GroupRequest)) + } + return interceptor(ctx, in, info, handler) +} + +// ExternalPluginService_ServiceDesc is the grpc.ServiceDesc for ExternalPluginService service. +// It's only intended for direct use with grpc.RegisterService, +// and not to be introspected or modified (even as a copy) +var ExternalPluginService_ServiceDesc = grpc.ServiceDesc{ + ServiceName: "service.ExternalPluginService", + HandlerType: (*ExternalPluginServiceServer)(nil), + Methods: []grpc.MethodDesc{ + { + MethodName: "GetResources", + Handler: _ExternalPluginService_GetResources_Handler, + }, + { + MethodName: "ListGroups", + Handler: _ExternalPluginService_ListGroups_Handler, + }, + { + MethodName: "GetGroup", + Handler: _ExternalPluginService_GetGroup_Handler, + }, + }, + Streams: []grpc.StreamDesc{}, + Metadata: "pkg/service-grpc/service.proto", +} diff --git a/pkg/service/service.go b/pkg/service/service.go new file mode 100644 index 0000000..622aba0 --- /dev/null +++ b/pkg/service/service.go @@ -0,0 +1,61 @@ +package service + +import ( + "os" + + "github.com/converged-computing/fluxqueue/pkg/defaults" + pb "github.com/converged-computing/fluxqueue/pkg/service-grpc" + + "k8s.io/klog/v2" + + "context" +) + +type ExternalService struct { + pb.UnimplementedExternalPluginServiceServer +} + +// Init is a helper function for any startup stuff, for which now we have none :) +func (f *ExternalService) Init() { + klog.Infof("[fluxqueue] Created external service.") +} + +// GetGroup gets and returns the group info +// TODO no good way to look up group - we would need to ask Fluxion directly OR put the grpc +// service alongside the scheduler plugin, which seems like a bad design +func (s *ExternalService) GetGroup(ctx context.Context, in 
*pb.GroupRequest) (*pb.GroupResponse, error) {
+	klog.Infof("[fluxqueue] Calling get group endpoint! %v", in)
+
+	// Prepare an empty response (that can still be serialized)
+	emptyResponse := &pb.GroupResponse{}
+	return emptyResponse, nil
+}
+
+// ListGroups returns existing groups
+func (s *ExternalService) ListGroups(ctx context.Context, in *pb.GroupRequest) (*pb.GroupResponse, error) {
+
+	emptyResponse := &pb.GroupResponse{}
+
+	// Prepare an empty response (that can still be serialized)
+	klog.Infof("[fluxqueue] Calling list groups endpoint! %v", in)
+
+	return emptyResponse, nil
+}
+
+// GetResources gets the current Kubernetes Json Graph Format JGF
+// This should be created on init of the scheduler
+func (s *ExternalService) GetResources(ctx context.Context, in *pb.ResourceRequest) (*pb.ResourceResponse, error) {
+
+	emptyResponse := &pb.ResourceResponse{}
+
+	// Prepare an empty response (that can still be serialized)
+	klog.Infof("[fluxqueue] Calling get resources endpoint! %v", in)
+
+	// Read the JGF written at scheduler init; surface the underlying error in the log
+	jgf, err := os.ReadFile(defaults.KubernetesJsonGraphFormat)
+	if err != nil {
+		klog.Errorf("[fluxqueue] Error reading JGF: %s", err)
+		return emptyResponse, err
+	}
+	emptyResponse.Graph = string(jgf)
+	return emptyResponse, nil
+}