-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This adds the new directory for the Flux Riken tutorial, with the following additions: 1. flux-tree was removed from flux-sched and is added here 2. tutorial files were kept in rse-ops, are now moved here 3. New tutorial content: flux tree and hierarchy section/examples 4. New tutorial content: flux archive (previously flux filemap) 5. images that show a dummy example of job throughout 6. update of names in login page / directory to be more general Signed-off-by: vsoch <[email protected]>
- Loading branch information
Showing
89 changed files
with
8,434 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
180 changes: 180 additions & 0 deletions
180
2024-RIKEN-AWS/JupyterNotebook/aws/cluster-autoscaler-autodiscover.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,180 @@ | ||
---
# Service account for the cluster-autoscaler in kube-system. The
# ClusterRoleBinding and RoleBinding in this file list it as their subject.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: cluster-autoscaler
  namespace: kube-system
  labels:
    k8s-addon: cluster-autoscaler.addons.k8s.io
    k8s-app: cluster-autoscaler
---
# Cluster-wide permissions for the cluster-autoscaler. Each rule below is
# independent; rules carrying resourceNames apply only to the named object.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: cluster-autoscaler
  labels:
    k8s-addon: cluster-autoscaler.addons.k8s.io
    k8s-app: cluster-autoscaler
rules:
  # Core API group: create/patch events and endpoints
  - apiGroups: ['']
    resources: ['events', 'endpoints']
    verbs: ['create', 'patch']
  # Core API group: create pod evictions
  - apiGroups: ['']
    resources: ['pods/eviction']
    verbs: ['create']
  # Core API group: update pod status
  - apiGroups: ['']
    resources: ['pods/status']
    verbs: ['update']
  # Read/update only the endpoints object named "cluster-autoscaler"
  - apiGroups: ['']
    resources: ['endpoints']
    resourceNames: ['cluster-autoscaler']
    verbs: ['get', 'update']
  # Nodes: read access plus update
  - apiGroups: ['']
    resources: ['nodes']
    verbs: ['watch', 'list', 'get', 'update']
  # Read-only access to core workload and storage objects
  - apiGroups: ['']
    resources:
      - namespaces
      - pods
      - services
      - replicationcontrollers
      - persistentvolumeclaims
      - persistentvolumes
    verbs: ['watch', 'list', 'get']
  # Read-only access to controllers in the "extensions" group
  - apiGroups: ['extensions']
    resources: ['replicasets', 'daemonsets']
    verbs: ['watch', 'list', 'get']
  # Pod disruption budgets: watch and list only
  - apiGroups: ['policy']
    resources: ['poddisruptionbudgets']
    verbs: ['watch', 'list']
  # Read-only access to controllers in the "apps" group
  - apiGroups: ['apps']
    resources: ['statefulsets', 'replicasets', 'daemonsets']
    verbs: ['watch', 'list', 'get']
  # Read-only access to storage classes and CSI objects
  - apiGroups: ['storage.k8s.io']
    resources: ['storageclasses', 'csinodes', 'csidrivers', 'csistoragecapacities']
    verbs: ['watch', 'list', 'get']
  # Jobs: read access plus patch
  - apiGroups: ['batch', 'extensions']
    resources: ['jobs']
    verbs: ['get', 'list', 'watch', 'patch']
  # Leases: any lease may be created ...
  - apiGroups: ['coordination.k8s.io']
    resources: ['leases']
    verbs: ['create']
  # ... but only the lease named "cluster-autoscaler" may be read or updated
  - apiGroups: ['coordination.k8s.io']
    resources: ['leases']
    resourceNames: ['cluster-autoscaler']
    verbs: ['get', 'update']
---
# Namespaced (kube-system) permissions on ConfigMaps for the cluster-autoscaler.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: cluster-autoscaler
  namespace: kube-system
  labels:
    k8s-addon: cluster-autoscaler.addons.k8s.io
    k8s-app: cluster-autoscaler
rules:
  # Any ConfigMap in the namespace may be created, listed and watched
  - apiGroups: ['']
    resources: ['configmaps']
    verbs: ['create', 'list', 'watch']
  # Only the two named ConfigMaps may be deleted, read or updated
  - apiGroups: ['']
    resources: ['configmaps']
    resourceNames:
      - cluster-autoscaler-status
      - cluster-autoscaler-priority-expander
    verbs: ['delete', 'get', 'update', 'watch']
|
||
---
# Grants the cluster-autoscaler ClusterRole to the cluster-autoscaler
# service account in kube-system.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: cluster-autoscaler
  labels:
    k8s-addon: cluster-autoscaler.addons.k8s.io
    k8s-app: cluster-autoscaler
subjects:
  - kind: ServiceAccount
    name: cluster-autoscaler
    namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-autoscaler
|
||
---
# Grants the namespaced cluster-autoscaler Role (kube-system) to the
# cluster-autoscaler service account.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: cluster-autoscaler
  namespace: kube-system
  labels:
    k8s-addon: cluster-autoscaler.addons.k8s.io
    k8s-app: cluster-autoscaler
subjects:
  - kind: ServiceAccount
    name: cluster-autoscaler
    namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: cluster-autoscaler
|
||
---
# Single-replica cluster-autoscaler Deployment in kube-system, configured for
# the AWS cloud provider with node groups discovered by ASG tag.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: cluster-autoscaler
  namespace: kube-system
  labels:
    app: cluster-autoscaler
spec:
  replicas: 1
  selector:
    matchLabels:
      app: cluster-autoscaler
  template:
    metadata:
      labels:
        app: cluster-autoscaler
      annotations:
        # Quoted so the values stay strings (annotation values must be strings)
        prometheus.io/scrape: "true"
        prometheus.io/port: "8085"
    spec:
      serviceAccountName: cluster-autoscaler
      priorityClassName: system-cluster-critical
      # Pod-level hardening: non-root uid/gid 65534 and the runtime's default seccomp profile
      securityContext:
        runAsNonRoot: true
        runAsUser: 65534
        fsGroup: 65534
        seccompProfile:
          type: RuntimeDefault
      containers:
        - name: cluster-autoscaler
          image: registry.k8s.io/autoscaling/cluster-autoscaler:v1.26.2
          imagePullPolicy: Always
          command:
            - ./cluster-autoscaler
            - --v=4
            - --stderrthreshold=info
            - --cloud-provider=aws
            - --skip-nodes-with-local-storage=false
            - --expander=least-waste
            # Discover ASGs carrying both tags; the second tag ends in the cluster name ("jupyterhub")
            - --node-group-auto-discovery=asg:tag=k8s.io/cluster-autoscaler/enabled,k8s.io/cluster-autoscaler/jupyterhub
          # Requests equal limits for both cpu and memory
          resources:
            requests:
              cpu: 100m
              memory: 600Mi
            limits:
              cpu: 100m
              memory: 600Mi
          # Container-level hardening: no privilege escalation, no capabilities, read-only root fs
          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            capabilities:
              drop:
                - ALL
          volumeMounts:
            # The host CA bundle is exposed inside the container at this path
            - name: ssl-certs
              mountPath: /etc/ssl/certs/ca-certificates.crt
              readOnly: true
      volumes:
        # Host path of the CA bundle; per the original note this is the location
        # on Amazon Linux worker nodes
        - name: ssl-certs
          hostPath:
            path: /etc/ssl/certs/ca-bundle.crt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
# A few notes! | ||
# The hub -> authenticator class defaults to "dummy" | ||
# We shouldn't need any image pull secrets assuming public | ||
# There is a note about the database being a sqlite pvc | ||
# (and a TODO for better solution for Kubernetes) | ||
|
||
# Hub configuration. The concurrent spawn limit is set to 128 here
# (the author notes the default is 64).
hub:
  concurrentSpawnLimit: 128
  config:
    JupyterHub:
      authenticator_class: dummy
      admin_access: true
    DummyAuthenticator:
      # NOTE(review): shared plain-text password committed with the config;
      # confirm this is acceptable for the deployment
      password: butter
  db:
    pvc:
      # Defaults to 1Gi
      storage: 32Gi
      # Storage class name for the volume (defaults to gp2)
      storageClassName: gp3

  # Custom hub image, built from jupyterhub/k8s-hub (3.0.2 at the time of writing)
  image:
    name: ghcr.io/flux-framework/flux-jupyter-hub
    tag: '2023'
    pullPolicy: Always
|
||
# # https://z2jh.jupyter.org/en/latest/administrator/optimization.html#scaling-up-in-time-user-placeholders | ||
# scheduling: | ||
# podPriority: | ||
# enabled: true | ||
# userPlaceholder: | ||
# # Specify 3 dummy user pods will be used as placeholders | ||
# replicas: 3 | ||
|
||
# HTTPS for the proxy: certificates are requested via Let's Encrypt for the
# hosts listed below.
proxy:
  https:
    enabled: true
    hosts:
      - tutorial.flux-framework.org
    letsencrypt:
      # Quoted because a plain value starting with "[" is parsed by YAML as a
      # flow sequence rather than a string.
      # NOTE(review): the value below looks like a redaction placeholder, not a
      # real address - TODO replace with the actual contact email before deploying.
      contactEmail: '[email protected]'
|
||
# Settings for the per-user ("spawn") notebook server
singleuser:
  image:
    name: ghcr.io/flux-framework/flux-jupyter-spawn
    tag: '2023'
    pullPolicy: Always
  # Guarantee equals limit for both cpu and memory
  cpu:
    guarantee: 2
    limit: 2
  memory:
    guarantee: 4G
    limit: 4G
  cmd: /entrypoint.sh

  # The init container runs as the root user; it clones the tutorial files
  # and changes their ownership to uid 1000
  initContainers:
    - name: init-myservice
      image: ghcr.io/flux-framework/flux-jupyter-init:2023
      command:
        - /entrypoint.sh
      volumeMounts:
        - name: flux-tutorial
          mountPath: /home/jovyan

  # How the tutorial files get added: an emptyDir volume is mounted at
  # /home/jovyan, the same path the init container mounts above
  storage:
    type: none

    # gitRepo volumes are deprecated, so another approach is needed
    # https://kubernetes.io/docs/concepts/storage/volumes/#gitrepo
    extraVolumes:
      - name: flux-tutorial
        emptyDir: {}
    extraVolumeMounts:
      - name: flux-tutorial
        mountPath: /home/jovyan
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
# A few notes! | ||
# The hub -> authenticator class defaults to "dummy" | ||
# We shouldn't need any image pull secrets assuming public | ||
# There is a note about the database being a sqlite pvc | ||
# (and a TODO for better solution for Kubernetes) | ||
|
||
# Hub configuration. The concurrent spawn limit is set to 10 here, which is
# below the default of 64 noted by the author.
# NOTE(review): the original note says the limit "likely should be increased" - confirm 10 is intended.
hub:
  concurrentSpawnLimit: 10
  config:
    JupyterHub:
      authenticator_class: dummy
      admin_access: true
    DummyAuthenticator:
      # NOTE(review): shared plain-text password committed with the config;
      # confirm this is acceptable for the deployment
      password: butter

  # Custom hub image, built from jupyterhub/k8s-hub (3.0.2 at the time of writing)
  image:
    name: ghcr.io/flux-framework/flux-jupyter-hub
    tag: '2023'
    pullPolicy: Always
|
||
# User placeholders, see:
# https://z2jh.jupyter.org/en/latest/administrator/optimization.html#scaling-up-in-time-user-placeholders
scheduling:
  userPlaceholder:
    # Three dummy user pods are used as placeholders
    replicas: 3
  podPriority:
    enabled: true
|
||
# Settings for the per-user ("spawn") notebook server
singleuser:
  image:
    name: ghcr.io/flux-framework/flux-jupyter-spawn
    tag: '2023'
    pullPolicy: Always
  # Only limits are set here (no guarantees)
  cpu:
    limit: 1
  memory:
    limit: 4G
  cmd: /entrypoint.sh

  # The init container runs as the root user; it clones the tutorial files
  # and changes their ownership to uid 1000
  initContainers:
    - name: init-myservice
      image: ghcr.io/flux-framework/flux-jupyter-init:2023
      command:
        - /entrypoint.sh
      volumeMounts:
        - name: flux-tutorial
          mountPath: /home/jovyan

  # How the tutorial files get added: an emptyDir volume is mounted at
  # /home/jovyan, the same path the init container mounts above
  storage:
    type: none
    # gitRepo volumes are deprecated, so another approach is needed
    # https://kubernetes.io/docs/concepts/storage/volumes/#gitrepo
    extraVolumes:
      - name: flux-tutorial
        emptyDir: {}
    extraVolumeMounts:
      - name: flux-tutorial
        mountPath: /home/jovyan
Oops, something went wrong.