Skip to content

Commit

Permalink
add: flux riken tutorial 2024
Browse files Browse the repository at this point in the history
This adds the new directory for the Flux Riken tutorial, with
the following additions:
1. flux-tree was removed from flux-sched and is added here
2. tutorial files were kept in rse-ops, are now moved here
3. New tutorial content: flux tree and hierarchy section/examples
4. New tutorial content: flux archive (previously flux filemap)
5. images that show a dummy example of job throughout
6. update of names in login page / directory to be more general

Signed-off-by: vsoch <[email protected]>
  • Loading branch information
vsoch committed Mar 29, 2024
1 parent 9526fbc commit 23db79f
Show file tree
Hide file tree
Showing 89 changed files with 8,434 additions and 0 deletions.
1,020 changes: 1,020 additions & 0 deletions 2024-RIKEN-AWS/JupyterNotebook/README.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
---
# Service account for the cluster-autoscaler. The Deployment in this manifest
# runs under it (serviceAccountName) and the RBAC bindings name it as subject.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: cluster-autoscaler
  namespace: kube-system
  labels:
    k8s-addon: cluster-autoscaler.addons.k8s.io
    k8s-app: cluster-autoscaler
---
# Cluster-scoped permissions granted to the cluster-autoscaler. Each rule
# below lists an API group, the resources in it, and the verbs allowed.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: cluster-autoscaler
  labels:
    k8s-addon: cluster-autoscaler.addons.k8s.io
    k8s-app: cluster-autoscaler
rules:
  # Core group: create and patch events and endpoints.
  - apiGroups: [""]
    resources: ["events", "endpoints"]
    verbs: ["create", "patch"]
  # Core group: create pod evictions.
  - apiGroups: [""]
    resources: ["pods/eviction"]
    verbs: ["create"]
  # Core group: update pod status.
  - apiGroups: [""]
    resources: ["pods/status"]
    verbs: ["update"]
  # Core group: get/update only the endpoints object named cluster-autoscaler.
  - apiGroups: [""]
    resources: ["endpoints"]
    resourceNames: ["cluster-autoscaler"]
    verbs: ["get", "update"]
  # Core group: read and update nodes.
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["watch", "list", "get", "update"]
  # Core group: read-only access to workload and storage objects.
  - apiGroups: [""]
    resources:
      - "namespaces"
      - "pods"
      - "services"
      - "replicationcontrollers"
      - "persistentvolumeclaims"
      - "persistentvolumes"
    verbs: ["watch", "list", "get"]
  # extensions group: read-only access to replicasets and daemonsets.
  - apiGroups: ["extensions"]
    resources: ["replicasets", "daemonsets"]
    verbs: ["watch", "list", "get"]
  # policy group: watch and list pod disruption budgets (no get).
  - apiGroups: ["policy"]
    resources: ["poddisruptionbudgets"]
    verbs: ["watch", "list"]
  # apps group: read-only access to the workload controllers.
  - apiGroups: ["apps"]
    resources: ["statefulsets", "replicasets", "daemonsets"]
    verbs: ["watch", "list", "get"]
  # storage.k8s.io group: read-only access to storage classes and CSI objects.
  - apiGroups: ["storage.k8s.io"]
    resources:
      - "storageclasses"
      - "csinodes"
      - "csidrivers"
      - "csistoragecapacities"
    verbs: ["watch", "list", "get"]
  # batch and extensions groups: read and patch jobs.
  - apiGroups: ["batch", "extensions"]
    resources: ["jobs"]
    verbs: ["get", "list", "watch", "patch"]
  # coordination.k8s.io group: create leases (any name).
  - apiGroups: ["coordination.k8s.io"]
    resources: ["leases"]
    verbs: ["create"]
  # coordination.k8s.io group: get/update only the lease named
  # cluster-autoscaler.
  - apiGroups: ["coordination.k8s.io"]
    resources: ["leases"]
    resourceNames: ["cluster-autoscaler"]
    verbs: ["get", "update"]
---
# Namespaced permissions (kube-system) granted to the cluster-autoscaler:
# ConfigMap access only.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: cluster-autoscaler
  namespace: kube-system
  labels:
    k8s-addon: cluster-autoscaler.addons.k8s.io
    k8s-app: cluster-autoscaler
rules:
  # Any ConfigMap in the namespace: create, list, watch.
  - apiGroups: [""]
    resources: ["configmaps"]
    verbs: ["create", "list", "watch"]
  # Only the two named ConfigMaps: delete, get, update, watch.
  - apiGroups: [""]
    resources: ["configmaps"]
    resourceNames:
      - "cluster-autoscaler-status"
      - "cluster-autoscaler-priority-expander"
    verbs: ["delete", "get", "update", "watch"]

---
# Binds the cluster-autoscaler ClusterRole to the cluster-autoscaler
# ServiceAccount in kube-system.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: cluster-autoscaler
  labels:
    k8s-addon: cluster-autoscaler.addons.k8s.io
    k8s-app: cluster-autoscaler
subjects:
  - kind: ServiceAccount
    name: cluster-autoscaler
    namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-autoscaler

---
# Binds the namespaced cluster-autoscaler Role to the cluster-autoscaler
# ServiceAccount; both live in kube-system.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: cluster-autoscaler
  namespace: kube-system
  labels:
    k8s-addon: cluster-autoscaler.addons.k8s.io
    k8s-app: cluster-autoscaler
subjects:
  - kind: ServiceAccount
    name: cluster-autoscaler
    namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: cluster-autoscaler

---
# Single-replica cluster-autoscaler Deployment in kube-system, using the AWS
# cloud provider and running as the cluster-autoscaler ServiceAccount.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: cluster-autoscaler
  namespace: kube-system
  labels:
    app: cluster-autoscaler
spec:
  replicas: 1
  # The selector must match the pod template labels below.
  selector:
    matchLabels:
      app: cluster-autoscaler
  template:
    metadata:
      labels:
        app: cluster-autoscaler
      annotations:
        # Quoted so the annotation values stay strings.
        prometheus.io/scrape: 'true'
        prometheus.io/port: '8085'
    spec:
      serviceAccountName: cluster-autoscaler
      priorityClassName: system-cluster-critical
      # Pod-level hardening: non-root uid/gid 65534 and the runtime's default
      # seccomp profile.
      securityContext:
        runAsNonRoot: true
        runAsUser: 65534
        fsGroup: 65534
        seccompProfile:
          type: RuntimeDefault
      containers:
        - name: cluster-autoscaler
          image: registry.k8s.io/autoscaling/cluster-autoscaler:v1.26.2
          imagePullPolicy: "Always"
          # Requests equal limits for both cpu and memory.
          resources:
            limits:
              cpu: 100m
              memory: 600Mi
            requests:
              cpu: 100m
              memory: 600Mi
          command:
            - ./cluster-autoscaler
            - --v=4
            - --stderrthreshold=info
            - --cloud-provider=aws
            - --skip-nodes-with-local-storage=false
            - --expander=least-waste
            # Node groups are discovered by ASG tag. NOTE(review): the second
            # tag presumably ends in the cluster name ("jupyterhub") — confirm
            # it matches the cluster this is deployed to.
            - --node-group-auto-discovery=asg:tag=k8s.io/cluster-autoscaler/enabled,k8s.io/cluster-autoscaler/jupyterhub
          volumeMounts:
            # The host CA bundle (see volumes below) is exposed read-only at
            # the path given here.
            - name: ssl-certs
              mountPath: /etc/ssl/certs/ca-certificates.crt  # /etc/ssl/certs/ca-bundle.crt for Amazon Linux Worker Nodes
              readOnly: true
          # Container-level hardening: no privilege escalation, all
          # capabilities dropped, read-only root filesystem.
          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            capabilities:
              drop:
                - ALL
      volumes:
        - name: ssl-certs
          hostPath:
            path: "/etc/ssl/certs/ca-bundle.crt"
79 changes: 79 additions & 0 deletions 2024-RIKEN-AWS/JupyterNotebook/aws/config-aws-ssl.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# A few notes!
# The hub -> authenticator class defaults to "dummy"
# We shouldn't need any image pull secrets assuming public
# There is a note about the database being a sqlite pvc
# (and a TODO for better solution for Kubernetes)

# Hub settings: spawn limit, authenticator, database volume and hub image.
hub:
  # This is the image I built based off of jupyterhub/k8s-hub, 3.0.2 at time of writing this
  image:
    name: ghcr.io/flux-framework/flux-jupyter-hub
    tag: "2023"
    pullPolicy: Always

  # This is the concurrent spawn limit, likely should be increased (defaults to 64)
  concurrentSpawnLimit: 128

  config:
    JupyterHub:
      admin_access: true
      authenticator_class: dummy
    # NOTE(review): shared plaintext password committed to the repository,
    # combined with admin_access above — acceptable only for a short-lived
    # tutorial deployment; confirm.
    DummyAuthenticator:
      password: butter

  db:
    pvc:
      # Defaults to 1Gi
      storage: 32Gi
      # Add the storageclass name, defaults to gp2
      storageClassName: gp3

# # https://z2jh.jupyter.org/en/latest/administrator/optimization.html#scaling-up-in-time-user-placeholders
# scheduling:
# podPriority:
# enabled: true
# userPlaceholder:
# # Specify 3 dummy user pods will be used as placeholders
# replicas: 3

# HTTPS for the proxy, with a Let's Encrypt contact address, served for the
# tutorial hostname listed under hosts.
proxy:
  https:
    enabled: true
    hosts:
      - tutorial.flux-framework.org
    letsencrypt:
      # Quoted on purpose: an unquoted value starting with "[" is parsed by
      # YAML as a flow sequence, not as the string this field expects.
      # NOTE(review): the address appears redacted in this copy — replace it
      # with a real contact email before deploying.
      contactEmail: "[email protected]"

# Per-user ("spawn") server settings: image, resources, entrypoint, and how
# the tutorial files reach the user's home directory.
singleuser:
  # This is the "spawn" image
  image:
    name: ghcr.io/flux-framework/flux-jupyter-spawn
    tag: "2023"
    pullPolicy: Always
  cmd: /entrypoint.sh

  # Guarantee equals limit for both cpu and memory.
  cpu:
    guarantee: 2
    limit: 2
  memory:
    guarantee: '4G'
    limit: '4G'

  # This runs as the root user, who clones and changes ownership to uid 1000.
  # It writes into the same flux-tutorial volume the user container mounts.
  initContainers:
    - name: init-myservice
      image: ghcr.io/flux-framework/flux-jupyter-init:2023
      command: ["/entrypoint.sh"]
      volumeMounts:
        - name: flux-tutorial
          mountPath: /home/jovyan

  # This is how we get the tutorial files added
  storage:
    type: none

    # gitRepo volume is deprecated so we need another way
    # https://kubernetes.io/docs/concepts/storage/volumes/#gitrepo
    extraVolumes:
      - name: flux-tutorial
        emptyDir: {}
    extraVolumeMounts:
      - name: flux-tutorial
        mountPath: /home/jovyan
62 changes: 62 additions & 0 deletions 2024-RIKEN-AWS/JupyterNotebook/aws/config-aws.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# A few notes!
# The hub -> authenticator class defaults to "dummy"
# We shouldn't need any image pull secrets assuming public
# There is a note about the database being a sqlite pvc
# (and a TODO for better solution for Kubernetes)

# Hub settings: spawn limit, authenticator and hub image.
hub:
  # This is the image I built based off of jupyterhub/k8s-hub, 3.0.2 at time of writing this
  image:
    name: ghcr.io/flux-framework/flux-jupyter-hub
    tag: "2023"
    pullPolicy: Always

  # This is the concurrent spawn limit, likely should be increased (defaults to 64)
  concurrentSpawnLimit: 10

  config:
    JupyterHub:
      admin_access: true
      authenticator_class: dummy
    # NOTE(review): shared plaintext password committed to the repository,
    # combined with admin_access above — acceptable only for a short-lived
    # tutorial deployment; confirm.
    DummyAuthenticator:
      password: butter

# User placeholder pods with pod priority enabled; background:
# https://z2jh.jupyter.org/en/latest/administrator/optimization.html#scaling-up-in-time-user-placeholders
scheduling:
  userPlaceholder:
    # Specify 3 dummy user pods will be used as placeholders
    replicas: 3
  podPriority:
    enabled: true

# Per-user ("spawn") server settings: image, resource limits, entrypoint, and
# how the tutorial files reach the user's home directory.
singleuser:
  # This is the "spawn" image
  image:
    name: ghcr.io/flux-framework/flux-jupyter-spawn
    tag: "2023"
    pullPolicy: Always
  cmd: /entrypoint.sh

  # Limits only; no guarantee is set in this file.
  cpu:
    limit: 1
  memory:
    limit: '4G'

  # This runs as the root user, who clones and changes ownership to uid 1000.
  # It writes into the same flux-tutorial volume the user container mounts.
  initContainers:
    - name: init-myservice
      image: ghcr.io/flux-framework/flux-jupyter-init:2023
      command: ["/entrypoint.sh"]
      volumeMounts:
        - name: flux-tutorial
          mountPath: /home/jovyan

  # This is how we get the tutorial files added
  storage:
    type: none

    # gitRepo volume is deprecated so we need another way
    # https://kubernetes.io/docs/concepts/storage/volumes/#gitrepo
    extraVolumes:
      - name: flux-tutorial
        emptyDir: {}
    extraVolumeMounts:
      - name: flux-tutorial
        mountPath: /home/jovyan
Loading

0 comments on commit 23db79f

Please sign in to comment.