-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into phi_4_bug_fix
- Loading branch information
Showing
102 changed files
with
3,307 additions
and
1,489 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
# Two-node SGLang example | ||
|
||
apiVersion: apps/v1 | ||
kind: StatefulSet | ||
metadata: | ||
name: distributed-sglang | ||
spec: | ||
replicas: 2 # number of nodes/pods to run distributed sglang | ||
selector: | ||
matchLabels: | ||
app: distributed-sglang | ||
serviceName: "" | ||
template: | ||
metadata: | ||
labels: | ||
app: distributed-sglang | ||
spec: | ||
containers: | ||
- name: sglang-container | ||
image: docker.io/lmsysorg/sglang:latest | ||
imagePullPolicy: Always # the image may be replaced with an official, version-tagged CI image | ||
command: | ||
- /bin/bash | ||
- -c | ||
# Modify the sglang serving arguments below as necessary. | ||
# NOTE: --expert-parallel-size and --enable-ep-moe are for MoE models such as DeepSeek-R1. | ||
args: | ||
- | | ||
python3 -m sglang.launch_server \ | ||
--model /llm-folder \ | ||
--dist-init-addr sglang-master-pod:5000 \ | ||
--tensor-parallel-size 16 \ | ||
--nnodes 2 \ | ||
--node-rank $POD_INDEX \ | ||
--trust-remote-code \ | ||
--host 0.0.0.0 \ | ||
--port 8000 \ | ||
--enable-metrics \ | ||
--enable-ep-moe \ | ||
--expert-parallel-size 16 | ||
env: | ||
- name: POD_INDEX # reflects the node-rank | ||
valueFrom: | ||
fieldRef: | ||
apiVersion: v1 | ||
fieldPath: metadata.labels['apps.kubernetes.io/pod-index'] | ||
- name: NCCL_DEBUG | ||
value: INFO | ||
resources: | ||
limits: | ||
nvidia.com/gpu: "8" | ||
requests: | ||
volumeMounts: | ||
- mountPath: /dev/shm | ||
name: dshm | ||
- mountPath: /llm-folder | ||
name: llm | ||
securityContext: | ||
privileged: true # used to access RDMA/InfiniBand devices; works together with hostNetwork: true | ||
hostNetwork: true | ||
volumes: | ||
- emptyDir: | ||
medium: Memory | ||
sizeLimit: 10Gi | ||
name: dshm | ||
- hostPath: | ||
path: /llm-folder # replace with a PVC or a hostPath that contains your model weights | ||
type: DirectoryOrCreate | ||
name: llm | ||
#- persistentVolumeClaim: | ||
# claimName: llm-pvc | ||
# name: llm | ||
--- | ||
apiVersion: v1 | ||
kind: Service | ||
metadata: | ||
name: sglang-master-pod | ||
spec: | ||
type: ClusterIP | ||
selector: | ||
app: distributed-sglang | ||
apps.kubernetes.io/pod-index: "0" | ||
ports: | ||
- name: dist-port | ||
port: 5000 | ||
targetPort: 5000 | ||
--- | ||
# the serving service | ||
apiVersion: v1 | ||
kind: Service | ||
metadata: | ||
name: sglang-serving-on-master | ||
spec: | ||
type: NodePort | ||
selector: | ||
app: distributed-sglang | ||
apps.kubernetes.io/pod-index: "0" | ||
ports: | ||
- name: serving | ||
port: 8000 | ||
targetPort: 8000 | ||
- name: metrics | ||
port: 8080 | ||
targetPort: 8080 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,3 +11,4 @@ General Guidance | |
faq.md | ||
learn_more.md | ||
modelscope.md | ||
production_metrics.md |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,12 +18,15 @@ dependencies = ["requests", "tqdm", "numpy", "IPython", "setproctitle"] | |
[project.optional-dependencies] | ||
runtime_common = [ | ||
"aiohttp", | ||
"datasets", | ||
"decord", | ||
"fastapi", | ||
"hf_transfer", | ||
"huggingface_hub", | ||
"interegular", | ||
"llguidance>=0.6.15", | ||
"modelscope", | ||
"ninja", | ||
"orjson", | ||
"packaging", | ||
"pillow", | ||
|
@@ -33,18 +36,15 @@ runtime_common = [ | |
"python-multipart", | ||
"pyzmq>=25.1.2", | ||
"torchao>=0.7.0", | ||
"transformers @ git+https://github.com/huggingface/transformers.git@84f0186", | ||
"uvicorn", | ||
"uvloop", | ||
"xgrammar==0.1.14", | ||
"ninja", | ||
"transformers @ git+https://github.com/huggingface/transformers.git@84f0186", | ||
"llguidance>=0.6.15", | ||
"datasets" | ||
] | ||
|
||
srt = [ | ||
"sglang[runtime_common]", | ||
"sgl-kernel==0.0.3.post6", | ||
"sgl-kernel==0.0.4", | ||
"flashinfer_python==0.2.2.post1", | ||
"torch==2.5.1", | ||
"vllm>=0.6.4.post1,<=0.7.2", | ||
|
Oops, something went wrong.