From d0c3f9ed3bf769624b185e2d73b29c173e1a83a5 Mon Sep 17 00:00:00 2001
From: tongyu <119610311+tongyu0924@users.noreply.github.com>
Date: Tue, 28 Jan 2025 01:28:34 +0800
Subject: [PATCH 1/5] Update sampling_params.md

---
 docs/references/sampling_params.md | 52 +++++++++++++++++++-----------
 1 file changed, 34 insertions(+), 18 deletions(-)

diff --git a/docs/references/sampling_params.md b/docs/references/sampling_params.md
index 77d7c9f82e7..835fba7740a 100644
--- a/docs/references/sampling_params.md
+++ b/docs/references/sampling_params.md
@@ -8,42 +8,58 @@ The `/generate` endpoint accepts the following arguments in the JSON format.
 ```python
 @dataclass
 class GenerateReqInput:
+    """
+    Data structure for the input to the `/generate` endpoint.
+    """
     # The input prompt. It can be a single prompt or a batch of prompts.
     text: Optional[Union[List[str], str]] = None
-    # The token ids for text; one can specify either text or input_ids
+
+    # The token IDs for text; one can specify either `text` or `input_ids`.
     input_ids: Optional[Union[List[List[int]], List[int]]] = None
-    # The embeddings for input_ids; one can specify either text or input_ids or input_embeds.
+
+    # The embeddings for input_ids; one can specify either `text` or `input_ids` or `input_embeds`.
     input_embeds: Optional[Union[List[List[List[float]]], List[List[float]]]] = None
-    # The image input. It can be a file name, a url, or base64 encoded string.
-    # See also python/sglang/srt/utils.py:load_image.
+
+    # The image input. It can be a file name, a URL, or a base64 encoded string.
+    # See also: `python/sglang/srt/utils.py:load_image`.
     image_data: Optional[Union[List[str], str]] = None
-    # The sampling_params. See descriptions below.
+
+    # Sampling parameters. A dictionary or a list of dictionaries. See descriptions below.
     sampling_params: Optional[Union[List[Dict], Dict]] = None
-    # The request id.
+
+    # An optional request ID for tracking requests.
     rid: Optional[Union[List[str], str]] = None
-    # Whether to return logprobs.
+
+    # Whether to return log probabilities for generated tokens.
     return_logprob: Optional[Union[List[bool], bool]] = None
-    # If return logprobs, the start location in the prompt for returning logprobs.
-    # By default, this value is "-1", which means it will only return logprobs for output tokens.
+
+    # If `return_logprob` is True, this specifies the starting position for log probabilities.
+    # Default is `-1`, meaning log probabilities will only be calculated for generated tokens.
     logprob_start_len: Optional[Union[List[int], int]] = None
-    # If return logprobs, the number of top logprobs to return at each position.
+
+    # If `return_logprob` is True, specifies the number of top log probabilities to return for each token.
     top_logprobs_num: Optional[Union[List[int], int]] = None
-    # Whether to detokenize tokens in text in the returned logprobs.
+
+    # Whether to include detokenized text in the log probabilities.
     return_text_in_logprobs: bool = False
-    # Whether to stream output.
+
+    # Whether to enable streaming of the output.
     stream: bool = False
-    # Whether to log metrics for this request (e.g. health_generate calls do not log metrics)
+
+    # Whether to log metrics for this request. Useful for health checks.
     log_metrics: bool = True
-    # The modalities of the image data [image, multi-images, video]
+    # The modalities of the input image. Possible values: [image, multi-images, video].
     modalities: Optional[List[str]] = None
-    # LoRA related
+
+    # Path to LoRA (Low-Rank Adaptation) configuration or checkpoint files.
     lora_path: Optional[Union[List[Optional[str]], Optional[str]]] = None
-    # Session info for continual prompting
+    # Session information for continual prompting (stateful conversations).
     session_params: Optional[Union[List[Dict], Dict]] = None
-    # Custom logit processor for advanced sampling control. Must be a serialized instance
-    # of `CustomLogitProcessor` in python/sglang/srt/sampling/custom_logit_processor.py
+
+    # Custom logit processor for advanced sampling control.
+    # Must be a serialized instance of `CustomLogitProcessor` in `python/sglang/srt/sampling/custom_logit_processor.py`.
     # Use the processor's `to_str()` method to generate the serialized string.
     custom_logit_processor: Optional[Union[List[Optional[str]], str]] = None
 ```

From 33079a34c484fe28bbd3d1327934460e9717a2b6 Mon Sep 17 00:00:00 2001
From: tongyu <119610311+tongyu0924@users.noreply.github.com>
Date: Tue, 28 Jan 2025 01:36:11 +0800
Subject: [PATCH 2/5] Update sampling_params.md

---
 docs/references/sampling_params.md | 57 ++++++++++++++----------------
 1 file changed, 26 insertions(+), 31 deletions(-)

diff --git a/docs/references/sampling_params.md b/docs/references/sampling_params.md
index 835fba7740a..86fc78e9242 100644
--- a/docs/references/sampling_params.md
+++ b/docs/references/sampling_params.md
@@ -8,60 +8,55 @@ The `/generate` endpoint accepts the following arguments in the JSON format.
 ```python
 @dataclass
 class GenerateReqInput:
-    """
-    Data structure for the input to the `/generate` endpoint.
-    """
-    # The input prompt. It can be a single prompt or a batch of prompts.
+    """Defines the input parameters for the /generate endpoint in SGLang."""
+    # The input prompt, can be a single string or a batch of strings.
     text: Optional[Union[List[str], str]] = None
-
-    # The token IDs for text; one can specify either `text` or `input_ids`.
+    # The token IDs for the input prompt; can specify either `text` or `input_ids`.
     input_ids: Optional[Union[List[List[int]], List[int]]] = None
-
-    # The embeddings for input_ids; one can specify either `text` or `input_ids` or `input_embeds`.
+    # The embeddings for the input IDs; can specify either `text`, `input_ids`, or `input_embeds`.
     input_embeds: Optional[Union[List[List[List[float]]], List[List[float]]]] = None
-
-    # The image input. It can be a file name, a URL, or a base64 encoded string.
-    # See also: `python/sglang/srt/utils.py:load_image`.
+    # Image input, can be a file name, a URL, or a base64-encoded string.
+    # Refer to python/sglang/srt/utils.py:load_image for details.
     image_data: Optional[Union[List[str], str]] = None

-    # Sampling parameters. A dictionary or a list of dictionaries. See descriptions below.
+    # Sampling-related parameters
+    # Parameters that define the sampling behavior, described in detail below.
     sampling_params: Optional[Union[List[Dict], Dict]] = None

-    # An optional request ID for tracking requests.
+    ## Metadata
+    # Request ID to track the request (can be a single ID or a batch of IDs).
     rid: Optional[Union[List[str], str]] = None
-
     # Whether to return log probabilities for generated tokens.
     return_logprob: Optional[Union[List[bool], bool]] = None
-
-    # If `return_logprob` is True, this specifies the starting position for log probabilities.
-    # Default is `-1`, meaning log probabilities will only be calculated for generated tokens.
+    # The starting position in the prompt for returning log probabilities.
+    # Defaults to "-1", which means it only returns log probabilities for output tokens.
     logprob_start_len: Optional[Union[List[int], int]] = None
-
-    # If `return_logprob` is True, specifies the number of top log probabilities to return for each token.
+    # The number of top log probabilities to return for each token position.
     top_logprobs_num: Optional[Union[List[int], int]] = None
-
-    # Whether to include detokenized text in the log probabilities.
+    # Whether to include detokenized text in the returned log probabilities.
     return_text_in_logprobs: bool = False

-    # Whether to enable streaming of the output.
+    ## Output streaming and logging
+    # Whether to stream the output in real-time.
     stream: bool = False
-
-    # Whether to log metrics for this request. Useful for health checks.
+    # Whether to log metrics for this request (e.g., for health checks, metrics logging is disabled).
     log_metrics: bool = True
-    # The modalities of the input image. Possible values: [image, multi-images, video].
+    ## Multi-modal and session-related parameters
+    # Defines the modalities for image input (e.g., `image`, `multi-images`, or `video`).
     modalities: Optional[List[str]] = None
-
-    # Path to LoRA (Low-Rank Adaptation) configuration or checkpoint files.
+    # LoRA-related parameters for low-rank adaptation models.
     lora_path: Optional[Union[List[Optional[str]], Optional[str]]] = None
-    # Session information for continual prompting (stateful conversations).
+    ## Session and custom logit processor
+    # Session parameters for continual prompting across multiple requests.
     session_params: Optional[Union[List[Dict], Dict]] = None

-    # Custom logit processor for advanced sampling control.
-    # Must be a serialized instance of `CustomLogitProcessor` in `python/sglang/srt/sampling/custom_logit_processor.py`.
-    # Use the processor's `to_str()` method to generate the serialized string.
+    # Must be a serialized instance of `CustomLogitProcessor` from
+    # python/sglang/srt/sampling/custom_logit_processor.py.
+    # Use the processor's `to_str()` method for serialization.
     custom_logit_processor: Optional[Union[List[Optional[str]], str]] = None
+
 ```

 The `sampling_params` follows this format

From 2eed9478c09c44346a70a0d78f21406f74ebc766 Mon Sep 17 00:00:00 2001
From: tongyu <119610311+tongyu0924@users.noreply.github.com>
Date: Tue, 28 Jan 2025 02:02:34 +0800
Subject: [PATCH 3/5] Update sampling_params.md

---
 docs/references/sampling_params.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/references/sampling_params.md b/docs/references/sampling_params.md
index 86fc78e9242..3dfa7eaf74d 100644
--- a/docs/references/sampling_params.md
+++ b/docs/references/sampling_params.md
@@ -3,7 +3,7 @@ This doc describes the sampling parameters of the SGLang Runtime.
 It is the low-level endpoint of the runtime.
 If you want a high-level endpoint that can automatically handle chat templates, consider using the [OpenAI Compatible API](../backend/openai_api_completions.ipynb).

-The `/generate` endpoint accepts the following arguments in the JSON format.
+The `/generate` endpoint accepts the following arguments in JSON format. These parameters allow customization of input data, output control, and configuration options.

 ```python
 @dataclass

From 35e9453e984fb228bbc6e34e199647be0885ca3f Mon Sep 17 00:00:00 2001
From: tongyu <119610311+tongyu0924@users.noreply.github.com>
Date: Tue, 28 Jan 2025 02:11:23 +0800
Subject: [PATCH 4/5] Update sampling_params.md

---
 docs/references/sampling_params.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/references/sampling_params.md b/docs/references/sampling_params.md
index 3dfa7eaf74d..e63d77ba23e 100644
--- a/docs/references/sampling_params.md
+++ b/docs/references/sampling_params.md
@@ -59,7 +59,7 @@ class GenerateReqInput:

 ```

-The `sampling_params` follows this format
+The `sampling_params` configure the generation process, including sampling strategies and output constraints. Below is a detailed breakdown of each parameter:

 ```python
 # The maximum number of output tokens

From 5f5f4b08aa1f429500a83b24ea236f09d52261e5 Mon Sep 17 00:00:00 2001
From: tongyu <119610311+tongyu0924@users.noreply.github.com>
Date: Tue, 28 Jan 2025 02:11:58 +0800
Subject: [PATCH 5/5] Update sampling_params.md

---
 docs/references/sampling_params.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/references/sampling_params.md b/docs/references/sampling_params.md
index e63d77ba23e..013dfc40963 100644
--- a/docs/references/sampling_params.md
+++ b/docs/references/sampling_params.md
@@ -59,7 +59,7 @@ class GenerateReqInput:

 ```

-The `sampling_params` configure the generation process, including sampling strategies and output constraints. Below is a detailed breakdown of each parameter:
+The `sampling_params` configure the generation process, including sampling strategies and output constraints. Below is a detailed breakdown of each parameter.

 ```python
 # The maximum number of output tokens
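
The fields documented in these patches can be exercised with a request like the one below. This is a minimal sketch rather than part of the patches themselves: it assumes an SGLang server is already running locally on the default port 30000 (for example, launched with `python -m sglang.launch_server --model-path <model> --port 30000`), and the prompt and parameter values are purely illustrative.

```python
import requests

# Assumption: an SGLang server is already running locally, e.g.
#   python -m sglang.launch_server --model-path <model> --port 30000
BASE_URL = "http://localhost:30000"

response = requests.post(
    f"{BASE_URL}/generate",
    json={
        # `text`: a single prompt here; a list of strings would form a batch.
        "text": "The capital of France is",
        # `sampling_params`: the generation controls described in this document.
        "sampling_params": {
            "temperature": 0,
            "max_new_tokens": 32,
        },
        # Request log probabilities for the generated tokens,
        # plus the top-3 alternatives at each position.
        "return_logprob": True,
        "top_logprobs_num": 3,
    },
)
print(response.json())
```

The response is a JSON object containing the generated text along with request metadata, including the requested log probabilities when `return_logprob` is set.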