diff --git a/docs/references/sampling_params.md b/docs/references/sampling_params.md
index 77d7c9f82e7..013dfc40963 100644
--- a/docs/references/sampling_params.md
+++ b/docs/references/sampling_params.md
@@ -3,52 +3,63 @@
 This doc describes the sampling parameters of the SGLang Runtime.
 It is the low-level endpoint of the runtime.
 If you want a high-level endpoint that can automatically handle chat templates, consider using the [OpenAI Compatible API](../backend/openai_api_completions.ipynb).
-The `/generate` endpoint accepts the following arguments in the JSON format.
+The `/generate` endpoint accepts the following arguments in JSON format. These parameters allow customization of input data, output control, and configuration options.
 
 ```python
 @dataclass
 class GenerateReqInput:
-    # The input prompt. It can be a single prompt or a batch of prompts.
+    """Defines the input parameters for the /generate endpoint in SGLang."""
+    # The input prompt; it can be a single string or a batch of strings.
     text: Optional[Union[List[str], str]] = None
-    # The token ids for text; one can specify either text or input_ids
+    # The token IDs for the input prompt; can specify either `text` or `input_ids`.
     input_ids: Optional[Union[List[List[int]], List[int]]] = None
-    # The embeddings for input_ids; one can specify either text or input_ids or input_embeds.
+    # The embeddings for the input IDs; can specify either `text`, `input_ids`, or `input_embeds`.
     input_embeds: Optional[Union[List[List[List[float]]], List[List[float]]]] = None
-    # The image input. It can be a file name, a url, or base64 encoded string.
-    # See also python/sglang/srt/utils.py:load_image.
+    # Image input; it can be a file name, a URL, or a base64-encoded string.
+    # Refer to python/sglang/srt/utils.py:load_image for details.
     image_data: Optional[Union[List[str], str]] = None
-    # The sampling_params. See descriptions below.
+
+    ## Sampling-related parameters
+    # Parameters that define the sampling behavior, described in detail below.
     sampling_params: Optional[Union[List[Dict], Dict]] = None
-    # The request id.
+
+    ## Metadata
+    # Request ID to track the request (can be a single ID or a batch of IDs).
     rid: Optional[Union[List[str], str]] = None
-    # Whether to return logprobs.
+    # Whether to return log probabilities for generated tokens.
     return_logprob: Optional[Union[List[bool], bool]] = None
-    # If return logprobs, the start location in the prompt for returning logprobs.
-    # By default, this value is "-1", which means it will only return logprobs for output tokens.
+    # The starting position in the prompt for returning log probabilities.
+    # Defaults to "-1", which means it only returns log probabilities for output tokens.
     logprob_start_len: Optional[Union[List[int], int]] = None
-    # If return logprobs, the number of top logprobs to return at each position.
+    # The number of top log probabilities to return for each token position.
     top_logprobs_num: Optional[Union[List[int], int]] = None
-    # Whether to detokenize tokens in text in the returned logprobs.
+    # Whether to include detokenized text in the returned log probabilities.
     return_text_in_logprobs: bool = False
-    # Whether to stream output.
+
+    ## Output streaming and logging
+    # Whether to stream the output in real time.
     stream: bool = False
-    # Whether to log metrics for this request (e.g. health_generate calls do not log metrics)
+    # Whether to log metrics for this request (e.g., metrics logging is disabled for health checks).
     log_metrics: bool = True
-    # The modalities of the image data [image, multi-images, video]
+    ## Multi-modal and session-related parameters
+    # Defines the modalities for image input (e.g., `image`, `multi-images`, or `video`).
     modalities: Optional[List[str]] = None
-    # LoRA related
+    # LoRA-related parameters for low-rank adaptation models.
     lora_path: Optional[Union[List[Optional[str]], Optional[str]]] = None
-    # Session info for continual prompting
+    ## Session and custom logit processor
+    # Session parameters for continual prompting across multiple requests.
     session_params: Optional[Union[List[Dict], Dict]] = None
-    # Custom logit processor for advanced sampling control. Must be a serialized instance
-    # of `CustomLogitProcessor` in python/sglang/srt/sampling/custom_logit_processor.py
-    # Use the processor's `to_str()` method to generate the serialized string.
+    # Custom logit processor for advanced sampling control.
+    # Must be a serialized instance of `CustomLogitProcessor` from
+    # python/sglang/srt/sampling/custom_logit_processor.py.
+    # Use the processor's `to_str()` method for serialization.
     custom_logit_processor: Optional[Union[List[Optional[str]], str]] = None
+
 ```
 
-The `sampling_params` follows this format
+The `sampling_params` configure the generation process, including sampling strategies and output constraints. Below is a detailed breakdown of each parameter.
 
 ```python
 # The maximum number of output tokens