Skip to content

Commit

Permalink
Stricter tracing inclusions (#738)
Browse files Browse the repository at this point in the history
  • Loading branch information
noyoshi authored Jan 17, 2025
1 parent 9bab135 commit 30c97cb
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 12 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/router_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
- name: Install Rust
uses: actions-rs/toolchain@v1
with:
toolchain: 1.79.0
toolchain: 1.83.0
override: true
components: rustfmt, clippy
- name: Install Protoc
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Rust builder
FROM lukemathwalker/cargo-chef:latest-rust-1.79 AS chef
FROM lukemathwalker/cargo-chef:latest-rust-1.83 AS chef
WORKDIR /usr/src

ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
Expand Down
14 changes: 7 additions & 7 deletions router/src/infer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ impl Infer {
}

/// Add a new request to the queue and return a stream of InferStreamResponse
#[instrument(skip(self))]
#[instrument(skip_all,fields(parameters = ? request.parameters))]
pub(crate) async fn generate_stream(
&self,
request: GenerateRequest,
Expand Down Expand Up @@ -400,7 +400,7 @@ impl Infer {
}

/// Add a new request to the queue and return an InferResponse
#[instrument(skip(self))]
#[instrument(skip_all,fields(parameters = ? request.parameters))]
pub(crate) async fn generate(
&self,
request: GenerateRequest,
Expand Down Expand Up @@ -488,7 +488,7 @@ impl Infer {
}
}

#[instrument(skip(self))]
#[instrument(skip_all,fields(parameters = ? request.parameters))]
pub(crate) async fn embed(&self, request: EmbedRequest) -> Result<EmbedResponse, InferError> {
// Limit concurrent requests by acquiring a permit from the semaphore
let _permit = self
Expand Down Expand Up @@ -618,7 +618,7 @@ impl Infer {
}
}

#[instrument(skip(self))]
#[instrument(skip_all)]
pub(crate) async fn classify(
&self,
request: ClassifyRequest,
Expand Down Expand Up @@ -731,7 +731,7 @@ impl Infer {
}
}

#[instrument(skip(self))]
#[instrument(skip_all)]
pub(crate) async fn classify_batch(
&self,
request: BatchClassifyRequest,
Expand Down Expand Up @@ -861,7 +861,7 @@ impl Infer {
}
}

#[instrument(skip(self))]
#[instrument(skip_all,fields(parameters = ? request.parameters))]
pub(crate) async fn embed_batch(
&self,
request: BatchEmbedRequest,
Expand Down Expand Up @@ -996,7 +996,7 @@ impl Infer {

/// Add best_of new requests to the queue and return an InferResponse of the sequence with
/// the highest log probability per token
#[instrument(skip(self))]
#[instrument(skip_all,fields(parameters = ? request.parameters, best_of, prefix_caching))]
pub(crate) async fn generate_best_of(
&self,
request: GenerateRequest,
Expand Down
38 changes: 35 additions & 3 deletions router/src/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,18 @@ example = json ! ({"error": "Input validation error"})),
example = json ! ({"error": "Incomplete generation"})),
)
)]
#[instrument(skip(infer, req))]
#[instrument(
skip_all,
fields(
parameters = ? req.0.parameters,
total_time,
validation_time,
queue_time,
inference_time,
time_per_token,
seed,
)
)]
async fn compat_generate(
default_return_full_text: Extension<bool>,
infer: Extension<Infer>,
Expand All @@ -91,6 +102,10 @@ async fn compat_generate(
req_headers: HeaderMap,
req: Json<CompatGenerateRequest>,
) -> Result<Response, (StatusCode, Json<ErrorResponse>)> {
// Log some useful headers to the span.
let span = tracing::Span::current();
trace_headers(req_headers.clone(), &span);

let mut req = req.0;

// default return_full_text given the pipeline_tag
Expand Down Expand Up @@ -147,7 +162,7 @@ example = json ! ({"error": "Input validation error"})),
example = json ! ({"error": "Incomplete generation"})),
)
)]
#[instrument(skip(infer, req))]
#[instrument(skip(infer, req, req_headers))]
async fn completions_v1(
default_return_full_text: Extension<bool>,
infer: Extension<Infer>,
Expand All @@ -158,6 +173,8 @@ async fn completions_v1(
req_headers: HeaderMap,
req: Json<CompletionRequest>,
) -> Result<Response, (StatusCode, Json<ErrorResponse>)> {
let span = tracing::Span::current();
trace_headers(req_headers.clone(), &span);
let mut req = req.0;
if req.model == info.model_id.as_str() {
// Allow user to specify the base model, but treat it as an empty adapter_id
Expand Down Expand Up @@ -232,7 +249,7 @@ example = json ! ({"error": "Input validation error"})),
example = json ! ({"error": "Incomplete generation"})),
)
)]
#[instrument(skip(infer, req))]
#[instrument(skip(infer, req, req_headers))]
async fn chat_completions_v1(
default_return_full_text: Extension<bool>,
infer: Extension<Infer>,
Expand All @@ -243,6 +260,8 @@ async fn chat_completions_v1(
req_headers: HeaderMap,
req: Json<ChatCompletionRequest>,
) -> Result<Response, (StatusCode, Json<ErrorResponse>)> {
let span = tracing::Span::current();
trace_headers(req_headers.clone(), &span);
let mut req = req.0;
let model_id = info.model_id.clone();
if req.model == info.model_id.as_str() {
Expand Down Expand Up @@ -632,6 +651,7 @@ async fn generate(
mut req: Json<GenerateRequest>,
) -> Result<(HeaderMap, Json<GenerateResponse>), (StatusCode, Json<ErrorResponse>)> {
let span = tracing::Span::current();
trace_headers(req_headers.clone(), &span);
let start_time = Instant::now();
metrics::increment_counter!("lorax_request_count");

Expand Down Expand Up @@ -2058,3 +2078,15 @@ async fn tokenize(
))
}
}

/// Copies a fixed set of request headers onto `span` as string-valued fields.
///
/// For each of `x-predibase-tenant`, `user-agent` and `x-b3-traceid` that is present in
/// `headers`, the header value is recorded on `span` under a field of the same name.
/// Headers that are absent are skipped, so nothing is recorded for them. When a header
/// is present but `to_str` fails on its value, a fallback string is recorded instead:
/// `"unknown"` for the tenant and user agent, and an empty string for the trace id.
///
/// NOTE(review): `tracing` spans normally only accept values for fields that were
/// declared when the span was created. Confirm that the `#[instrument]` attributes of
/// the calling handlers declare these field names; otherwise these records may be
/// silently dropped.
fn trace_headers(headers: HeaderMap, span: &tracing::Span) {
    // (header name / span field name, fallback used when the value is not a valid string)
    const TRACED_HEADERS: [(&str, &str); 3] = [
        ("x-predibase-tenant", "unknown"),
        ("user-agent", "unknown"),
        ("x-b3-traceid", ""),
    ];

    for &(name, fallback) in &TRACED_HEADERS {
        // `if let` rather than `Option::map`: recording is a side effect, not a mapping.
        if let Some(value) = headers.get(name) {
            span.record(name, value.to_str().unwrap_or(fallback));
        }
    }
}

0 comments on commit 30c97cb

Please sign in to comment.