From 8a5857ecf4b40387c724bf1a4bc005342706b9b9 Mon Sep 17 00:00:00 2001 From: Noah Yoshida Date: Sat, 11 Jan 2025 08:31:04 +0900 Subject: [PATCH 1/8] skipping a lot more input fields to the default tracing --- router/src/infer.rs | 14 +++++++------- router/src/server.rs | 18 ++++++++++++++---- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/router/src/infer.rs b/router/src/infer.rs index 703dacd46..5b39ec37d 100644 --- a/router/src/infer.rs +++ b/router/src/infer.rs @@ -276,7 +276,7 @@ impl Infer { } /// Add a new request to the queue and return a stream of InferStreamResponse - #[instrument(skip(self))] + #[instrument(skip_all,fields(parameters = ? request.parameters))] pub(crate) async fn generate_stream( &self, request: GenerateRequest, @@ -400,7 +400,7 @@ impl Infer { } /// Add a new request to the queue and return a InferResponse - #[instrument(skip(self))] + #[instrument(skip_all,fields(parameters = ? request.parameters))] pub(crate) async fn generate( &self, request: GenerateRequest, @@ -488,7 +488,7 @@ impl Infer { } } - #[instrument(skip(self))] + #[instrument(skip_all,fields(parameters = ? request.parameters))] pub(crate) async fn embed(&self, request: EmbedRequest) -> Result { // Limit concurrent requests by acquiring a permit from the semaphore let _permit = self @@ -618,7 +618,7 @@ impl Infer { } } - #[instrument(skip(self))] + #[instrument(skip_all)] pub(crate) async fn classify( &self, request: ClassifyRequest, @@ -731,7 +731,7 @@ impl Infer { } } - #[instrument(skip(self))] + #[instrument(skip_all)] pub(crate) async fn classify_batch( &self, request: BatchClassifyRequest, @@ -861,7 +861,7 @@ impl Infer { } } - #[instrument(skip(self))] + #[instrument(skip_all,fields(parameters = ? request.parameters))] pub(crate) async fn embed_batch( &self, request: BatchEmbedRequest, @@ -996,7 +996,7 @@ impl Infer { /// Add best_of new requests to the queue and return a InferResponse of the sequence with /// the highest log probability per token - #[instrument(skip(self))] + #[instrument(skip_all,fields(parameters = ? request.parameters, best_of, prefix_caching))] pub(crate) async fn generate_best_of( &self, request: GenerateRequest, diff --git a/router/src/server.rs b/router/src/server.rs index 3eb24521d..89a2da991 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -80,8 +80,18 @@ example = json ! ({"error": "Input validation error"})), example = json ! ({"error": "Incomplete generation"})), ) )] -#[instrument(skip(infer, req))] -async fn compat_generate( +#[instrument( +skip_all, +fields( +parameters = ? req.0.parameters, +total_time, +validation_time, +queue_time, +inference_time, +time_per_token, +seed, +) +)]async fn compat_generate( default_return_full_text: Extension, infer: Extension, info: Extension, @@ -147,7 +157,7 @@ example = json ! ({"error": "Input validation error"})), example = json ! ({"error": "Incomplete generation"})), ) )] -#[instrument(skip(infer, req))] +#[instrument(skip(infer, req, req_headers))] async fn completions_v1( default_return_full_text: Extension, infer: Extension, @@ -232,7 +242,7 @@ example = json ! ({"error": "Input validation error"})), example = json ! ({"error": "Incomplete generation"})), ) )] -#[instrument(skip(infer, req))] +#[instrument(skip(infer, req, req_headers))] async fn chat_completions_v1( default_return_full_text: Extension, infer: Extension, From f0a7d9f9069eaa6d88d76bc557b2cbb5ce64d7e2 Mon Sep 17 00:00:00 2001 From: Noah Yoshida Date: Sat, 11 Jan 2025 08:47:13 +0900 Subject: [PATCH 2/8] added useful header logging --- router/src/server.rs | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/router/src/server.rs b/router/src/server.rs index 89a2da991..b64b1f22a 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -91,7 +91,8 @@ inference_time, time_per_token, seed, ) -)]async fn compat_generate( +)] +async fn compat_generate( default_return_full_text: Extension, infer: Extension, info: Extension, @@ -101,6 +102,10 @@ seed, req_headers: HeaderMap, req: Json, ) -> Result)> { + // Log some useful headers to the span. + let span = tracing::Span::current(); + trace_headers(req_headers, &span); + let mut req = req.0; // default return_full_text given the pipeline_tag @@ -168,6 +173,8 @@ async fn completions_v1( req_headers: HeaderMap, req: Json, ) -> Result)> { + let span = tracing::Span::current(); + trace_headers(req_headers, &span); let mut req = req.0; if req.model == info.model_id.as_str() { // Allow user to specify the base model, but treat it as an empty adapter_id @@ -253,6 +260,8 @@ async fn chat_completions_v1( req_headers: HeaderMap, req: Json, ) -> Result)> { + let span = tracing::Span::current(); + trace_headers(req_headers, &span); let mut req = req.0; let model_id = info.model_id.clone(); if req.model == info.model_id.as_str() { @@ -642,6 +651,7 @@ async fn generate( mut req: Json, ) -> Result<(HeaderMap, Json), (StatusCode, Json)> { let span = tracing::Span::current(); + trace_headers(req_headers, &span); let start_time = Instant::now(); metrics::increment_counter!("lorax_request_count"); @@ -2068,3 +2078,15 @@ async fn tokenize( )) } } + +fn trace_headers(headers: HeaderMap, span: &tracing::Span) { + headers + .get("x-predibase-tenant") + .map(|value| span.record("x-predibase-tenant", value)); + headers + .get("user-agent") + .map(|value| span.record("user-agent", value)); + headers + .get("x-b3-traceid") + .map(|value| span.record("x-b3-traceid", value)); +} From 5751b1dda89f17a28e770c5eb7a9842c856cc98e Mon Sep 17 00:00:00 2001 From: Noah Yoshida Date: Tue, 14 Jan 2025 05:38:06 +0900 Subject: [PATCH 3/8] fix build --- router/src/server.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/router/src/server.rs b/router/src/server.rs index b64b1f22a..fff46bc8c 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -2079,6 +2079,15 @@ async fn tokenize( } } +// Implements Tracing Value for Header Value, to support recording header values +impl tracing::Value for HeaderValue { + fn record(&self, key: &tracing::field::Field, visitor: &mut dyn Visit) { + // Convert HeaderValue to a string representation safely + let value_str = self.to_str().unwrap_or(""); + key.record_str(value_str, visitor); + } +} + fn trace_headers(headers: HeaderMap, span: &tracing::Span) { headers .get("x-predibase-tenant") From 157e80908e78db0dd1fecf88513fe7543aa43ec0 Mon Sep 17 00:00:00 2001 From: Noah Yoshida Date: Fri, 17 Jan 2025 09:00:06 +0900 Subject: [PATCH 4/8] bump rust version --- .github/workflows/router_tests.yaml | 2 +- Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/router_tests.yaml b/.github/workflows/router_tests.yaml index 6eb95f130..404b481d6 100644 --- a/.github/workflows/router_tests.yaml +++ b/.github/workflows/router_tests.yaml @@ -29,7 +29,7 @@ jobs: - name: Install Rust uses: actions-rs/toolchain@v1 with: - toolchain: 1.79.0 + toolchain: 1.83.0 override: true components: rustfmt, clippy - name: Install Protoc diff --git a/Dockerfile b/Dockerfile index eccefae58..0988daf58 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Rust builder -FROM lukemathwalker/cargo-chef:latest-rust-1.79 AS chef +FROM lukemathwalker/cargo-chef:latest-rust-1.83 AS chef WORKDIR /usr/src ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse From a18253dce457003d144d168d8bd51356619a3a3d Mon Sep 17 00:00:00 2001 From: Noah Yoshida Date: Fri, 17 Jan 2025 09:16:27 +0900 Subject: [PATCH 5/8] fix imports / types --- router/src/server.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/router/src/server.rs b/router/src/server.rs index fff46bc8c..49766f081 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -22,7 +22,7 @@ use crate::{ use axum::extract::Extension; use axum::extract::Query; use axum::http::header; -use axum::http::{HeaderMap, Method, StatusCode}; +use axum::http::{HeaderMap, HeaderValue, Method, StatusCode}; use axum::response::sse::{Event, KeepAlive, Sse}; use axum::response::{IntoResponse, Response}; use axum::routing::{get, post}; @@ -2081,7 +2081,7 @@ async fn tokenize( // Implements Tracing Value for Header Value, to support recording header values impl tracing::Value for HeaderValue { - fn record(&self, key: &tracing::field::Field, visitor: &mut dyn Visit) { + fn record(&self, key: &tracing::field::Field, visitor: &mut dyn tracing::field::Visit) { // Convert HeaderValue to a string representation safely let value_str = self.to_str().unwrap_or(""); key.record_str(value_str, visitor); From a7cae1a7803b5533f1962f5b8ba41ada9105dc77 Mon Sep 17 00:00:00 2001 From: Noah Yoshida Date: Fri, 17 Jan 2025 09:37:56 +0900 Subject: [PATCH 6/8] fix --- router/src/server.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/router/src/server.rs b/router/src/server.rs index 49766f081..3bd33912f 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -104,7 +104,7 @@ async fn compat_generate( ) -> Result)> { // Log some useful headers to the span. let span = tracing::Span::current(); - trace_headers(req_headers, &span); + trace_headers(req_headers.clone(), &span); let mut req = req.0; @@ -174,7 +174,7 @@ async fn completions_v1( req: Json, ) -> Result)> { let span = tracing::Span::current(); - trace_headers(req_headers, &span); + trace_headers(req_headers.clone(), &span); let mut req = req.0; if req.model == info.model_id.as_str() { // Allow user to specify the base model, but treat it as an empty adapter_id @@ -261,7 +261,7 @@ async fn chat_completions_v1( req: Json, ) -> Result)> { let span = tracing::Span::current(); - trace_headers(req_headers, &span); + trace_headers(req_headers.clone(), &span); let mut req = req.0; let model_id = info.model_id.clone(); if req.model == info.model_id.as_str() { @@ -651,7 +651,7 @@ async fn generate( mut req: Json, ) -> Result<(HeaderMap, Json), (StatusCode, Json)> { let span = tracing::Span::current(); - trace_headers(req_headers, &span); + trace_headers(req_headers.clone(), &span); let start_time = Instant::now(); metrics::increment_counter!("lorax_request_count"); @@ -2084,7 +2084,7 @@ impl tracing::Value for HeaderValue { fn record(&self, key: &tracing::field::Field, visitor: &mut dyn tracing::field::Visit) { // Convert HeaderValue to a string representation safely let value_str = self.to_str().unwrap_or(""); - key.record_str(value_str, visitor); + visitor.record_str(key, value_str); } } From 6125c1809f89854a0b639413e9a05177aeda49a6 Mon Sep 17 00:00:00 2001 From: Noah Yoshida Date: Fri, 17 Jan 2025 09:42:28 +0900 Subject: [PATCH 7/8] attempt to fix it --- router/src/server.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/router/src/server.rs b/router/src/server.rs index 3bd33912f..712bdd373 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -22,7 +22,7 @@ use crate::{ use axum::extract::Extension; use axum::extract::Query; use axum::http::header; -use axum::http::{HeaderMap, HeaderValue, Method, StatusCode}; +use axum::http::{HeaderMap, Method, StatusCode}; use axum::response::sse::{Event, KeepAlive, Sse}; use axum::response::{IntoResponse, Response}; use axum::routing::{get, post}; @@ -2080,7 +2080,7 @@ async fn tokenize( } // Implements Tracing Value for Header Value, to support recording header values -impl tracing::Value for HeaderValue { +impl tracing::Value for axum::http::HeaderValue { fn record(&self, key: &tracing::field::Field, visitor: &mut dyn tracing::field::Visit) { // Convert HeaderValue to a string representation safely let value_str = self.to_str().unwrap_or(""); From ec8fe7dbb7a42051022f3ab075de99bc0e88e8e9 Mon Sep 17 00:00:00 2001 From: Noah Yoshida Date: Fri, 17 Jan 2025 09:47:13 +0900 Subject: [PATCH 8/8] record string value --- router/src/server.rs | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/router/src/server.rs b/router/src/server.rs index 712bdd373..8f9ac505e 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -2079,23 +2079,14 @@ async fn tokenize( } } -// Implements Tracing Value for Header Value, to support recording header values -impl tracing::Value for axum::http::HeaderValue { - fn record(&self, key: &tracing::field::Field, visitor: &mut dyn tracing::field::Visit) { - // Convert HeaderValue to a string representation safely - let value_str = self.to_str().unwrap_or(""); - visitor.record_str(key, value_str); - } -} - fn trace_headers(headers: HeaderMap, span: &tracing::Span) { headers .get("x-predibase-tenant") - .map(|value| span.record("x-predibase-tenant", value)); + .map(|value| span.record("x-predibase-tenant", value.to_str().unwrap_or("unknown"))); headers .get("user-agent") - .map(|value| span.record("user-agent", value)); + .map(|value| span.record("user-agent", value.to_str().unwrap_or("unknown"))); headers .get("x-b3-traceid") - .map(|value| span.record("x-b3-traceid", value)); + .map(|value| span.record("x-b3-traceid", value.to_str().unwrap_or(""))); }