From 9c615f786c4f36ccb930549079b3481682f20ff1 Mon Sep 17 00:00:00 2001
From: Jeong Ukjae
Date: Thu, 15 Jun 2023 16:41:33 +0900
Subject: [PATCH] record runtime latency with signature name

---
 tensorflow_serving/servables/tensorflow/classifier.cc   | 6 ++++--
 tensorflow_serving/servables/tensorflow/predict_util.cc | 3 ++-
 tensorflow_serving/servables/tensorflow/regressor.cc    | 5 +++--
 tensorflow_serving/servables/tensorflow/util.cc         | 9 ++++++---
 tensorflow_serving/servables/tensorflow/util.h          | 3 ++-
 5 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/tensorflow_serving/servables/tensorflow/classifier.cc b/tensorflow_serving/servables/tensorflow/classifier.cc
index e6a2948ab33..c700b2bb498 100644
--- a/tensorflow_serving/servables/tensorflow/classifier.cc
+++ b/tensorflow_serving/servables/tensorflow/classifier.cc
@@ -74,8 +74,10 @@ class SavedModelTensorFlowClassifier : public ClassifierInterface {
         run_options_, request.input(), input_tensor_name, output_tensor_names,
         session_, &outputs, &num_examples, thread_pool_options_,
         &runtime_latency));
-    RecordRuntimeLatency(request.model_spec().name(), /*api=*/"Classify",
-                         /*runtime=*/"TF1", runtime_latency);
+    RecordRuntimeLatency(request.model_spec().name(),
+                         /*signature_name=*/"tensorflow/serving/classify",
+                         /*api=*/"Classify", /*runtime=*/"TF1",
+                         runtime_latency);
 
     TRACELITERAL("ConvertToClassificationResult");
     return PostProcessClassificationResult(
diff --git a/tensorflow_serving/servables/tensorflow/predict_util.cc b/tensorflow_serving/servables/tensorflow/predict_util.cc
index 639fb020e30..05b0776f584 100644
--- a/tensorflow_serving/servables/tensorflow/predict_util.cc
+++ b/tensorflow_serving/servables/tensorflow/predict_util.cc
@@ -104,7 +104,8 @@ Status RunPredict(
                                   output_tensor_names, {}, &outputs,
                                   &run_metadata, thread_pool_options));
   const uint64_t end_microseconds = EnvTime::NowMicros();
-  RecordRuntimeLatency(request.model_spec().name(), /*api=*/"Predict",
+  RecordRuntimeLatency(request.model_spec().name(),
+                       /*signature_name=*/signature_name, /*api=*/"Predict",
                        /*runtime=*/"TF1",
                        end_microseconds - start_microseconds);
 
diff --git a/tensorflow_serving/servables/tensorflow/regressor.cc b/tensorflow_serving/servables/tensorflow/regressor.cc
index ab1e4e3ef2a..c336a2756df 100644
--- a/tensorflow_serving/servables/tensorflow/regressor.cc
+++ b/tensorflow_serving/servables/tensorflow/regressor.cc
@@ -73,8 +73,9 @@ class SavedModelTensorFlowRegressor : public RegressorInterface {
         run_options_, request.input(), input_tensor_name, output_tensor_names,
         session_, &outputs, &num_examples, thread_pool_options_,
         &runtime_latency));
-    RecordRuntimeLatency(request.model_spec().name(), /*api=*/"Regress",
-                         /*runtime=*/"TF1", runtime_latency);
+    RecordRuntimeLatency(request.model_spec().name(),
+                         /*signature_name=*/"tensorflow/serving/regress",
+                         /*api=*/"Regress", /*runtime=*/"TF1", runtime_latency);
 
     TRACELITERAL("ConvertToRegressionResult");
     return PostProcessRegressionResult(*signature_, num_examples,
diff --git a/tensorflow_serving/servables/tensorflow/util.cc b/tensorflow_serving/servables/tensorflow/util.cc
index ccc9575f783..118439ca06c 100644
--- a/tensorflow_serving/servables/tensorflow/util.cc
+++ b/tensorflow_serving/servables/tensorflow/util.cc
@@ -60,11 +60,12 @@ auto* model_request_status_count_total = monitoring::Counter<2>::New(
     "/tensorflow/serving/request_count", "The total number of requests.",
     "model_name", "status");
 
-auto* runtime_latency = monitoring::Sampler<3>::New(
+auto* runtime_latency = monitoring::Sampler<4>::New(
     {
         "/tensorflow/serving/runtime_latency",
         "Distribution of wall time (in microseconds) for Tensorflow runtime.",
         "model_name",
+        "signature_name",
         "API",
         "runtime",
     },  // Scale of 10, power of 1.8 with bucket count 33 (~20 minutes).
@@ -341,9 +342,11 @@ Status EstimateResourceFromPathUsingDiskState(const string& path,
   return OkStatus();
 }
 
-void RecordRuntimeLatency(const string& model_name, const string& api,
+void RecordRuntimeLatency(const string& model_name,
+                          const string& signature_name, const string& api,
                           const string& runtime, int64_t latency_usec) {
-  runtime_latency->GetCell(model_name, api, runtime)->Add(latency_usec);
+  runtime_latency->GetCell(model_name, signature_name, api, runtime)->Add(
+      latency_usec);
 }
 
 void RecordRequestLatency(const string& model_name, const string& api,
diff --git a/tensorflow_serving/servables/tensorflow/util.h b/tensorflow_serving/servables/tensorflow/util.h
index 692d0f7caa4..dda3a9ff44e 100644
--- a/tensorflow_serving/servables/tensorflow/util.h
+++ b/tensorflow_serving/servables/tensorflow/util.h
@@ -115,7 +115,8 @@ Status EstimateResourceFromPathUsingDiskState(const string& path,
                                               ResourceAllocation* estimate);
 
 // Update metrics for runtime latency.
-void RecordRuntimeLatency(const string& model_name, const string& api,
+void RecordRuntimeLatency(const string& model_name,
+                          const string& signature_name, const string& api,
                           const string& runtime, int64_t latency_usec);
 
 // Update metrics for request latency.
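
A minimal caller-side sketch of the widened helper, for reference while reviewing. The RunAndRecord wrapper, its namespace, and the "Predict"/"TF1" label values are illustrative assumptions only; the five-argument RecordRuntimeLatency signature itself is the one declared in util.h above.

// Illustrative only: a hypothetical out-of-tree helper built on the updated
// RecordRuntimeLatency(). Everything except the five-argument call is an
// assumption made for the sake of the example.
#include <cstdint>
#include <string>
#include <utility>

#include "tensorflow/core/platform/env_time.h"
#include "tensorflow_serving/servables/tensorflow/util.h"

namespace example {

// Times a callable and records the elapsed wall time against the
// /tensorflow/serving/runtime_latency sampler, which with this patch carries
// the labels model_name, signature_name, API, and runtime.
template <typename Fn>
void RunAndRecord(const std::string& model_name,
                  const std::string& signature_name, Fn&& fn) {
  const uint64_t start_us = tensorflow::EnvTime::NowMicros();
  std::forward<Fn>(fn)();
  const uint64_t end_us = tensorflow::EnvTime::NowMicros();
  tensorflow::serving::RecordRuntimeLatency(
      model_name, /*signature_name=*/signature_name, /*api=*/"Predict",
      /*runtime=*/"TF1", end_us - start_us);
}

}  // namespace example

In the patch itself, only the Predict path forwards the request's actual signature_name; the Classify and Regress paths record the fixed method names "tensorflow/serving/classify" and "tensorflow/serving/regress".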