triton-inference-server · yeahdongcn · May 6, 2023
diff --git a/protobuf/grpc_service.proto b/protobuf/grpc_service.proto
@@ -429,6 +429,13 @@ message ModelMetadataResponse
   //@@     The model's outputs.
   //@@
   repeated TensorMetadata outputs = 5;
+
+  //@@
+  //@@  .. cpp:var:: string card
+  //@@
+  //@@     The model card for the model. Empty if none is provided.
+  //@@
+  string card = 6;
 }
 
 //@@
@@ -923,7 +930,7 @@ message InferStatistics
   //@@  .. cpp:var:: StatisticDuration queue
   //@@
   //@@     The count and cumulative duration that inference requests wait in
-  //@@     scheduling or other queues. The "queue" count and cumulative 
+  //@@     scheduling or other queues. The "queue" count and cumulative
   //@@     duration includes cache hits.
   //@@
   StatisticDuration queue = 3;
@@ -965,7 +972,7 @@ message InferStatistics
   //@@     and extract output tensor data from the Response Cache on a cache
   //@@     hit. For example, this duration should include the time to copy
   //@@     output tensor data from the Response Cache to the response object.
-  //@@     On cache hits, triton does not need to go to the model/backend 
+  //@@     On cache hits, Triton does not need to go to the model/backend
   //@@     for the output tensor data, so the "compute_input", "compute_infer",
   //@@     and "compute_output" fields are not updated. Assuming the response
   //@@     cache is enabled for a given model, a cache hit occurs for a