Package inference

Interface GrpcService.ModelStatisticsOrBuilder

All Superinterfaces:
com.google.protobuf.MessageLiteOrBuilder, com.google.protobuf.MessageOrBuilder
All Known Implementing Classes:
GrpcService.ModelStatistics, GrpcService.ModelStatistics.Builder
Enclosing class:
GrpcService

public static interface GrpcService.ModelStatisticsOrBuilder extends com.google.protobuf.MessageOrBuilder
  • Method Details

    • getName

      String getName()
      @@  .. cpp:var:: string name
      @@
      @@     The name of the model. If not given, returns statistics for all models.
      @@
       
      string name = 1;
      Returns:
      The name.
    • getNameBytes

      com.google.protobuf.ByteString getNameBytes()
      @@  .. cpp:var:: string name
      @@
      @@     The name of the model. If not given, returns statistics for all models.
      @@
       
      string name = 1;
      Returns:
      The bytes for name.
    • getVersion

      String getVersion()
      @@  .. cpp:var:: string version
      @@
      @@     The version of the model.
      @@
       
      string version = 2;
      Returns:
      The version.
    • getVersionBytes

      com.google.protobuf.ByteString getVersionBytes()
      @@  .. cpp:var:: string version
      @@
      @@     The version of the model.
      @@
       
      string version = 2;
      Returns:
      The bytes for version.
    • getLastInference

      long getLastInference()
      @@  .. cpp:var:: uint64 last_inference
      @@
      @@     The timestamp of the last inference request made for this model,
      @@     as milliseconds since the epoch.
      @@
       
      uint64 last_inference = 3;
      Returns:
      The lastInference.
    • getInferenceCount

      long getInferenceCount()
      @@  .. cpp:var:: uint64 inference_count
      @@
      @@     The cumulative count of successful inference requests made for this
      @@     model. Each inference in a batched request is counted as an
      @@     individual inference. For example, if a client sends a single
      @@     inference request with batch size 64, "inference_count" will be
      @@     incremented by 64. Similarly, if a client sends 64 individual
      @@     requests each with batch size 1, "inference_count" will be
      @@     incremented by 64. The "inference_count" value DOES NOT include
      @@     cache hits.
      @@
       
      uint64 inference_count = 4;
      Returns:
      The inferenceCount.
    • getExecutionCount

      long getExecutionCount()
      @@  .. cpp:var:: uint64 execution_count
      @@
      @@     The cumulative count of the number of successful inference executions
      @@     performed for the model. When dynamic batching is enabled, a single
      @@     model execution can perform inferencing for more than one inference
      @@     request. For example, if a client sends 64 individual requests each
      @@     with batch size 1 and the dynamic batcher batches them into a single
      @@     large batch for model execution then "execution_count" will be
      @@     incremented by 1. If, on the other hand, the dynamic batcher is not
      @@     enabled and each of the 64 individual requests is executed
      @@     independently, then "execution_count" will be incremented by 64.
      @@     The "execution_count" value DOES NOT include cache hits.
      @@
       
      uint64 execution_count = 5;
      Returns:
      The executionCount.
    • hasInferenceStats

      boolean hasInferenceStats()
      @@  .. cpp:var:: InferStatistics inference_stats
      @@
      @@     The aggregate statistics for the model/version.
      @@
       
      .inference.InferStatistics inference_stats = 6;
      Returns:
      Whether the inferenceStats field is set.
    • getInferenceStats

      GrpcService.InferStatistics getInferenceStats()
      @@  .. cpp:var:: InferStatistics inference_stats
      @@
      @@     The aggregate statistics for the model/version.
      @@
       
      .inference.InferStatistics inference_stats = 6;
      Returns:
      The inferenceStats.
    • getInferenceStatsOrBuilder

      GrpcService.InferStatisticsOrBuilder getInferenceStatsOrBuilder()
      @@  .. cpp:var:: InferStatistics inference_stats
      @@
      @@     The aggregate statistics for the model/version.
      @@
       
      .inference.InferStatistics inference_stats = 6;
    • getBatchStatsList

      @@  .. cpp:var:: InferBatchStatistics batch_stats (repeated)
      @@
      @@     The aggregate statistics for each different batch size that is
      @@     executed in the model. The batch statistics indicate how many actual
      @@     model executions were performed and show differences due to different
      @@     batch size (for example, larger batches typically take longer to
      @@     compute).
      @@
       
      repeated .inference.InferBatchStatistics batch_stats = 7;
    • getBatchStats

      GrpcService.InferBatchStatistics getBatchStats(int index)
      @@  .. cpp:var:: InferBatchStatistics batch_stats (repeated)
      @@
      @@     The aggregate statistics for each different batch size that is
      @@     executed in the model. The batch statistics indicate how many actual
      @@     model executions were performed and show differences due to different
      @@     batch size (for example, larger batches typically take longer to
      @@     compute).
      @@
       
      repeated .inference.InferBatchStatistics batch_stats = 7;
    • getBatchStatsCount

      int getBatchStatsCount()
      @@  .. cpp:var:: InferBatchStatistics batch_stats (repeated)
      @@
      @@     The aggregate statistics for each different batch size that is
      @@     executed in the model. The batch statistics indicate how many actual
      @@     model executions were performed and show differences due to different
      @@     batch size (for example, larger batches typically take longer to
      @@     compute).
      @@
       
      repeated .inference.InferBatchStatistics batch_stats = 7;
    • getBatchStatsOrBuilderList

      List<? extends GrpcService.InferBatchStatisticsOrBuilder> getBatchStatsOrBuilderList()
      @@  .. cpp:var:: InferBatchStatistics batch_stats (repeated)
      @@
      @@     The aggregate statistics for each different batch size that is
      @@     executed in the model. The batch statistics indicate how many actual
      @@     model executions were performed and show differences due to different
      @@     batch size (for example, larger batches typically take longer to
      @@     compute).
      @@
       
      repeated .inference.InferBatchStatistics batch_stats = 7;
    • getBatchStatsOrBuilder

      GrpcService.InferBatchStatisticsOrBuilder getBatchStatsOrBuilder(int index)
      @@  .. cpp:var:: InferBatchStatistics batch_stats (repeated)
      @@
      @@     The aggregate statistics for each different batch size that is
      @@     executed in the model. The batch statistics indicate how many actual
      @@     model executions were performed and show differences due to different
      @@     batch size (for example, larger batches typically take longer to
      @@     compute).
      @@
       
      repeated .inference.InferBatchStatistics batch_stats = 7;
    • getMemoryUsageList

      List<GrpcService.MemoryUsage> getMemoryUsageList()
      @@  .. cpp:var:: MemoryUsage memory_usage (repeated)
      @@
      @@     The memory usage detected during model loading, which may be used to
      @@     estimate the memory to be released once the model is unloaded. Note
      @@     that the estimate is inferred by the profiling tools and the
      @@     framework's memory schema; therefore, it is advised to perform
      @@     experiments to understand the scenarios in which the reported memory
      @@     usage can be relied on. As a starting point, the GPU memory usage for
      @@     models in ONNX Runtime backend and TensorRT backend is usually
      @@     aligned.
      @@
       
      repeated .inference.MemoryUsage memory_usage = 8;
    • getMemoryUsage

      GrpcService.MemoryUsage getMemoryUsage(int index)
      @@  .. cpp:var:: MemoryUsage memory_usage (repeated)
      @@
      @@     The memory usage detected during model loading, which may be used to
      @@     estimate the memory to be released once the model is unloaded. Note
      @@     that the estimate is inferred by the profiling tools and the
      @@     framework's memory schema; therefore, it is advised to perform
      @@     experiments to understand the scenarios in which the reported memory
      @@     usage can be relied on. As a starting point, the GPU memory usage for
      @@     models in ONNX Runtime backend and TensorRT backend is usually
      @@     aligned.
      @@
       
      repeated .inference.MemoryUsage memory_usage = 8;
    • getMemoryUsageCount

      int getMemoryUsageCount()
      @@  .. cpp:var:: MemoryUsage memory_usage (repeated)
      @@
      @@     The memory usage detected during model loading, which may be used to
      @@     estimate the memory to be released once the model is unloaded. Note
      @@     that the estimate is inferred by the profiling tools and the
      @@     framework's memory schema; therefore, it is advised to perform
      @@     experiments to understand the scenarios in which the reported memory
      @@     usage can be relied on. As a starting point, the GPU memory usage for
      @@     models in ONNX Runtime backend and TensorRT backend is usually
      @@     aligned.
      @@
       
      repeated .inference.MemoryUsage memory_usage = 8;
    • getMemoryUsageOrBuilderList

      List<? extends GrpcService.MemoryUsageOrBuilder> getMemoryUsageOrBuilderList()
      @@  .. cpp:var:: MemoryUsage memory_usage (repeated)
      @@
      @@     The memory usage detected during model loading, which may be used to
      @@     estimate the memory to be released once the model is unloaded. Note
      @@     that the estimate is inferred by the profiling tools and the
      @@     framework's memory schema; therefore, it is advised to perform
      @@     experiments to understand the scenarios in which the reported memory
      @@     usage can be relied on. As a starting point, the GPU memory usage for
      @@     models in ONNX Runtime backend and TensorRT backend is usually
      @@     aligned.
      @@
       
      repeated .inference.MemoryUsage memory_usage = 8;
    • getMemoryUsageOrBuilder

      GrpcService.MemoryUsageOrBuilder getMemoryUsageOrBuilder(int index)
      @@  .. cpp:var:: MemoryUsage memory_usage (repeated)
      @@
      @@     The memory usage detected during model loading, which may be used to
      @@     estimate the memory to be released once the model is unloaded. Note
      @@     that the estimate is inferred by the profiling tools and the
      @@     framework's memory schema; therefore, it is advised to perform
      @@     experiments to understand the scenarios in which the reported memory
      @@     usage can be relied on. As a starting point, the GPU memory usage for
      @@     models in ONNX Runtime backend and TensorRT backend is usually
      @@     aligned.
      @@
       
      repeated .inference.MemoryUsage memory_usage = 8;
    • getResponseStatsCount

      int getResponseStatsCount()
      @@  .. cpp:var:: map<string, InferResponseStatistics> response_stats
      @@
      @@     The key and value pairs for all responses statistics. The key is a
      @@     string identifying a set of response statistics aggregated together
      @@     (i.e. index of the response sent). The value is the aggregated
      @@     response statistics.
      @@
       
      map<string, .inference.InferResponseStatistics> response_stats = 9;
    • containsResponseStats

      boolean containsResponseStats(String key)
      @@  .. cpp:var:: map<string, InferResponseStatistics> response_stats
      @@
      @@     The key and value pairs for all responses statistics. The key is a
      @@     string identifying a set of response statistics aggregated together
      @@     (i.e. index of the response sent). The value is the aggregated
      @@     response statistics.
      @@
       
      map<string, .inference.InferResponseStatistics> response_stats = 9;
    • getResponseStats

      Deprecated.
    • getResponseStatsMap

      @@  .. cpp:var:: map<string, InferResponseStatistics> response_stats
      @@
      @@     The key and value pairs for all responses statistics. The key is a
      @@     string identifying a set of response statistics aggregated together
      @@     (i.e. index of the response sent). The value is the aggregated
      @@     response statistics.
      @@
       
      map<string, .inference.InferResponseStatistics> response_stats = 9;
    • getResponseStatsOrDefault

      @@  .. cpp:var:: map<string, InferResponseStatistics> response_stats
      @@
      @@     The key and value pairs for all responses statistics. The key is a
      @@     string identifying a set of response statistics aggregated together
      @@     (i.e. index of the response sent). The value is the aggregated
      @@     response statistics.
      @@
       
      map<string, .inference.InferResponseStatistics> response_stats = 9;
    • getResponseStatsOrThrow

      GrpcService.InferResponseStatistics getResponseStatsOrThrow(String key)
      @@  .. cpp:var:: map<string, InferResponseStatistics> response_stats
      @@
      @@     The key and value pairs for all responses statistics. The key is a
      @@     string identifying a set of response statistics aggregated together
      @@     (i.e. index of the response sent). The value is the aggregated
      @@     response statistics.
      @@
       
      map<string, .inference.InferResponseStatistics> response_stats = 9;