Package inference

Interface GrpcService.ModelStatisticsOrBuilder

All Superinterfaces:
com.google.protobuf.MessageLiteOrBuilder, com.google.protobuf.MessageOrBuilder
All Known Implementing Classes:
GrpcService.ModelStatistics, GrpcService.ModelStatistics.Builder
Enclosing class:
GrpcService

public static interface GrpcService.ModelStatisticsOrBuilder extends com.google.protobuf.MessageOrBuilder
  • Method Details

    • getName

      String getName()
      @@  .. cpp:var:: string name
      @@
      @@     The name of the model. If not given, returns statistics for all models.
      @@
       
      string name = 1;
      Returns:
      The name.
    • getNameBytes

      com.google.protobuf.ByteString getNameBytes()
      @@  .. cpp:var:: string name
      @@
      @@     The name of the model. If not given, returns statistics for all models.
      @@
       
      string name = 1;
      Returns:
      The bytes for name.
    • getVersion

      String getVersion()
      @@  .. cpp:var:: string version
      @@
      @@     The version of the model.
      @@
       
      string version = 2;
      Returns:
      The version.
    • getVersionBytes

      com.google.protobuf.ByteString getVersionBytes()
      @@  .. cpp:var:: string version
      @@
      @@     The version of the model.
      @@
       
      string version = 2;
      Returns:
      The bytes for version.
    • getLastInference

      long getLastInference()
      @@  .. cpp:var:: uint64 last_inference
      @@
      @@     The timestamp of the last inference request made for this model,
      @@     as milliseconds since the epoch.
      @@
       
      uint64 last_inference = 3;
      Returns:
      The lastInference.
    • getInferenceCount

      long getInferenceCount()
      @@  .. cpp:var:: uint64 inference_count
      @@
      @@     The cumulative count of successful inference requests made for this
      @@     model. Each inference in a batched request is counted as an
      @@     individual inference. For example, if a client sends a single
      @@     inference request with batch size 64, "inference_count" will be
      @@     incremented by 64. Similarly, if a client sends 64 individual
      @@     requests each with batch size 1, "inference_count" will be
      @@     incremented by 64. The "inference_count" value DOES NOT include
      @@     cache hits.
      @@
       
      uint64 inference_count = 4;
      Returns:
      The inferenceCount.
    • getExecutionCount

      long getExecutionCount()
      @@  .. cpp:var:: uint64 execution_count
      @@
      @@     The cumulative count of the number of successful inference executions
      @@     performed for the model. When dynamic batching is enabled, a single
      @@     model execution can perform inferencing for more than one inference
      @@     request. For example, if a client sends 64 individual requests each
      @@     with batch size 1 and the dynamic batcher batches them into a single
      @@     large batch for model execution then "execution_count" will be
      @@     incremented by 1. If, on the other hand, the dynamic batcher is not
      @@     enabled and each of the 64 individual requests is executed
      @@     independently, then "execution_count" will be incremented by 64.
      @@     The "execution_count" value DOES NOT include cache hits.
      @@
       
      uint64 execution_count = 5;
      Returns:
      The executionCount.
    • hasInferenceStats

      boolean hasInferenceStats()
      @@  .. cpp:var:: InferStatistics inference_stats
      @@
      @@     The aggregate statistics for the model/version.
      @@
       
      .inference.InferStatistics inference_stats = 6;
      Returns:
      Whether the inferenceStats field is set.
    • getInferenceStats

      GrpcService.InferStatistics getInferenceStats()
      @@  .. cpp:var:: InferStatistics inference_stats
      @@
      @@     The aggregate statistics for the model/version.
      @@
       
      .inference.InferStatistics inference_stats = 6;
      Returns:
      The inferenceStats.
    • getInferenceStatsOrBuilder

      GrpcService.InferStatisticsOrBuilder getInferenceStatsOrBuilder()
      @@  .. cpp:var:: InferStatistics inference_stats
      @@
      @@     The aggregate statistics for the model/version.
      @@
       
      .inference.InferStatistics inference_stats = 6;
    • getBatchStatsList

      @@  .. cpp:var:: InferBatchStatistics batch_stats (repeated)
      @@
      @@     The aggregate statistics for each different batch size that is
      @@     executed in the model. The batch statistics indicate how many actual
      @@     model executions were performed and show differences due to different
      @@     batch size (for example, larger batches typically take longer to
      @@     compute).
      @@
       
      repeated .inference.InferBatchStatistics batch_stats = 7;
    • getBatchStats

      GrpcService.InferBatchStatistics getBatchStats(int index)
      @@  .. cpp:var:: InferBatchStatistics batch_stats (repeated)
      @@
      @@     The aggregate statistics for each different batch size that is
      @@     executed in the model. The batch statistics indicate how many actual
      @@     model executions were performed and show differences due to different
      @@     batch size (for example, larger batches typically take longer to
      @@     compute).
      @@
       
      repeated .inference.InferBatchStatistics batch_stats = 7;
    • getBatchStatsCount

      int getBatchStatsCount()
      @@  .. cpp:var:: InferBatchStatistics batch_stats (repeated)
      @@
      @@     The aggregate statistics for each different batch size that is
      @@     executed in the model. The batch statistics indicate how many actual
      @@     model executions were performed and show differences due to different
      @@     batch size (for example, larger batches typically take longer to
      @@     compute).
      @@
       
      repeated .inference.InferBatchStatistics batch_stats = 7;
    • getBatchStatsOrBuilderList

      List<? extends GrpcService.InferBatchStatisticsOrBuilder> getBatchStatsOrBuilderList()
      @@  .. cpp:var:: InferBatchStatistics batch_stats (repeated)
      @@
      @@     The aggregate statistics for each different batch size that is
      @@     executed in the model. The batch statistics indicate how many actual
      @@     model executions were performed and show differences due to different
      @@     batch size (for example, larger batches typically take longer to
      @@     compute).
      @@
       
      repeated .inference.InferBatchStatistics batch_stats = 7;
    • getBatchStatsOrBuilder

      GrpcService.InferBatchStatisticsOrBuilder getBatchStatsOrBuilder(int index)
      @@  .. cpp:var:: InferBatchStatistics batch_stats (repeated)
      @@
      @@     The aggregate statistics for each different batch size that is
      @@     executed in the model. The batch statistics indicate how many actual
      @@     model executions were performed and show differences due to different
      @@     batch size (for example, larger batches typically take longer to
      @@     compute).
      @@
       
      repeated .inference.InferBatchStatistics batch_stats = 7;
    • getMemoryUsageList

      List<GrpcService.MemoryUsage> getMemoryUsageList()
      @@  .. cpp:var:: MemoryUsage memory_usage (repeated)
      @@
      @@     The memory usage detected during model loading, which may be used to
      @@     estimate the memory to be released once the model is unloaded. Note
      @@     that the estimate is inferred by the profiling tools and the
      @@     framework's memory schema; therefore, it is advised to perform
      @@     experiments to understand the scenarios in which the reported memory
      @@     usage can be relied on. As a starting point, the GPU memory usage for
      @@     models in ONNX Runtime backend and TensorRT backend is usually
      @@     aligned.
      @@
       
      repeated .inference.MemoryUsage memory_usage = 8;
    • getMemoryUsage

      GrpcService.MemoryUsage getMemoryUsage(int index)
      @@  .. cpp:var:: MemoryUsage memory_usage (repeated)
      @@
      @@     The memory usage detected during model loading, which may be used to
      @@     estimate the memory to be released once the model is unloaded. Note
      @@     that the estimate is inferred by the profiling tools and the
      @@     framework's memory schema; therefore, it is advised to perform
      @@     experiments to understand the scenarios in which the reported memory
      @@     usage can be relied on. As a starting point, the GPU memory usage for
      @@     models in ONNX Runtime backend and TensorRT backend is usually
      @@     aligned.
      @@
       
      repeated .inference.MemoryUsage memory_usage = 8;
    • getMemoryUsageCount

      int getMemoryUsageCount()
      @@  .. cpp:var:: MemoryUsage memory_usage (repeated)
      @@
      @@     The memory usage detected during model loading, which may be used to
      @@     estimate the memory to be released once the model is unloaded. Note
      @@     that the estimate is inferred by the profiling tools and the
      @@     framework's memory schema; therefore, it is advised to perform
      @@     experiments to understand the scenarios in which the reported memory
      @@     usage can be relied on. As a starting point, the GPU memory usage for
      @@     models in ONNX Runtime backend and TensorRT backend is usually
      @@     aligned.
      @@
       
      repeated .inference.MemoryUsage memory_usage = 8;
    • getMemoryUsageOrBuilderList

      List<? extends GrpcService.MemoryUsageOrBuilder> getMemoryUsageOrBuilderList()
      @@  .. cpp:var:: MemoryUsage memory_usage (repeated)
      @@
      @@     The memory usage detected during model loading, which may be used to
      @@     estimate the memory to be released once the model is unloaded. Note
      @@     that the estimate is inferred by the profiling tools and the
      @@     framework's memory schema; therefore, it is advised to perform
      @@     experiments to understand the scenarios in which the reported memory
      @@     usage can be relied on. As a starting point, the GPU memory usage for
      @@     models in ONNX Runtime backend and TensorRT backend is usually
      @@     aligned.
      @@
       
      repeated .inference.MemoryUsage memory_usage = 8;
    • getMemoryUsageOrBuilder

      GrpcService.MemoryUsageOrBuilder getMemoryUsageOrBuilder(int index)
      @@  .. cpp:var:: MemoryUsage memory_usage (repeated)
      @@
      @@     The memory usage detected during model loading, which may be used to
      @@     estimate the memory to be released once the model is unloaded. Note
      @@     that the estimate is inferred by the profiling tools and the
      @@     framework's memory schema; therefore, it is advised to perform
      @@     experiments to understand the scenarios in which the reported memory
      @@     usage can be relied on. As a starting point, the GPU memory usage for
      @@     models in ONNX Runtime backend and TensorRT backend is usually
      @@     aligned.
      @@
       
      repeated .inference.MemoryUsage memory_usage = 8;
    • getResponseStatsCount

      int getResponseStatsCount()
      @@  .. cpp:var:: map<string, InferResponseStatistics> response_stats
      @@
      @@     The key and value pairs for all responses statistics. The key is a
      @@     string identifying a set of response statistics aggregated together
      @@     (i.e. index of the response sent). The value is the aggregated
      @@     response statistics.
      @@
       
      map<string, .inference.InferResponseStatistics> response_stats = 9;
    • containsResponseStats

      boolean containsResponseStats(String key)
      @@  .. cpp:var:: map<string, InferResponseStatistics> response_stats
      @@
      @@     The key and value pairs for all responses statistics. The key is a
      @@     string identifying a set of response statistics aggregated together
      @@     (i.e. index of the response sent). The value is the aggregated
      @@     response statistics.
      @@
       
      map<string, .inference.InferResponseStatistics> response_stats = 9;
    • getResponseStats

      Deprecated.
    • getResponseStatsMap

      @@  .. cpp:var:: map<string, InferResponseStatistics> response_stats
      @@
      @@     The key and value pairs for all responses statistics. The key is a
      @@     string identifying a set of response statistics aggregated together
      @@     (i.e. index of the response sent). The value is the aggregated
      @@     response statistics.
      @@
       
      map<string, .inference.InferResponseStatistics> response_stats = 9;
    • getResponseStatsOrDefault

      @@  .. cpp:var:: map<string, InferResponseStatistics> response_stats
      @@
      @@     The key and value pairs for all responses statistics. The key is a
      @@     string identifying a set of response statistics aggregated together
      @@     (i.e. index of the response sent). The value is the aggregated
      @@     response statistics.
      @@
       
      map<string, .inference.InferResponseStatistics> response_stats = 9;
    • getResponseStatsOrThrow

      GrpcService.InferResponseStatistics getResponseStatsOrThrow(String key)
      @@  .. cpp:var:: map<string, InferResponseStatistics> response_stats
      @@
      @@     The key and value pairs for all responses statistics. The key is a
      @@     string identifying a set of response statistics aggregated together
      @@     (i.e. index of the response sent). The value is the aggregated
      @@     response statistics.
      @@
       
      map<string, .inference.InferResponseStatistics> response_stats = 9;