Package inference

Interface ModelConfigOuterClass.ModelOptimizationPolicy.ExecutionAcceleratorsOrBuilder

All Superinterfaces:
com.google.protobuf.MessageLiteOrBuilder, com.google.protobuf.MessageOrBuilder
All Known Implementing Classes:
ModelConfigOuterClass.ModelOptimizationPolicy.ExecutionAccelerators, ModelConfigOuterClass.ModelOptimizationPolicy.ExecutionAccelerators.Builder
Enclosing class:
ModelConfigOuterClass.ModelOptimizationPolicy

public static interface ModelConfigOuterClass.ModelOptimizationPolicy.ExecutionAcceleratorsOrBuilder extends com.google.protobuf.MessageOrBuilder
  • Method Details

    • getGpuExecutionAcceleratorList

      @@    .. cpp:var:: Accelerator gpu_execution_accelerator (repeated)
      @@
      @@       The preferred execution provider to be used if the model instance
      @@       is deployed on GPU.
      @@
      @@       For ONNX Runtime backend, possible value is "tensorrt" as name,
      @@       and no parameters are required.
      @@
      @@       For TensorFlow backend, possible values are "tensorrt",
      @@       "auto_mixed_precision", "gpu_io".
      @@
      @@       For "tensorrt", the following parameters can be specified:
      @@         "precision_mode": The precision used for optimization.
      @@         Allowed values are "FP32" and "FP16". Default value is "FP32".
      @@
      @@         "max_cached_engines": The maximum number of cached TensorRT
      @@         engines in dynamic TensorRT ops. Default value is 100.
      @@
      @@         "minimum_segment_size": The smallest model subgraph that will
      @@         be considered for optimization by TensorRT. Default value is 3.
      @@
      @@         "max_workspace_size_bytes": The maximum GPU memory the model
      @@         can use temporarily during execution. Default value is 1GB.
      @@
      @@       For "auto_mixed_precision", no parameters are required. If set,
      @@       the model will try to use FP16 for better performance.
      @@       This optimization cannot be set with "tensorrt".
      @@
      @@       For "gpu_io", no parameters are required. If set, the model will
      @@       be executed using TensorFlow Callable API to set input and output
      @@       tensors in GPU memory if possible, which can reduce data transfer
      @@       overhead if the model is used in ensemble. However, the Callable
      @@       object will be created on model creation and it will request all
      @@       outputs for every model execution, which may impact the
      @@       performance if a request does not require all outputs. This
      @@       optimization will only take effect if the model instance is
      @@       created with KIND_GPU.
      @@
       
      repeated .inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator gpu_execution_accelerator = 1;
    • getGpuExecutionAccelerator

      @@    .. cpp:var:: Accelerator gpu_execution_accelerator (repeated)
      @@
      @@       The preferred execution provider to be used if the model instance
      @@       is deployed on GPU.
      @@
      @@       For ONNX Runtime backend, possible value is "tensorrt" as name,
      @@       and no parameters are required.
      @@
      @@       For TensorFlow backend, possible values are "tensorrt",
      @@       "auto_mixed_precision", "gpu_io".
      @@
      @@       For "tensorrt", the following parameters can be specified:
      @@         "precision_mode": The precision used for optimization.
      @@         Allowed values are "FP32" and "FP16". Default value is "FP32".
      @@
      @@         "max_cached_engines": The maximum number of cached TensorRT
      @@         engines in dynamic TensorRT ops. Default value is 100.
      @@
      @@         "minimum_segment_size": The smallest model subgraph that will
      @@         be considered for optimization by TensorRT. Default value is 3.
      @@
      @@         "max_workspace_size_bytes": The maximum GPU memory the model
      @@         can use temporarily during execution. Default value is 1GB.
      @@
      @@       For "auto_mixed_precision", no parameters are required. If set,
      @@       the model will try to use FP16 for better performance.
      @@       This optimization cannot be set with "tensorrt".
      @@
      @@       For "gpu_io", no parameters are required. If set, the model will
      @@       be executed using TensorFlow Callable API to set input and output
      @@       tensors in GPU memory if possible, which can reduce data transfer
      @@       overhead if the model is used in ensemble. However, the Callable
      @@       object will be created on model creation and it will request all
      @@       outputs for every model execution, which may impact the
      @@       performance if a request does not require all outputs. This
      @@       optimization will only take effect if the model instance is
      @@       created with KIND_GPU.
      @@
       
      repeated .inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator gpu_execution_accelerator = 1;
    • getGpuExecutionAcceleratorCount

      int getGpuExecutionAcceleratorCount()
      @@    .. cpp:var:: Accelerator gpu_execution_accelerator (repeated)
      @@
      @@       The preferred execution provider to be used if the model instance
      @@       is deployed on GPU.
      @@
      @@       For ONNX Runtime backend, possible value is "tensorrt" as name,
      @@       and no parameters are required.
      @@
      @@       For TensorFlow backend, possible values are "tensorrt",
      @@       "auto_mixed_precision", "gpu_io".
      @@
      @@       For "tensorrt", the following parameters can be specified:
      @@         "precision_mode": The precision used for optimization.
      @@         Allowed values are "FP32" and "FP16". Default value is "FP32".
      @@
      @@         "max_cached_engines": The maximum number of cached TensorRT
      @@         engines in dynamic TensorRT ops. Default value is 100.
      @@
      @@         "minimum_segment_size": The smallest model subgraph that will
      @@         be considered for optimization by TensorRT. Default value is 3.
      @@
      @@         "max_workspace_size_bytes": The maximum GPU memory the model
      @@         can use temporarily during execution. Default value is 1GB.
      @@
      @@       For "auto_mixed_precision", no parameters are required. If set,
      @@       the model will try to use FP16 for better performance.
      @@       This optimization cannot be set with "tensorrt".
      @@
      @@       For "gpu_io", no parameters are required. If set, the model will
      @@       be executed using TensorFlow Callable API to set input and output
      @@       tensors in GPU memory if possible, which can reduce data transfer
      @@       overhead if the model is used in ensemble. However, the Callable
      @@       object will be created on model creation and it will request all
      @@       outputs for every model execution, which may impact the
      @@       performance if a request does not require all outputs. This
      @@       optimization will only take effect if the model instance is
      @@       created with KIND_GPU.
      @@
       
      repeated .inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator gpu_execution_accelerator = 1;
    • getGpuExecutionAcceleratorOrBuilderList

      @@    .. cpp:var:: Accelerator gpu_execution_accelerator (repeated)
      @@
      @@       The preferred execution provider to be used if the model instance
      @@       is deployed on GPU.
      @@
      @@       For ONNX Runtime backend, possible value is "tensorrt" as name,
      @@       and no parameters are required.
      @@
      @@       For TensorFlow backend, possible values are "tensorrt",
      @@       "auto_mixed_precision", "gpu_io".
      @@
      @@       For "tensorrt", the following parameters can be specified:
      @@         "precision_mode": The precision used for optimization.
      @@         Allowed values are "FP32" and "FP16". Default value is "FP32".
      @@
      @@         "max_cached_engines": The maximum number of cached TensorRT
      @@         engines in dynamic TensorRT ops. Default value is 100.
      @@
      @@         "minimum_segment_size": The smallest model subgraph that will
      @@         be considered for optimization by TensorRT. Default value is 3.
      @@
      @@         "max_workspace_size_bytes": The maximum GPU memory the model
      @@         can use temporarily during execution. Default value is 1GB.
      @@
      @@       For "auto_mixed_precision", no parameters are required. If set,
      @@       the model will try to use FP16 for better performance.
      @@       This optimization cannot be set with "tensorrt".
      @@
      @@       For "gpu_io", no parameters are required. If set, the model will
      @@       be executed using TensorFlow Callable API to set input and output
      @@       tensors in GPU memory if possible, which can reduce data transfer
      @@       overhead if the model is used in ensemble. However, the Callable
      @@       object will be created on model creation and it will request all
      @@       outputs for every model execution, which may impact the
      @@       performance if a request does not require all outputs. This
      @@       optimization will only take effect if the model instance is
      @@       created with KIND_GPU.
      @@
       
      repeated .inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator gpu_execution_accelerator = 1;
    • getGpuExecutionAcceleratorOrBuilder

      @@    .. cpp:var:: Accelerator gpu_execution_accelerator (repeated)
      @@
      @@       The preferred execution provider to be used if the model instance
      @@       is deployed on GPU.
      @@
      @@       For ONNX Runtime backend, possible value is "tensorrt" as name,
      @@       and no parameters are required.
      @@
      @@       For TensorFlow backend, possible values are "tensorrt",
      @@       "auto_mixed_precision", "gpu_io".
      @@
      @@       For "tensorrt", the following parameters can be specified:
      @@         "precision_mode": The precision used for optimization.
      @@         Allowed values are "FP32" and "FP16". Default value is "FP32".
      @@
      @@         "max_cached_engines": The maximum number of cached TensorRT
      @@         engines in dynamic TensorRT ops. Default value is 100.
      @@
      @@         "minimum_segment_size": The smallest model subgraph that will
      @@         be considered for optimization by TensorRT. Default value is 3.
      @@
      @@         "max_workspace_size_bytes": The maximum GPU memory the model
      @@         can use temporarily during execution. Default value is 1GB.
      @@
      @@       For "auto_mixed_precision", no parameters are required. If set,
      @@       the model will try to use FP16 for better performance.
      @@       This optimization cannot be set with "tensorrt".
      @@
      @@       For "gpu_io", no parameters are required. If set, the model will
      @@       be executed using TensorFlow Callable API to set input and output
      @@       tensors in GPU memory if possible, which can reduce data transfer
      @@       overhead if the model is used in ensemble. However, the Callable
      @@       object will be created on model creation and it will request all
      @@       outputs for every model execution, which may impact the
      @@       performance if a request does not require all outputs. This
      @@       optimization will only take effect if the model instance is
      @@       created with KIND_GPU.
      @@
       
      repeated .inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator gpu_execution_accelerator = 1;
    • getCpuExecutionAcceleratorList

      @@    .. cpp:var:: Accelerator cpu_execution_accelerator (repeated)
      @@
      @@       The preferred execution provider to be used if the model instance
      @@       is deployed on CPU.
      @@
      @@       For ONNX Runtime backend, possible value is "openvino" as name,
      @@       and no parameters are required.
      @@
       
      repeated .inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator cpu_execution_accelerator = 2;
    • getCpuExecutionAccelerator

      @@    .. cpp:var:: Accelerator cpu_execution_accelerator (repeated)
      @@
      @@       The preferred execution provider to be used if the model instance
      @@       is deployed on CPU.
      @@
      @@       For ONNX Runtime backend, possible value is "openvino" as name,
      @@       and no parameters are required.
      @@
       
      repeated .inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator cpu_execution_accelerator = 2;
    • getCpuExecutionAcceleratorCount

      int getCpuExecutionAcceleratorCount()
      @@    .. cpp:var:: Accelerator cpu_execution_accelerator (repeated)
      @@
      @@       The preferred execution provider to be used if the model instance
      @@       is deployed on CPU.
      @@
      @@       For ONNX Runtime backend, possible value is "openvino" as name,
      @@       and no parameters are required.
      @@
       
      repeated .inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator cpu_execution_accelerator = 2;
    • getCpuExecutionAcceleratorOrBuilderList

      @@    .. cpp:var:: Accelerator cpu_execution_accelerator (repeated)
      @@
      @@       The preferred execution provider to be used if the model instance
      @@       is deployed on CPU.
      @@
      @@       For ONNX Runtime backend, possible value is "openvino" as name,
      @@       and no parameters are required.
      @@
       
      repeated .inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator cpu_execution_accelerator = 2;
    • getCpuExecutionAcceleratorOrBuilder

      @@    .. cpp:var:: Accelerator cpu_execution_accelerator (repeated)
      @@
      @@       The preferred execution provider to be used if the model instance
      @@       is deployed on CPU.
      @@
      @@       For ONNX Runtime backend, possible value is "openvino" as name,
      @@       and no parameters are required.
      @@
       
      repeated .inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator cpu_execution_accelerator = 2;