Package inference

Interface ModelConfigOuterClass.ModelOptimizationPolicy.ExecutionAcceleratorsOrBuilder

All Superinterfaces:
com.google.protobuf.MessageLiteOrBuilder, com.google.protobuf.MessageOrBuilder
All Known Implementing Classes:
ModelConfigOuterClass.ModelOptimizationPolicy.ExecutionAccelerators, ModelConfigOuterClass.ModelOptimizationPolicy.ExecutionAccelerators.Builder
Enclosing class:
ModelConfigOuterClass.ModelOptimizationPolicy

public static interface ModelConfigOuterClass.ModelOptimizationPolicy.ExecutionAcceleratorsOrBuilder extends com.google.protobuf.MessageOrBuilder
  • Method Details

    • getGpuExecutionAcceleratorList

      @@    .. cpp:var:: Accelerator gpu_execution_accelerator (repeated)
      @@
      @@       The preferred execution provider to be used if the model instance
      @@       is deployed on GPU.
      @@
      @@       For ONNX Runtime backend, possible value is "tensorrt" as name,
      @@       and no parameters are required.
      @@
      @@       For TensorFlow backend, possible values are "tensorrt",
      @@       "auto_mixed_precision", "gpu_io".
      @@
      @@       For "tensorrt", the following parameters can be specified:
      @@         "precision_mode": The precision used for optimization.
      @@         Allowed values are "FP32" and "FP16". Default value is "FP32".
      @@
      @@         "max_cached_engines": The maximum number of cached TensorRT
      @@         engines in dynamic TensorRT ops. Default value is 100.
      @@
      @@         "minimum_segment_size": The smallest model subgraph that will
      @@         be considered for optimization by TensorRT. Default value is 3.
      @@
      @@         "max_workspace_size_bytes": The maximum GPU memory the model
      @@         can use temporarily during execution. Default value is 1GB.
      @@
      @@       For "auto_mixed_precision", no parameters are required. If set,
      @@       the model will try to use FP16 for better performance.
      @@       This optimization cannot be set with "tensorrt".
      @@
      @@       For "gpu_io", no parameters are required. If set, the model will
      @@       be executed using TensorFlow Callable API to set input and output
      @@       tensors in GPU memory if possible, which can reduce data transfer
      @@       overhead if the model is used in ensemble. However, the Callable
      @@       object will be created on model creation and it will request all
      @@       outputs for every model execution, which may impact the
      @@       performance if a request does not require all outputs. This
      @@       optimization will only take effect if the model instance is
      @@       created with KIND_GPU.
      @@
       
      repeated .inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator gpu_execution_accelerator = 1;
    • getGpuExecutionAccelerator

      @@    .. cpp:var:: Accelerator gpu_execution_accelerator (repeated)
      @@
      @@       The preferred execution provider to be used if the model instance
      @@       is deployed on GPU.
      @@
      @@       For ONNX Runtime backend, possible value is "tensorrt" as name,
      @@       and no parameters are required.
      @@
      @@       For TensorFlow backend, possible values are "tensorrt",
      @@       "auto_mixed_precision", "gpu_io".
      @@
      @@       For "tensorrt", the following parameters can be specified:
      @@         "precision_mode": The precision used for optimization.
      @@         Allowed values are "FP32" and "FP16". Default value is "FP32".
      @@
      @@         "max_cached_engines": The maximum number of cached TensorRT
      @@         engines in dynamic TensorRT ops. Default value is 100.
      @@
      @@         "minimum_segment_size": The smallest model subgraph that will
      @@         be considered for optimization by TensorRT. Default value is 3.
      @@
      @@         "max_workspace_size_bytes": The maximum GPU memory the model
      @@         can use temporarily during execution. Default value is 1GB.
      @@
      @@       For "auto_mixed_precision", no parameters are required. If set,
      @@       the model will try to use FP16 for better performance.
      @@       This optimization cannot be set with "tensorrt".
      @@
      @@       For "gpu_io", no parameters are required. If set, the model will
      @@       be executed using TensorFlow Callable API to set input and output
      @@       tensors in GPU memory if possible, which can reduce data transfer
      @@       overhead if the model is used in ensemble. However, the Callable
      @@       object will be created on model creation and it will request all
      @@       outputs for every model execution, which may impact the
      @@       performance if a request does not require all outputs. This
      @@       optimization will only take effect if the model instance is
      @@       created with KIND_GPU.
      @@
       
      repeated .inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator gpu_execution_accelerator = 1;
    • getGpuExecutionAcceleratorCount

      int getGpuExecutionAcceleratorCount()
      @@    .. cpp:var:: Accelerator gpu_execution_accelerator (repeated)
      @@
      @@       The preferred execution provider to be used if the model instance
      @@       is deployed on GPU.
      @@
      @@       For ONNX Runtime backend, possible value is "tensorrt" as name,
      @@       and no parameters are required.
      @@
      @@       For TensorFlow backend, possible values are "tensorrt",
      @@       "auto_mixed_precision", "gpu_io".
      @@
      @@       For "tensorrt", the following parameters can be specified:
      @@         "precision_mode": The precision used for optimization.
      @@         Allowed values are "FP32" and "FP16". Default value is "FP32".
      @@
      @@         "max_cached_engines": The maximum number of cached TensorRT
      @@         engines in dynamic TensorRT ops. Default value is 100.
      @@
      @@         "minimum_segment_size": The smallest model subgraph that will
      @@         be considered for optimization by TensorRT. Default value is 3.
      @@
      @@         "max_workspace_size_bytes": The maximum GPU memory the model
      @@         can use temporarily during execution. Default value is 1GB.
      @@
      @@       For "auto_mixed_precision", no parameters are required. If set,
      @@       the model will try to use FP16 for better performance.
      @@       This optimization cannot be set with "tensorrt".
      @@
      @@       For "gpu_io", no parameters are required. If set, the model will
      @@       be executed using TensorFlow Callable API to set input and output
      @@       tensors in GPU memory if possible, which can reduce data transfer
      @@       overhead if the model is used in ensemble. However, the Callable
      @@       object will be created on model creation and it will request all
      @@       outputs for every model execution, which may impact the
      @@       performance if a request does not require all outputs. This
      @@       optimization will only take effect if the model instance is
      @@       created with KIND_GPU.
      @@
       
      repeated .inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator gpu_execution_accelerator = 1;
    • getGpuExecutionAcceleratorOrBuilderList

      @@    .. cpp:var:: Accelerator gpu_execution_accelerator (repeated)
      @@
      @@       The preferred execution provider to be used if the model instance
      @@       is deployed on GPU.
      @@
      @@       For ONNX Runtime backend, possible value is "tensorrt" as name,
      @@       and no parameters are required.
      @@
      @@       For TensorFlow backend, possible values are "tensorrt",
      @@       "auto_mixed_precision", "gpu_io".
      @@
      @@       For "tensorrt", the following parameters can be specified:
      @@         "precision_mode": The precision used for optimization.
      @@         Allowed values are "FP32" and "FP16". Default value is "FP32".
      @@
      @@         "max_cached_engines": The maximum number of cached TensorRT
      @@         engines in dynamic TensorRT ops. Default value is 100.
      @@
      @@         "minimum_segment_size": The smallest model subgraph that will
      @@         be considered for optimization by TensorRT. Default value is 3.
      @@
      @@         "max_workspace_size_bytes": The maximum GPU memory the model
      @@         can use temporarily during execution. Default value is 1GB.
      @@
      @@       For "auto_mixed_precision", no parameters are required. If set,
      @@       the model will try to use FP16 for better performance.
      @@       This optimization cannot be set with "tensorrt".
      @@
      @@       For "gpu_io", no parameters are required. If set, the model will
      @@       be executed using TensorFlow Callable API to set input and output
      @@       tensors in GPU memory if possible, which can reduce data transfer
      @@       overhead if the model is used in ensemble. However, the Callable
      @@       object will be created on model creation and it will request all
      @@       outputs for every model execution, which may impact the
      @@       performance if a request does not require all outputs. This
      @@       optimization will only take effect if the model instance is
      @@       created with KIND_GPU.
      @@
       
      repeated .inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator gpu_execution_accelerator = 1;
    • getGpuExecutionAcceleratorOrBuilder

      @@    .. cpp:var:: Accelerator gpu_execution_accelerator (repeated)
      @@
      @@       The preferred execution provider to be used if the model instance
      @@       is deployed on GPU.
      @@
      @@       For ONNX Runtime backend, possible value is "tensorrt" as name,
      @@       and no parameters are required.
      @@
      @@       For TensorFlow backend, possible values are "tensorrt",
      @@       "auto_mixed_precision", "gpu_io".
      @@
      @@       For "tensorrt", the following parameters can be specified:
      @@         "precision_mode": The precision used for optimization.
      @@         Allowed values are "FP32" and "FP16". Default value is "FP32".
      @@
      @@         "max_cached_engines": The maximum number of cached TensorRT
      @@         engines in dynamic TensorRT ops. Default value is 100.
      @@
      @@         "minimum_segment_size": The smallest model subgraph that will
      @@         be considered for optimization by TensorRT. Default value is 3.
      @@
      @@         "max_workspace_size_bytes": The maximum GPU memory the model
      @@         can use temporarily during execution. Default value is 1GB.
      @@
      @@       For "auto_mixed_precision", no parameters are required. If set,
      @@       the model will try to use FP16 for better performance.
      @@       This optimization cannot be set with "tensorrt".
      @@
      @@       For "gpu_io", no parameters are required. If set, the model will
      @@       be executed using TensorFlow Callable API to set input and output
      @@       tensors in GPU memory if possible, which can reduce data transfer
      @@       overhead if the model is used in ensemble. However, the Callable
      @@       object will be created on model creation and it will request all
      @@       outputs for every model execution, which may impact the
      @@       performance if a request does not require all outputs. This
      @@       optimization will only take effect if the model instance is
      @@       created with KIND_GPU.
      @@
       
      repeated .inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator gpu_execution_accelerator = 1;
    • getCpuExecutionAcceleratorList

      @@    .. cpp:var:: Accelerator cpu_execution_accelerator (repeated)
      @@
      @@       The preferred execution provider to be used if the model instance
      @@       is deployed on CPU.
      @@
      @@       For ONNX Runtime backend, possible value is "openvino" as name,
      @@       and no parameters are required.
      @@
       
      repeated .inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator cpu_execution_accelerator = 2;
    • getCpuExecutionAccelerator

      @@    .. cpp:var:: Accelerator cpu_execution_accelerator (repeated)
      @@
      @@       The preferred execution provider to be used if the model instance
      @@       is deployed on CPU.
      @@
      @@       For ONNX Runtime backend, possible value is "openvino" as name,
      @@       and no parameters are required.
      @@
       
      repeated .inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator cpu_execution_accelerator = 2;
    • getCpuExecutionAcceleratorCount

      int getCpuExecutionAcceleratorCount()
      @@    .. cpp:var:: Accelerator cpu_execution_accelerator (repeated)
      @@
      @@       The preferred execution provider to be used if the model instance
      @@       is deployed on CPU.
      @@
      @@       For ONNX Runtime backend, possible value is "openvino" as name,
      @@       and no parameters are required.
      @@
       
      repeated .inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator cpu_execution_accelerator = 2;
    • getCpuExecutionAcceleratorOrBuilderList

      @@    .. cpp:var:: Accelerator cpu_execution_accelerator (repeated)
      @@
      @@       The preferred execution provider to be used if the model instance
      @@       is deployed on CPU.
      @@
      @@       For ONNX Runtime backend, possible value is "openvino" as name,
      @@       and no parameters are required.
      @@
       
      repeated .inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator cpu_execution_accelerator = 2;
    • getCpuExecutionAcceleratorOrBuilder

      @@    .. cpp:var:: Accelerator cpu_execution_accelerator (repeated)
      @@
      @@       The preferred execution provider to be used if the model instance
      @@       is deployed on CPU.
      @@
      @@       For ONNX Runtime backend, possible value is "openvino" as name,
      @@       and no parameters are required.
      @@
       
      repeated .inference.ModelOptimizationPolicy.ExecutionAccelerators.Accelerator cpu_execution_accelerator = 2;