Package inference

Interface ModelConfigOuterClass.ModelOptimizationPolicy.CudaOrBuilder

All Superinterfaces:
com.google.protobuf.MessageLiteOrBuilder, com.google.protobuf.MessageOrBuilder
All Known Implementing Classes:
ModelConfigOuterClass.ModelOptimizationPolicy.Cuda, ModelConfigOuterClass.ModelOptimizationPolicy.Cuda.Builder
Enclosing class:
ModelConfigOuterClass.ModelOptimizationPolicy

public static interface ModelConfigOuterClass.ModelOptimizationPolicy.CudaOrBuilder extends com.google.protobuf.MessageOrBuilder
  • Method Details

    • getGraphs

      boolean getGraphs()
      @@    .. cpp:var:: bool graphs
      @@
      @@       Use CUDA graphs API to capture model operations and execute
      @@       them more efficiently. Default value is false.
      @@       Currently only recognized by TensorRT backend.
      @@
       
      bool graphs = 1;
      Returns:
      The graphs.
    • getBusyWaitEvents

      boolean getBusyWaitEvents()
      @@    .. cpp:var:: bool busy_wait_events
      @@
      @@       Use busy-waiting to synchronize CUDA events to achieve minimum
      @@       latency from event complete to host thread to be notified, with
      @@       the cost of high CPU load. Default value is false.
      @@       Currently only recognized by TensorRT backend.
      @@
       
      bool busy_wait_events = 2;
      Returns:
      The busyWaitEvents.
    • getGraphSpecList

      java.util.List<ModelConfigOuterClass.ModelOptimizationPolicy.Cuda.GraphSpec> getGraphSpecList()
      @@    .. cpp:var:: GraphSpec graph_spec (repeated)
      @@
      @@       Specification of the CUDA graph to be captured. If not specified
      @@       and 'graphs' is true, the default CUDA graphs will be captured
      @@       based on model settings.
      @@       Currently only recognized by TensorRT backend.
      @@
       
      repeated .inference.ModelOptimizationPolicy.Cuda.GraphSpec graph_spec = 3;
    • getGraphSpec

      ModelConfigOuterClass.ModelOptimizationPolicy.Cuda.GraphSpec getGraphSpec(int index)
      @@    .. cpp:var:: GraphSpec graph_spec (repeated)
      @@
      @@       Specification of the CUDA graph to be captured. If not specified
      @@       and 'graphs' is true, the default CUDA graphs will be captured
      @@       based on model settings.
      @@       Currently only recognized by TensorRT backend.
      @@
       
      repeated .inference.ModelOptimizationPolicy.Cuda.GraphSpec graph_spec = 3;
    • getGraphSpecCount

      int getGraphSpecCount()
      @@    .. cpp:var:: GraphSpec graph_spec (repeated)
      @@
      @@       Specification of the CUDA graph to be captured. If not specified
      @@       and 'graphs' is true, the default CUDA graphs will be captured
      @@       based on model settings.
      @@       Currently only recognized by TensorRT backend.
      @@
       
      repeated .inference.ModelOptimizationPolicy.Cuda.GraphSpec graph_spec = 3;
    • getGraphSpecOrBuilderList

      java.util.List<? extends ModelConfigOuterClass.ModelOptimizationPolicy.Cuda.GraphSpecOrBuilder> getGraphSpecOrBuilderList()
      @@    .. cpp:var:: GraphSpec graph_spec (repeated)
      @@
      @@       Specification of the CUDA graph to be captured. If not specified
      @@       and 'graphs' is true, the default CUDA graphs will be captured
      @@       based on model settings.
      @@       Currently only recognized by TensorRT backend.
      @@
       
      repeated .inference.ModelOptimizationPolicy.Cuda.GraphSpec graph_spec = 3;
    • getGraphSpecOrBuilder

      ModelConfigOuterClass.ModelOptimizationPolicy.Cuda.GraphSpecOrBuilder getGraphSpecOrBuilder(int index)
      @@    .. cpp:var:: GraphSpec graph_spec (repeated)
      @@
      @@       Specification of the CUDA graph to be captured. If not specified
      @@       and 'graphs' is true, the default CUDA graphs will be captured
      @@       based on model settings.
      @@       Currently only recognized by TensorRT backend.
      @@
       
      repeated .inference.ModelOptimizationPolicy.Cuda.GraphSpec graph_spec = 3;
    • getOutputCopyStream

      boolean getOutputCopyStream()
      @@    .. cpp:var:: bool output_copy_stream
      @@
      @@       Uses a CUDA stream separate from the inference stream to copy the
      @@       output to host. However, be aware that setting this option to
      @@       true will lead to an increase in the memory consumption of the
      @@       model as Triton will allocate twice as much GPU memory for its
      @@       I/O tensor buffers. Default value is false.
      @@       Currently only recognized by TensorRT backend.
      @@
       
      bool output_copy_stream = 4;
      Returns:
      The outputCopyStream.