Package inference
Interface ModelConfigOuterClass.ModelOptimizationPolicy.CudaOrBuilder
- All Superinterfaces:
com.google.protobuf.MessageLiteOrBuilder,com.google.protobuf.MessageOrBuilder
- All Known Implementing Classes:
ModelConfigOuterClass.ModelOptimizationPolicy.Cuda,ModelConfigOuterClass.ModelOptimizationPolicy.Cuda.Builder
- Enclosing class:
ModelConfigOuterClass.ModelOptimizationPolicy
public static interface ModelConfigOuterClass.ModelOptimizationPolicy.CudaOrBuilder
extends com.google.protobuf.MessageOrBuilder
-
Method Summary
Modifier and Type / Method / Description
boolean getBusyWaitEvents()
boolean getGraphs()
ModelConfigOuterClass.ModelOptimizationPolicy.Cuda.GraphSpec getGraphSpec(int index)
int getGraphSpecCount()
List<ModelConfigOuterClass.ModelOptimizationPolicy.Cuda.GraphSpec> getGraphSpecList()
ModelConfigOuterClass.ModelOptimizationPolicy.Cuda.GraphSpecOrBuilder getGraphSpecOrBuilder(int index)
List<? extends ModelConfigOuterClass.ModelOptimizationPolicy.Cuda.GraphSpecOrBuilder> getGraphSpecOrBuilderList()
boolean getOutputCopyStream()
Methods inherited from interface com.google.protobuf.MessageLiteOrBuilder
isInitialized
Methods inherited from interface com.google.protobuf.MessageOrBuilder
findInitializationErrors, getAllFields, getDefaultInstanceForType, getDescriptorForType, getField, getInitializationErrorString, getOneofFieldDescriptor, getRepeatedField, getRepeatedFieldCount, getUnknownFields, hasField, hasOneof
-
Method Details
-
getGraphs
boolean getGraphs()@@ .. cpp:var:: bool graphs @@ @@ Use CUDA graphs API to capture model operations and execute @@ them more efficiently. Default value is false. @@ Currently only recognized by TensorRT backend. @@
bool graphs = 1;
- Returns:
- The graphs.
-
getBusyWaitEvents
boolean getBusyWaitEvents()@@ .. cpp:var:: bool busy_wait_events @@ @@ Use busy-waiting to synchronize CUDA events to achieve minimum @@ latency from event complete to host thread to be notified, with @@ the cost of high CPU load. Default value is false. @@ Currently only recognized by TensorRT backend. @@
bool busy_wait_events = 2;
- Returns:
- The busyWaitEvents.
-
getGraphSpecList
List<ModelConfigOuterClass.ModelOptimizationPolicy.Cuda.GraphSpec> getGraphSpecList()@@ .. cpp:var:: GraphSpec graph_spec (repeated) @@ @@ Specification of the CUDA graph to be captured. If not specified @@ and 'graphs' is true, the default CUDA graphs will be captured @@ based on model settings. @@ Currently only recognized by TensorRT backend. @@
repeated .inference.ModelOptimizationPolicy.Cuda.GraphSpec graph_spec = 3; -
getGraphSpec
ModelConfigOuterClass.ModelOptimizationPolicy.Cuda.GraphSpec getGraphSpec(int index)@@ .. cpp:var:: GraphSpec graph_spec (repeated) @@ @@ Specification of the CUDA graph to be captured. If not specified @@ and 'graphs' is true, the default CUDA graphs will be captured @@ based on model settings. @@ Currently only recognized by TensorRT backend. @@
repeated .inference.ModelOptimizationPolicy.Cuda.GraphSpec graph_spec = 3; -
getGraphSpecCount
int getGraphSpecCount()@@ .. cpp:var:: GraphSpec graph_spec (repeated) @@ @@ Specification of the CUDA graph to be captured. If not specified @@ and 'graphs' is true, the default CUDA graphs will be captured @@ based on model settings. @@ Currently only recognized by TensorRT backend. @@
repeated .inference.ModelOptimizationPolicy.Cuda.GraphSpec graph_spec = 3; -
getGraphSpecOrBuilderList
List<? extends ModelConfigOuterClass.ModelOptimizationPolicy.Cuda.GraphSpecOrBuilder> getGraphSpecOrBuilderList()@@ .. cpp:var:: GraphSpec graph_spec (repeated) @@ @@ Specification of the CUDA graph to be captured. If not specified @@ and 'graphs' is true, the default CUDA graphs will be captured @@ based on model settings. @@ Currently only recognized by TensorRT backend. @@
repeated .inference.ModelOptimizationPolicy.Cuda.GraphSpec graph_spec = 3; -
getGraphSpecOrBuilder
ModelConfigOuterClass.ModelOptimizationPolicy.Cuda.GraphSpecOrBuilder getGraphSpecOrBuilder(int index) @@ .. cpp:var:: GraphSpec graph_spec (repeated) @@ @@ Specification of the CUDA graph to be captured. If not specified @@ and 'graphs' is true, the default CUDA graphs will be captured @@ based on model settings. @@ Currently only recognized by TensorRT backend. @@
repeated .inference.ModelOptimizationPolicy.Cuda.GraphSpec graph_spec = 3; -
getOutputCopyStream
boolean getOutputCopyStream()@@ .. cpp:var:: bool output_copy_stream @@ @@ Uses a CUDA stream separate from the inference stream to copy the @@ output to host. However, be aware that setting this option to @@ true will lead to an increase in the memory consumption of the @@ model as Triton will allocate twice as much GPU memory for its @@ I/O tensor buffers. Default value is false. @@ Currently only recognized by TensorRT backend. @@
bool output_copy_stream = 4;
- Returns:
- The outputCopyStream.
-