Package inference

Interface ModelConfigOuterClass.ModelOptimizationPolicy.CudaOrBuilder

All Superinterfaces:
com.google.protobuf.MessageLiteOrBuilder, com.google.protobuf.MessageOrBuilder
All Known Implementing Classes:
ModelConfigOuterClass.ModelOptimizationPolicy.Cuda, ModelConfigOuterClass.ModelOptimizationPolicy.Cuda.Builder
Enclosing class:
ModelConfigOuterClass.ModelOptimizationPolicy

public static interface ModelConfigOuterClass.ModelOptimizationPolicy.CudaOrBuilder extends com.google.protobuf.MessageOrBuilder
  • Method Details

    • getGraphs

      boolean getGraphs()
      @@    .. cpp:var:: bool graphs
      @@
      @@       Use CUDA graphs API to capture model operations and execute
      @@       them more efficiently. Default value is false.
      @@       Currently only recognized by TensorRT backend.
      @@
       
      bool graphs = 1;
      Returns:
      The graphs.
    • getBusyWaitEvents

      boolean getBusyWaitEvents()
      @@    .. cpp:var:: bool busy_wait_events
      @@
      @@       Use busy-waiting to synchronize CUDA events to achieve minimum
      @@       latency from event complete to host thread to be notified, with
      @@       the cost of high CPU load. Default value is false.
      @@       Currently only recognized by TensorRT backend.
      @@
       
      bool busy_wait_events = 2;
      Returns:
      The busyWaitEvents.
    • getGraphSpecList

      java.util.List<ModelConfigOuterClass.ModelOptimizationPolicy.Cuda.GraphSpec> getGraphSpecList()
      @@    .. cpp:var:: GraphSpec graph_spec (repeated)
      @@
      @@       Specification of the CUDA graph to be captured. If not specified
      @@       and 'graphs' is true, the default CUDA graphs will be captured
      @@       based on model settings.
      @@       Currently only recognized by TensorRT backend.
      @@
       
      repeated .inference.ModelOptimizationPolicy.Cuda.GraphSpec graph_spec = 3;
    • getGraphSpec

      ModelConfigOuterClass.ModelOptimizationPolicy.Cuda.GraphSpec getGraphSpec(int index)
      @@    .. cpp:var:: GraphSpec graph_spec (repeated)
      @@
      @@       Specification of the CUDA graph to be captured. If not specified
      @@       and 'graphs' is true, the default CUDA graphs will be captured
      @@       based on model settings.
      @@       Currently only recognized by TensorRT backend.
      @@
       
      repeated .inference.ModelOptimizationPolicy.Cuda.GraphSpec graph_spec = 3;
    • getGraphSpecCount

      int getGraphSpecCount()
      @@    .. cpp:var:: GraphSpec graph_spec (repeated)
      @@
      @@       Specification of the CUDA graph to be captured. If not specified
      @@       and 'graphs' is true, the default CUDA graphs will be captured
      @@       based on model settings.
      @@       Currently only recognized by TensorRT backend.
      @@
       
      repeated .inference.ModelOptimizationPolicy.Cuda.GraphSpec graph_spec = 3;
    • getGraphSpecOrBuilderList

      java.util.List<? extends ModelConfigOuterClass.ModelOptimizationPolicy.Cuda.GraphSpecOrBuilder> getGraphSpecOrBuilderList()
      @@    .. cpp:var:: GraphSpec graph_spec (repeated)
      @@
      @@       Specification of the CUDA graph to be captured. If not specified
      @@       and 'graphs' is true, the default CUDA graphs will be captured
      @@       based on model settings.
      @@       Currently only recognized by TensorRT backend.
      @@
       
      repeated .inference.ModelOptimizationPolicy.Cuda.GraphSpec graph_spec = 3;
    • getGraphSpecOrBuilder

      ModelConfigOuterClass.ModelOptimizationPolicy.Cuda.GraphSpecOrBuilder getGraphSpecOrBuilder(int index)
      @@    .. cpp:var:: GraphSpec graph_spec (repeated)
      @@
      @@       Specification of the CUDA graph to be captured. If not specified
      @@       and 'graphs' is true, the default CUDA graphs will be captured
      @@       based on model settings.
      @@       Currently only recognized by TensorRT backend.
      @@
       
      repeated .inference.ModelOptimizationPolicy.Cuda.GraphSpec graph_spec = 3;
    • getOutputCopyStream

      boolean getOutputCopyStream()
      @@    .. cpp:var:: bool output_copy_stream
      @@
      @@       Uses a CUDA stream separate from the inference stream to copy the
      @@       output to host. However, be aware that setting this option to
      @@       true will lead to an increase in the memory consumption of the
      @@       model as Triton will allocate twice as much GPU memory for its
      @@       I/O tensor buffers. Default value is false.
      @@       Currently only recognized by TensorRT backend.
      @@
       
      bool output_copy_stream = 4;
      Returns:
      The outputCopyStream.