Package inference

Interface ModelConfigOuterClass.ModelSequenceBatchingOrBuilder

All Superinterfaces:
com.google.protobuf.MessageLiteOrBuilder, com.google.protobuf.MessageOrBuilder
All Known Implementing Classes:
ModelConfigOuterClass.ModelSequenceBatching, ModelConfigOuterClass.ModelSequenceBatching.Builder
Enclosing class:
ModelConfigOuterClass

public static interface ModelConfigOuterClass.ModelSequenceBatchingOrBuilder extends com.google.protobuf.MessageOrBuilder
  • Method Details

    • hasDirect

      boolean hasDirect()
      @@    .. cpp:var:: StrategyDirect direct
      @@
      @@       StrategyDirect scheduling strategy.
      @@
       
      .inference.ModelSequenceBatching.StrategyDirect direct = 3;
      Returns:
      Whether the direct field is set.
    • getDirect

      @@    .. cpp:var:: StrategyDirect direct
      @@
      @@       StrategyDirect scheduling strategy.
      @@
       
      .inference.ModelSequenceBatching.StrategyDirect direct = 3;
      Returns:
      The direct.
    • getDirectOrBuilder

      @@    .. cpp:var:: StrategyDirect direct
      @@
      @@       StrategyDirect scheduling strategy.
      @@
       
      .inference.ModelSequenceBatching.StrategyDirect direct = 3;
    • hasOldest

      boolean hasOldest()
      @@    .. cpp:var:: StrategyOldest oldest
      @@
      @@       StrategyOldest scheduling strategy.
      @@
       
      .inference.ModelSequenceBatching.StrategyOldest oldest = 4;
      Returns:
      Whether the oldest field is set.
    • getOldest

      @@    .. cpp:var:: StrategyOldest oldest
      @@
      @@       StrategyOldest scheduling strategy.
      @@
       
      .inference.ModelSequenceBatching.StrategyOldest oldest = 4;
      Returns:
      The oldest.
    • getOldestOrBuilder

      @@    .. cpp:var:: StrategyOldest oldest
      @@
      @@       StrategyOldest scheduling strategy.
      @@
       
      .inference.ModelSequenceBatching.StrategyOldest oldest = 4;
    • getMaxSequenceIdleMicroseconds

      long getMaxSequenceIdleMicroseconds()
      @@  .. cpp:var:: uint64 max_sequence_idle_microseconds
      @@
      @@     The maximum time, in microseconds, that a sequence is allowed to
      @@     be idle before it is aborted. The inference server considers a
      @@     sequence idle when it does not have any inference request queued
      @@     for the sequence. If this limit is exceeded, the inference server
      @@     will free the sequence slot allocated by the sequence and make it
      @@     available for another sequence. If not specified (or specified as
      @@     zero) a default value of 1000000 (1 second) is used.
      @@
       
      uint64 max_sequence_idle_microseconds = 1;
      Returns:
      The maxSequenceIdleMicroseconds.
    • getControlInputList

      @@  .. cpp:var:: ControlInput control_input (repeated)
      @@
      @@     The model input(s) that the server should use to communicate
      @@     sequence start, stop, ready and similar control values to the
      @@     model.
      @@
       
      repeated .inference.ModelSequenceBatching.ControlInput control_input = 2;
    • getControlInput

      @@  .. cpp:var:: ControlInput control_input (repeated)
      @@
      @@     The model input(s) that the server should use to communicate
      @@     sequence start, stop, ready and similar control values to the
      @@     model.
      @@
       
      repeated .inference.ModelSequenceBatching.ControlInput control_input = 2;
    • getControlInputCount

      int getControlInputCount()
      @@  .. cpp:var:: ControlInput control_input (repeated)
      @@
      @@     The model input(s) that the server should use to communicate
      @@     sequence start, stop, ready and similar control values to the
      @@     model.
      @@
       
      repeated .inference.ModelSequenceBatching.ControlInput control_input = 2;
    • getControlInputOrBuilderList

      @@  .. cpp:var:: ControlInput control_input (repeated)
      @@
      @@     The model input(s) that the server should use to communicate
      @@     sequence start, stop, ready and similar control values to the
      @@     model.
      @@
       
      repeated .inference.ModelSequenceBatching.ControlInput control_input = 2;
    • getControlInputOrBuilder

      @@  .. cpp:var:: ControlInput control_input (repeated)
      @@
      @@     The model input(s) that the server should use to communicate
      @@     sequence start, stop, ready and similar control values to the
      @@     model.
      @@
       
      repeated .inference.ModelSequenceBatching.ControlInput control_input = 2;
    • getStateList

      @@  .. cpp:var:: State state (repeated)
      @@
      @@     The optional state that can be stored in Triton for performing
      @@     inference requests on a sequence. Each sequence holds an implicit
      @@     state local to itself. The output state tensor provided by the
      @@     model in 'output_name' field of the current inference request will
      @@     be transferred as an input tensor named 'input_name' in the next
      @@     request of the same sequence. The input state of the first request
      @@     in the sequence contains garbage data.
      @@
       
      repeated .inference.ModelSequenceBatching.State state = 5;
    • getState

      @@  .. cpp:var:: State state (repeated)
      @@
      @@     The optional state that can be stored in Triton for performing
      @@     inference requests on a sequence. Each sequence holds an implicit
      @@     state local to itself. The output state tensor provided by the
      @@     model in 'output_name' field of the current inference request will
      @@     be transferred as an input tensor named 'input_name' in the next
      @@     request of the same sequence. The input state of the first request
      @@     in the sequence contains garbage data.
      @@
       
      repeated .inference.ModelSequenceBatching.State state = 5;
    • getStateCount

      int getStateCount()
      @@  .. cpp:var:: State state (repeated)
      @@
      @@     The optional state that can be stored in Triton for performing
      @@     inference requests on a sequence. Each sequence holds an implicit
      @@     state local to itself. The output state tensor provided by the
      @@     model in 'output_name' field of the current inference request will
      @@     be transferred as an input tensor named 'input_name' in the next
      @@     request of the same sequence. The input state of the first request
      @@     in the sequence contains garbage data.
      @@
       
      repeated .inference.ModelSequenceBatching.State state = 5;
    • getStateOrBuilderList

      @@  .. cpp:var:: State state (repeated)
      @@
      @@     The optional state that can be stored in Triton for performing
      @@     inference requests on a sequence. Each sequence holds an implicit
      @@     state local to itself. The output state tensor provided by the
      @@     model in 'output_name' field of the current inference request will
      @@     be transferred as an input tensor named 'input_name' in the next
      @@     request of the same sequence. The input state of the first request
      @@     in the sequence contains garbage data.
      @@
       
      repeated .inference.ModelSequenceBatching.State state = 5;
    • getStateOrBuilder

      @@  .. cpp:var:: State state (repeated)
      @@
      @@     The optional state that can be stored in Triton for performing
      @@     inference requests on a sequence. Each sequence holds an implicit
      @@     state local to itself. The output state tensor provided by the
      @@     model in 'output_name' field of the current inference request will
      @@     be transferred as an input tensor named 'input_name' in the next
      @@     request of the same sequence. The input state of the first request
      @@     in the sequence contains garbage data.
      @@
       
      repeated .inference.ModelSequenceBatching.State state = 5;
    • getIterativeSequence

      boolean getIterativeSequence()
      @@  .. cpp:var:: bool iterative_sequence
      @@
      @@     Requests for iterative sequences are processed over a number
      @@     of iterations. An iterative sequence is initiated by a single
      @@     request and is "rescheduled" by the model until completion.
      @@     Inflight requests for such sequences will be batched together
      @@     and can complete independently. Note this feature
      @@     requires backend support. Default value is false.
       
      bool iterative_sequence = 6;
      Returns:
      The iterativeSequence.
    • getStrategyChoiceCase