Package inference

Interface ModelConfigOuterClass.ModelDynamicBatchingOrBuilder

All Superinterfaces:
com.google.protobuf.MessageLiteOrBuilder, com.google.protobuf.MessageOrBuilder
All Known Implementing Classes:
ModelConfigOuterClass.ModelDynamicBatching, ModelConfigOuterClass.ModelDynamicBatching.Builder
Enclosing class:
ModelConfigOuterClass

public static interface ModelConfigOuterClass.ModelDynamicBatchingOrBuilder extends com.google.protobuf.MessageOrBuilder
  • Method Details

    • getPreferredBatchSizeList

      List<Integer> getPreferredBatchSizeList()
      @@  .. cpp:var:: int32 preferred_batch_size (repeated)
      @@
      @@     Preferred batch sizes for dynamic batching. If a batch of one of
      @@     these sizes can be formed, it will be executed immediately. If
      @@     not specified, a preferred batch size will be chosen automatically
      @@     based on model and GPU characteristics.
      @@
       
      repeated int32 preferred_batch_size = 1;
      Returns:
      A list containing the preferredBatchSize.
    • getPreferredBatchSizeCount

      int getPreferredBatchSizeCount()
      @@  .. cpp:var:: int32 preferred_batch_size (repeated)
      @@
      @@     Preferred batch sizes for dynamic batching. If a batch of one of
      @@     these sizes can be formed, it will be executed immediately. If
      @@     not specified, a preferred batch size will be chosen automatically
      @@     based on model and GPU characteristics.
      @@
       
      repeated int32 preferred_batch_size = 1;
      Returns:
      The count of preferredBatchSize.
    • getPreferredBatchSize

      int getPreferredBatchSize(int index)
      @@  .. cpp:var:: int32 preferred_batch_size (repeated)
      @@
      @@     Preferred batch sizes for dynamic batching. If a batch of one of
      @@     these sizes can be formed, it will be executed immediately. If
      @@     not specified, a preferred batch size will be chosen automatically
      @@     based on model and GPU characteristics.
      @@
       
      repeated int32 preferred_batch_size = 1;
      Parameters:
      index - The index of the element to return.
      Returns:
      The preferredBatchSize at the given index.
    • getMaxQueueDelayMicroseconds

      long getMaxQueueDelayMicroseconds()
      @@  .. cpp:var:: uint64 max_queue_delay_microseconds
      @@
      @@     The maximum time, in microseconds, a request will be delayed in
      @@     the scheduling queue to wait for additional requests for
      @@     batching. Default is 0.
      @@
       
      uint64 max_queue_delay_microseconds = 2;
      Returns:
      The maxQueueDelayMicroseconds.
    • getPreserveOrdering

      boolean getPreserveOrdering()
      @@  .. cpp:var:: bool preserve_ordering
      @@
      @@     Whether the dynamic batcher should preserve the ordering of
      @@     responses to match the order of requests received by the
      @@     scheduler. Default is false. If true, the responses will be
      @@     returned in the same order as the order of requests sent to the
      @@     scheduler. If false, the responses may be returned in arbitrary
      @@     order. This option is specifically needed when a sequence of
      @@     related inference requests (i.e. inference requests with the same
      @@     correlation ID) is sent to the dynamic batcher, to ensure that
      @@     the sequence's responses are in the correct order.
      @@
       
      bool preserve_ordering = 3;
      Returns:
      The preserveOrdering.
    • getPriorityLevels

      long getPriorityLevels()
      @@  .. cpp:var:: uint64 priority_levels
      @@
      @@     The number of priority levels to be enabled for the model.
      @@     Priority levels start from 1, and 1 is the highest priority.
      @@     Requests are handled in priority order with all priority 1 requests
      @@     processed before priority 2, all priority 2 requests processed before
      @@     priority 3, etc. Requests with the same priority level will be
      @@     handled in the order that they are received.
      @@
       
      uint64 priority_levels = 4;
      Returns:
      The priorityLevels.
    • getDefaultPriorityLevel

      long getDefaultPriorityLevel()
      @@  .. cpp:var:: uint64 default_priority_level
      @@
      @@     The priority level used for requests that don't specify their
      @@     priority. The value must be in the range [ 1, 'priority_levels' ].
      @@
       
      uint64 default_priority_level = 5;
      Returns:
      The defaultPriorityLevel.
    • hasDefaultQueuePolicy

      boolean hasDefaultQueuePolicy()
      @@  .. cpp:var:: ModelQueuePolicy default_queue_policy
      @@
      @@     The default queue policy used for requests that don't require
      @@     priority handling and requests that specify priority levels where
      @@     there is no specific policy given. If not specified, a policy with
      @@     default field values will be used.
      @@
       
      .inference.ModelQueuePolicy default_queue_policy = 6;
      Returns:
      Whether the defaultQueuePolicy field is set.
    • getDefaultQueuePolicy

      ModelConfigOuterClass.ModelQueuePolicy getDefaultQueuePolicy()
      @@  .. cpp:var:: ModelQueuePolicy default_queue_policy
      @@
      @@     The default queue policy used for requests that don't require
      @@     priority handling and requests that specify priority levels where
      @@     there is no specific policy given. If not specified, a policy with
      @@     default field values will be used.
      @@
       
      .inference.ModelQueuePolicy default_queue_policy = 6;
      Returns:
      The defaultQueuePolicy.
    • getDefaultQueuePolicyOrBuilder

      ModelConfigOuterClass.ModelQueuePolicyOrBuilder getDefaultQueuePolicyOrBuilder()
      @@  .. cpp:var:: ModelQueuePolicy default_queue_policy
      @@
      @@     The default queue policy used for requests that don't require
      @@     priority handling and requests that specify priority levels where
      @@     there is no specific policy given. If not specified, a policy with
      @@     default field values will be used.
      @@
       
      .inference.ModelQueuePolicy default_queue_policy = 6;
    • getPriorityQueuePolicyCount

      int getPriorityQueuePolicyCount()
      @@  .. cpp:var:: map<uint64, ModelQueuePolicy> priority_queue_policy
      @@
      @@     Specify the queue policy for the priority level. The default queue
      @@     policy will be used if a priority level doesn't specify a queue
      @@     policy.
      @@
       
      map<uint64, .inference.ModelQueuePolicy> priority_queue_policy = 7;
    • containsPriorityQueuePolicy

      boolean containsPriorityQueuePolicy(long key)
      @@  .. cpp:var:: map<uint64, ModelQueuePolicy> priority_queue_policy
      @@
      @@     Specify the queue policy for the priority level. The default queue
      @@     policy will be used if a priority level doesn't specify a queue
      @@     policy.
      @@
       
      map<uint64, .inference.ModelQueuePolicy> priority_queue_policy = 7;
    • getPriorityQueuePolicy

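      @Deprecated
      Map<Long,ModelConfigOuterClass.ModelQueuePolicy> getPriorityQueuePolicy()
      Deprecated.
      Use getPriorityQueuePolicyMap() instead.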
    • getPriorityQueuePolicyMap

      Map<Long,ModelConfigOuterClass.ModelQueuePolicy> getPriorityQueuePolicyMap()
      @@  .. cpp:var:: map<uint64, ModelQueuePolicy> priority_queue_policy
      @@
      @@     Specify the queue policy for the priority level. The default queue
      @@     policy will be used if a priority level doesn't specify a queue
      @@     policy.
      @@
       
      map<uint64, .inference.ModelQueuePolicy> priority_queue_policy = 7;
    • getPriorityQueuePolicyOrDefault

      ModelConfigOuterClass.ModelQueuePolicy getPriorityQueuePolicyOrDefault(long key, ModelConfigOuterClass.ModelQueuePolicy defaultValue)
      @@  .. cpp:var:: map<uint64, ModelQueuePolicy> priority_queue_policy
      @@
      @@     Specify the queue policy for the priority level. The default queue
      @@     policy will be used if a priority level doesn't specify a queue
      @@     policy.
      @@
       
      map<uint64, .inference.ModelQueuePolicy> priority_queue_policy = 7;
    • getPriorityQueuePolicyOrThrow

      ModelConfigOuterClass.ModelQueuePolicy getPriorityQueuePolicyOrThrow(long key)
      @@  .. cpp:var:: map<uint64, ModelQueuePolicy> priority_queue_policy
      @@
      @@     Specify the queue policy for the priority level. The default queue
      @@     policy will be used if a priority level doesn't specify a queue
      @@     policy.
      @@
       
      map<uint64, .inference.ModelQueuePolicy> priority_queue_policy = 7;