Package inference
Interface ModelConfigOuterClass.ModelDynamicBatchingOrBuilder
- All Superinterfaces:
com.google.protobuf.MessageLiteOrBuilder, com.google.protobuf.MessageOrBuilder
- All Known Implementing Classes:
ModelConfigOuterClass.ModelDynamicBatching, ModelConfigOuterClass.ModelDynamicBatching.Builder
- Enclosing class:
ModelConfigOuterClass
public static interface ModelConfigOuterClass.ModelDynamicBatchingOrBuilder
extends com.google.protobuf.MessageOrBuilder
-
Method Summary
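Modifier and Type | Method | Description
boolean | containsPriorityQueuePolicy(long key) | Specify the queue policy for the priority level.
long | getDefaultPriorityLevel() | The priority level used for requests that don't specify their priority.
ModelConfigOuterClass.ModelQueuePolicy | getDefaultQueuePolicy() | The default queue policy used for requests that don't require priority handling and requests that specify priority levels where there is no specific policy given.
ModelConfigOuterClass.ModelQueuePolicyOrBuilder | getDefaultQueuePolicyOrBuilder() | The default queue policy used for requests that don't require priority handling and requests that specify priority levels where there is no specific policy given.
long | getMaxQueueDelayMicroseconds() | The maximum time, in microseconds, a request will be delayed in the scheduling queue to wait for additional requests for batching.
int | getPreferredBatchSize(int index) | Preferred batch sizes for dynamic batching.
int | getPreferredBatchSizeCount() | Preferred batch sizes for dynamic batching.
List<Integer> | getPreferredBatchSizeList() | Preferred batch sizes for dynamic batching.
boolean | getPreserveOrdering() | Should the dynamic batcher preserve the ordering of responses to match the order of requests received by the scheduler.
long | getPriorityLevels() | The number of priority levels to be enabled for the model.
Map<Long,ModelConfigOuterClass.ModelQueuePolicy> | getPriorityQueuePolicy() | Deprecated.
int | getPriorityQueuePolicyCount() | Specify the queue policy for the priority level.
Map<Long,ModelConfigOuterClass.ModelQueuePolicy> | getPriorityQueuePolicyMap() | Specify the queue policy for the priority level.
ModelConfigOuterClass.ModelQueuePolicy | getPriorityQueuePolicyOrDefault(long key, ModelConfigOuterClass.ModelQueuePolicy defaultValue) | Specify the queue policy for the priority level.
ModelConfigOuterClass.ModelQueuePolicy | getPriorityQueuePolicyOrThrow(long key) | Specify the queue policy for the priority level.
boolean | hasDefaultQueuePolicy() | The default queue policy used for requests that don't require priority handling and requests that specify priority levels where there is no specific policy given.
Methods inherited from interface com.google.protobuf.MessageLiteOrBuilder
isInitialized
Methods inherited from interface com.google.protobuf.MessageOrBuilder
findInitializationErrors, getAllFields, getDefaultInstanceForType, getDescriptorForType, getField, getInitializationErrorString, getOneofFieldDescriptor, getRepeatedField, getRepeatedFieldCount, getUnknownFields, hasField, hasOneof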
-
Method Details
-
getPreferredBatchSizeList
List<Integer> getPreferredBatchSizeList()
Preferred batch sizes for dynamic batching. If a batch of one of these sizes can be formed it will be executed immediately. If not specified, a preferred batch size will be chosen automatically based on model and GPU characteristics.
repeated int32 preferred_batch_size = 1;
- Returns:
- A list containing the preferredBatchSize.
-
getPreferredBatchSizeCount
int getPreferredBatchSizeCount()
Preferred batch sizes for dynamic batching. If a batch of one of these sizes can be formed it will be executed immediately. If not specified, a preferred batch size will be chosen automatically based on model and GPU characteristics.
repeated int32 preferred_batch_size = 1;
- Returns:
- The count of preferredBatchSize.
-
getPreferredBatchSize
int getPreferredBatchSize(int index)
Preferred batch sizes for dynamic batching. If a batch of one of these sizes can be formed it will be executed immediately. If not specified, a preferred batch size will be chosen automatically based on model and GPU characteristics.
repeated int32 preferred_batch_size = 1;
- Parameters:
index - The index of the element to return.
- Returns:
- The preferredBatchSize at the given index.
-
getMaxQueueDelayMicroseconds
long getMaxQueueDelayMicroseconds()
The maximum time, in microseconds, a request will be delayed in the scheduling queue to wait for additional requests for batching. Default is 0.
uint64 max_queue_delay_microseconds = 2;
- Returns:
- The maxQueueDelayMicroseconds.
-
getPreserveOrdering
boolean getPreserveOrdering()
Should the dynamic batcher preserve the ordering of responses to match the order of requests received by the scheduler. Default is false. If true, the responses will be returned in the same order as the order of requests sent to the scheduler. If false, the responses may be returned in arbitrary order. This option is specifically needed when a sequence of related inference requests (i.e. inference requests with the same correlation ID) are sent to the dynamic batcher to ensure that the sequence responses are in the correct order.
bool preserve_ordering = 3;
- Returns:
- The preserveOrdering.
-
getPriorityLevels
long getPriorityLevels()
The number of priority levels to be enabled for the model. Priority levels start from 1, and 1 is the highest priority. Requests are handled in priority order with all priority 1 requests processed before priority 2, all priority 2 requests processed before priority 3, etc. Requests with the same priority level will be handled in the order that they are received.
uint64 priority_levels = 4;
- Returns:
- The priorityLevels.
-
getDefaultPriorityLevel
long getDefaultPriorityLevel()
The priority level used for requests that don't specify their priority. The value must be in the range [1, 'priority_levels'].
uint64 default_priority_level = 5;
- Returns:
- The defaultPriorityLevel.
-
hasDefaultQueuePolicy
boolean hasDefaultQueuePolicy()
The default queue policy used for requests that don't require priority handling and requests that specify priority levels where there is no specific policy given. If not specified, a policy with default field values will be used.
.inference.ModelQueuePolicy default_queue_policy = 6;
- Returns:
- Whether the defaultQueuePolicy field is set.
-
getDefaultQueuePolicy
ModelConfigOuterClass.ModelQueuePolicy getDefaultQueuePolicy()
The default queue policy used for requests that don't require priority handling and requests that specify priority levels where there is no specific policy given. If not specified, a policy with default field values will be used.
.inference.ModelQueuePolicy default_queue_policy = 6;
- Returns:
- The defaultQueuePolicy.
-
getDefaultQueuePolicyOrBuilder
ModelConfigOuterClass.ModelQueuePolicyOrBuilder getDefaultQueuePolicyOrBuilder()
The default queue policy used for requests that don't require priority handling and requests that specify priority levels where there is no specific policy given. If not specified, a policy with default field values will be used.
.inference.ModelQueuePolicy default_queue_policy = 6;
-
getPriorityQueuePolicyCount
int getPriorityQueuePolicyCount()
Specify the queue policy for the priority level. The default queue policy will be used if a priority level doesn't specify a queue policy.
map<uint64, .inference.ModelQueuePolicy> priority_queue_policy = 7;
-
containsPriorityQueuePolicy
boolean containsPriorityQueuePolicy(long key)
Specify the queue policy for the priority level. The default queue policy will be used if a priority level doesn't specify a queue policy.
map<uint64, .inference.ModelQueuePolicy> priority_queue_policy = 7;
-
getPriorityQueuePolicy
@Deprecated Map<Long,ModelConfigOuterClass.ModelQueuePolicy> getPriorityQueuePolicy()
Deprecated. Use getPriorityQueuePolicyMap() instead.
-
getPriorityQueuePolicyMap
Map<Long,ModelConfigOuterClass.ModelQueuePolicy> getPriorityQueuePolicyMap()
Specify the queue policy for the priority level. The default queue policy will be used if a priority level doesn't specify a queue policy.
map<uint64, .inference.ModelQueuePolicy> priority_queue_policy = 7;
-
getPriorityQueuePolicyOrDefault
ModelConfigOuterClass.ModelQueuePolicy getPriorityQueuePolicyOrDefault(long key, ModelConfigOuterClass.ModelQueuePolicy defaultValue)
Specify the queue policy for the priority level. The default queue policy will be used if a priority level doesn't specify a queue policy.
map<uint64, .inference.ModelQueuePolicy> priority_queue_policy = 7;
-
getPriorityQueuePolicyOrThrow
ModelConfigOuterClass.ModelQueuePolicy getPriorityQueuePolicyOrThrow(long key)
Specify the queue policy for the priority level. The default queue policy will be used if a priority level doesn't specify a queue policy.
map<uint64, .inference.ModelQueuePolicy> priority_queue_policy = 7;
-