Package inference

Interface ModelConfigOuterClass.ModelEnsemblingOrBuilder

All Superinterfaces:
com.google.protobuf.MessageLiteOrBuilder, com.google.protobuf.MessageOrBuilder
All Known Implementing Classes:
ModelConfigOuterClass.ModelEnsembling, ModelConfigOuterClass.ModelEnsembling.Builder
Enclosing class:
ModelConfigOuterClass

public static interface ModelConfigOuterClass.ModelEnsemblingOrBuilder extends com.google.protobuf.MessageOrBuilder
  • Method Details

    • getStepList

      java.util.List<ModelConfigOuterClass.ModelEnsembling.Step> getStepList()
      @@  .. cpp:var:: Step step (repeated)
      @@
      @@     The models and the input / output mappings used within the ensemble.
      @@
       
      repeated .inference.ModelEnsembling.Step step = 1;
    • getStep

      ModelConfigOuterClass.ModelEnsembling.Step getStep(int index)
      @@  .. cpp:var:: Step step (repeated)
      @@
      @@     The models and the input / output mappings used within the ensemble.
      @@
       
      repeated .inference.ModelEnsembling.Step step = 1;
    • getStepCount

      int getStepCount()
      @@  .. cpp:var:: Step step (repeated)
      @@
      @@     The models and the input / output mappings used within the ensemble.
      @@
       
      repeated .inference.ModelEnsembling.Step step = 1;
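    • getStepOrBuilderList

      java.util.List<? extends ModelConfigOuterClass.ModelEnsembling.StepOrBuilder> getStepOrBuilderList()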
      @@  .. cpp:var:: Step step (repeated)
      @@
      @@     The models and the input / output mappings used within the ensemble.
      @@
       
      repeated .inference.ModelEnsembling.Step step = 1;
    • getStepOrBuilder

      ModelConfigOuterClass.ModelEnsembling.StepOrBuilder getStepOrBuilder(int index)
      @@  .. cpp:var:: Step step (repeated)
      @@
      @@     The models and the input / output mappings used within the ensemble.
      @@
       
      repeated .inference.ModelEnsembling.Step step = 1;
    • getMaxInflightRequests

      int getMaxInflightRequests()
      @@  .. cpp:var:: uint32 max_inflight_requests
      @@
      @@     The maximum number of concurrent inflight requests allowed at each
      @@     ensemble step per inference request. This limit prevents unbounded
      @@     memory growth when ensemble steps produce responses faster than
      @@     downstream steps can consume them, e.g. with decoupled models.
      @@     Default value is 0, which indicates that no limit is enforced.
      @@
      @@     Note: Applying this limit may block upstream steps while they wait
      @@     for downstream capacity. This blocking does not cancel or internally
      @@     time out intermediate requests, but clients may experience increased
      @@     end-to-end latency.
      @@
       
      uint32 max_inflight_requests = 2;
      Returns:
      The value of max_inflight_requests; 0 (the default) means no limit is enforced.