Skip to content

Commit a19cb78

Browse files
committed
parse and use recommendedParameters
1 parent 5df1795 commit a19cb78

File tree

25 files changed

+1922
-64
lines changed

25 files changed

+1922
-64
lines changed

inference/core/entities/requests/inference.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,9 +146,14 @@ class ObjectDetectionInferenceRequest(CVInferenceRequest):
146146
description="If provided, only predictions for the listed classes will be returned",
147147
)
148148
confidence: Optional[float] = Field(
149-
default=0.4,
149+
default=None,
150150
examples=[0.5],
151-
description="The confidence threshold used to filter out predictions",
151+
description=(
152+
"The confidence threshold used to filter out predictions. If omitted, "
153+
"the server uses the model's F1-optimal threshold from model evaluation "
154+
"when available, otherwise falls back to 0.4. Pass an explicit value to "
155+
"override both."
156+
),
152157
)
153158
fix_batch_size: Optional[bool] = Field(
154159
default=False,

inference/core/models/inference_models_adapters.py

Lines changed: 51 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
)
6363
from inference_models.models.base.types import PreprocessingMetadata
6464

65+
6566
DEFAULT_COLOR_PALETTE = [
6667
"#A351FB",
6768
"#FF4040",
@@ -154,7 +155,10 @@ def postprocess(
154155
**kwargs,
155156
) -> List[ObjectDetectionInferenceResponse]:
156157
mapped_kwargs = self.map_inference_kwargs(kwargs)
157-
detections_list = self._model.post_process(
158+
# The model owns the recommendedParameters priority chain (user → per-class
159+
# → global → default) and per-class refinement. The adapter just passes
160+
# the user's confidence kwarg through unchanged.
161+
detections_list = self._model.post_process_with_confidence_filter(
158162
predictions, preprocess_return_metadata, **mapped_kwargs
159163
)
160164

@@ -305,7 +309,8 @@ def postprocess(
305309
**kwargs,
306310
) -> List[InstanceSegmentationInferenceResponse]:
307311
mapped_kwargs = self.map_inference_kwargs(kwargs)
308-
detections_list = self._model.post_process(
312+
# See OD adapter — the model owns the recommendedParameters filter chain.
313+
detections_list = self._model.post_process_with_confidence_filter(
309314
predictions, preprocess_return_metadata, **mapped_kwargs
310315
)
311316

@@ -465,7 +470,8 @@ def postprocess(
465470
**kwargs,
466471
) -> List[KeypointsDetectionInferenceResponse]:
467472
mapped_kwargs = self.map_inference_kwargs(kwargs)
468-
keypoints_list, detections_list = self._model.post_process(
473+
# See OD adapter — the model owns the recommendedParameters filter chain.
474+
keypoints_list, detections_list = self._model.post_process_with_confidence_filter(
469475
predictions, preprocess_return_metadata, **mapped_kwargs
470476
)
471477
if detections_list is None:
@@ -677,25 +683,36 @@ def postprocess(
677683
List[ClassificationInferenceResponse],
678684
]:
679685
mapped_kwargs = self.map_inference_kwargs(kwargs)
680-
post_processed_predictions = self._model.post_process(
681-
predictions, **mapped_kwargs
682-
)
683-
if isinstance(post_processed_predictions, list):
684-
# multi-label classification
685-
return prepare_multi_label_classification_response(
686-
post_processed_predictions,
687-
image_sizes=returned_metadata,
688-
class_names=self.class_names,
689-
confidence_threshold=kwargs.get("confidence", 0.5),
686+
if isinstance(self._model, MultiLabelClassificationModel):
687+
# Model owns the recommendedParameters filter chain — its
688+
# post_process_with_confidence_filter applies per-class refinement
689+
# to `class_ids`. The response builder reads `class_ids` directly
690+
# rather than re-thresholding the full confidence vector, so the
691+
# per-class decision makes it through to the API response.
692+
post_processed_predictions = (
693+
self._model.post_process_with_confidence_filter(
694+
predictions, **mapped_kwargs
695+
)
690696
)
691-
else:
692-
# single-label classification
693-
return prepare_classification_response(
697+
return prepare_multi_label_classification_response(
694698
post_processed_predictions,
695699
image_sizes=returned_metadata,
696700
class_names=self.class_names,
697-
confidence_threshold=kwargs.get("confidence", 0.5),
698701
)
702+
# Single-label classification: top-1 always wins regardless of
703+
# confidence, so per-class refinement isn't meaningful here. The base
704+
# class deliberately opts out of recommendedParameters entirely. The
705+
# response builder still uses kwargs.get("confidence", 0.5) for the
706+
# cutoff that decides which alternative classes show up.
707+
post_processed_predictions = self._model.post_process(
708+
predictions, **mapped_kwargs
709+
)
710+
return prepare_classification_response(
711+
post_processed_predictions,
712+
image_sizes=returned_metadata,
713+
class_names=self.class_names,
714+
confidence_threshold=kwargs.get("confidence", 0.5),
715+
)
699716

700717
def clear_cache(self, delete_from_disk: bool = True) -> None:
701718
"""Clears any cache if necessary. TODO: Implement this to delete the cache from the experimental model.
@@ -747,20 +764,29 @@ def prepare_multi_label_classification_response(
747764
post_processed_predictions: List[MultiLabelClassificationPrediction],
748765
image_sizes: List[Tuple[int, int]],
749766
class_names: List[str],
750-
confidence_threshold: float,
751767
) -> List[MultiLabelClassificationInferenceResponse]:
768+
"""Build the API response from a model's post-processed predictions.
769+
770+
`prediction.class_ids` is the authoritative list of "passed" classes —
771+
the model's `post_process_with_confidence_filter` already applied the
772+
full priority chain (user → per-class → global → default), so the
773+
response builder doesn't re-threshold here. The full per-class score
774+
vector is still emitted in `image_predictions_dict` for UI display.
775+
"""
752776
results = []
753777
for prediction, image_size in zip(post_processed_predictions, image_sizes):
754-
image_predictions_dict = dict()
755-
predicted_classes = []
756-
for class_id, confidence in enumerate(prediction.confidence.cpu().tolist()):
757-
cls_name = class_names[class_id]
758-
image_predictions_dict[cls_name] = {
778+
image_predictions_dict = {
779+
class_names[class_id]: {
759780
"confidence": confidence,
760781
"class_id": class_id,
761782
}
762-
if confidence > confidence_threshold:
763-
predicted_classes.append(cls_name)
783+
for class_id, confidence in enumerate(
784+
prediction.confidence.cpu().tolist()
785+
)
786+
}
787+
predicted_classes = [
788+
class_names[class_id] for class_id in prediction.class_ids.tolist()
789+
]
764790
results.append(
765791
MultiLabelClassificationInferenceResponse(
766792
predictions=image_predictions_dict,

inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v2.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,11 +85,15 @@ class BlockManifest(WorkflowBlockManifest):
8585
images: Selector(kind=[IMAGE_KIND]) = ImageInputField
8686
model_id: Union[Selector(kind=[ROBOFLOW_MODEL_ID_KIND]), str] = RoboflowModelField
8787
confidence: Union[
88-
FloatZeroToOne,
88+
Optional[FloatZeroToOne],
8989
Selector(kind=[FLOAT_ZERO_TO_ONE_KIND]),
9090
] = Field(
91-
default=0.4,
92-
description="Confidence threshold for predictions.",
91+
default=None,
92+
description=(
93+
"Confidence threshold for predictions. If omitted, the inference "
94+
"server uses the model's F1-optimal threshold from model evaluation "
95+
"when available, otherwise falls back to 0.4."
96+
),
9397
examples=[0.3, "$inputs.confidence_threshold"],
9498
)
9599
class_filter: Union[Optional[List[str]], Selector(kind=[LIST_OF_VALUES_KIND])] = (

inference/core/workflows/core_steps/models/roboflow/keypoint_detection/v2.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,11 +84,18 @@ class BlockManifest(WorkflowBlockManifest):
8484
images: Selector(kind=[IMAGE_KIND]) = ImageInputField
8585
model_id: Union[Selector(kind=[ROBOFLOW_MODEL_ID_KIND]), str] = RoboflowModelField
8686
confidence: Union[
87-
FloatZeroToOne,
87+
Optional[FloatZeroToOne],
8888
Selector(kind=[FLOAT_ZERO_TO_ONE_KIND]),
8989
] = Field(
90-
default=0.4,
91-
description="Confidence threshold for predictions.",
90+
default=None,
91+
description=(
92+
"Per-instance confidence threshold for predictions. If omitted, the "
93+
"inference server uses the model's F1-optimal threshold from model "
94+
"evaluation when available, otherwise falls back to 0.4. Note that "
95+
"this filters which detected instances (e.g. people, animals) are "
96+
"returned at all — separately, `keypoint_confidence` filters which "
97+
"individual joints within an accepted instance are marked visible."
98+
),
9299
examples=[0.3, "$inputs.confidence_threshold"],
93100
)
94101
keypoint_confidence: Union[

inference/core/workflows/core_steps/models/roboflow/object_detection/v2.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,15 @@ class BlockManifest(WorkflowBlockManifest):
8282
images: Selector(kind=[IMAGE_KIND]) = ImageInputField
8383
model_id: Union[Selector(kind=[ROBOFLOW_MODEL_ID_KIND]), str] = RoboflowModelField
8484
confidence: Union[
85-
FloatZeroToOne,
85+
Optional[FloatZeroToOne],
8686
Selector(kind=[FLOAT_ZERO_TO_ONE_KIND]),
8787
] = Field(
88-
default=0.4,
89-
description="Confidence threshold for predictions.",
88+
default=None,
89+
description=(
90+
"Confidence threshold for predictions. If omitted, the inference "
91+
"server uses the model's F1-optimal threshold from model evaluation "
92+
"when available, otherwise falls back to 0.4."
93+
),
9094
examples=[0.3, "$inputs.confidence_threshold"],
9195
)
9296
class_filter: Union[Optional[List[str]], Selector(kind=[LIST_OF_VALUES_KIND])] = (

inference_models/inference_models/models/auto_loaders/auto_resolution_cache.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@
1717
TaskType,
1818
)
1919
from inference_models.utils.file_system import dump_json, read_json
20-
from inference_models.weights_providers.entities import ModelDependency
20+
from inference_models.weights_providers.entities import (
21+
ModelDependency,
22+
RecommendedParameters,
23+
)
2124

2225

2326
class AutoResolutionCacheEntry(BaseModel):
@@ -30,6 +33,8 @@ class AutoResolutionCacheEntry(BaseModel):
3033
model_dependencies: Optional[List[ModelDependency]] = Field(default=None)
3134
created_at: datetime
3235
model_features: Optional[dict] = Field(default=None)
36+
# Cached so auto-load cache hits don't need to re-fetch model metadata.
37+
recommended_parameters: Optional[RecommendedParameters] = Field(default=None)
3338

3439

3540
class AutoResolutionCache(ABC):

inference_models/inference_models/models/auto_loaders/core.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@
8181
ModelDependency,
8282
ModelPackageMetadata,
8383
Quantization,
84+
RecommendedParameters,
8485
)
8586

8687
MODEL_TYPES_TO_LOAD_FROM_CHECKPOINT = {
@@ -926,6 +927,7 @@ def model_directory_pointer(model_dir: str) -> None:
926927
model_dependencies=model_metadata.model_dependencies,
927928
model_dependencies_instances=model_dependencies_instances,
928929
model_dependencies_directories=model_dependencies_directories,
930+
recommended_parameters=model_metadata.recommended_parameters,
929931
max_package_loading_attempts=max_package_loading_attempts,
930932
model_download_file_lock_acquire_timeout=model_download_file_lock_acquire_timeout,
931933
verify_hash_while_download=verify_hash_while_download,
@@ -1081,6 +1083,11 @@ def attempt_loading_model_with_auto_load_cache(
10811083
model = model_class.from_pretrained(
10821084
model_package_cache_dir, **model_init_kwargs
10831085
)
1086+
# See initialize_model() for the hasattr-gated injection rationale.
1087+
if cache_entry.recommended_parameters is not None and hasattr(
1088+
type(model), "recommended_parameters"
1089+
):
1090+
model.recommended_parameters = cache_entry.recommended_parameters
10841091
verbose_info(
10851092
message=f"Successfully loaded model {model_name_or_path} using auto-loading cache.",
10861093
verbose_requested=verbose,
@@ -1113,6 +1120,7 @@ def attempt_loading_matching_model_packages(
11131120
model_dependencies: Optional[List[ModelDependency]],
11141121
model_dependencies_instances: Dict[str, AnyModel],
11151122
model_dependencies_directories: Dict[str, str],
1123+
recommended_parameters: Optional[RecommendedParameters] = None,
11161124
max_package_loading_attempts: Optional[int] = None,
11171125
model_download_file_lock_acquire_timeout: int = FILE_LOCK_ACQUIRE_TIMEOUT,
11181126
verbose: bool = True,
@@ -1153,6 +1161,7 @@ def attempt_loading_matching_model_packages(
11531161
model_dependencies=model_dependencies,
11541162
model_dependencies_instances=model_dependencies_instances,
11551163
model_dependencies_directories=model_dependencies_directories,
1164+
recommended_parameters=recommended_parameters,
11561165
verify_hash_while_download=verify_hash_while_download,
11571166
download_files_without_hash=download_files_without_hash,
11581167
on_file_created=partial(
@@ -1218,6 +1227,7 @@ def initialize_model(
12181227
model_dependencies: Optional[List[ModelDependency]],
12191228
model_dependencies_instances: Dict[str, AnyModel],
12201229
model_dependencies_directories: Dict[str, str],
1230+
recommended_parameters: Optional[RecommendedParameters] = None,
12211231
model_download_file_lock_acquire_timeout: int = FILE_LOCK_ACQUIRE_TIMEOUT,
12221232
verify_hash_while_download: bool = True,
12231233
download_files_without_hash: bool = False,
@@ -1308,6 +1318,14 @@ def initialize_model(
13081318
resolved_files.update(dependencies_resolved_files)
13091319
model_init_kwargs[MODEL_DEPENDENCIES_KEY] = model_dependencies_instances
13101320
model = model_class.from_pretrained(model_package_cache_dir, **model_init_kwargs)
1321+
# Inject recommended parameters onto model classes that opt in by declaring
1322+
# `recommended_parameters` at the class level (default = None). hasattr on
1323+
# `type(model)` checks the class, not instance state — so model types that
1324+
# don't care (single-label classification, embeddings, etc.) silently no-op.
1325+
if recommended_parameters is not None and hasattr(
1326+
type(model), "recommended_parameters"
1327+
):
1328+
model.recommended_parameters = recommended_parameters
13111329
dump_auto_resolution_cache(
13121330
use_auto_resolution_cache=use_auto_resolution_cache,
13131331
auto_resolution_cache=auto_resolution_cache,
@@ -1320,6 +1338,7 @@ def initialize_model(
13201338
resolved_files=resolved_files,
13211339
model_dependencies=model_dependencies,
13221340
model_features=model_package.model_features,
1341+
recommended_parameters=recommended_parameters,
13231342
)
13241343
return model, model_package_cache_dir
13251344

@@ -1484,6 +1503,7 @@ def dump_auto_resolution_cache(
14841503
resolved_files: Set[str],
14851504
model_dependencies: Optional[List[ModelDependency]],
14861505
model_features: Optional[dict],
1506+
recommended_parameters: Optional[RecommendedParameters] = None,
14871507
) -> None:
14881508
if not use_auto_resolution_cache:
14891509
return None
@@ -1497,6 +1517,7 @@ def dump_auto_resolution_cache(
14971517
created_at=datetime.now(),
14981518
model_dependencies=model_dependencies,
14991519
model_features=model_features,
1520+
recommended_parameters=recommended_parameters,
15001521
)
15011522
auto_resolution_cache.register(
15021523
auto_negotiation_hash=auto_negotiation_hash, cache_entry=cache_content

inference_models/inference_models/models/auto_loaders/entities.py

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from dataclasses import dataclass, field
2-
from enum import Enum
32
from typing import Optional, Union
43

4+
from inference_models.models.auto_loaders.types import BackendType
55
from inference_models.models.base.classification import (
66
ClassificationModel,
77
MultiLabelClassificationModel,
@@ -22,17 +22,6 @@
2222
MODEL_CONFIG_FILE_NAME = "model_config.json"
2323

2424

25-
class BackendType(str, Enum):
26-
TORCH = "torch"
27-
TORCH_SCRIPT = "torch-script"
28-
ONNX = "onnx"
29-
TRT = "trt"
30-
HF = "hugging-face"
31-
ULTRALYTICS = "ultralytics"
32-
MEDIAPIPE = "mediapipe"
33-
CUSTOM = "custom"
34-
35-
3625
AnyModel = Union[
3726
ClassificationModel,
3827
MultiLabelClassificationModel,
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
"""
2+
Leaf-level auto-loader types with no model-class dependencies. Split out from
3+
`auto_loaders/entities.py` so that `weights_providers/entities.py` can import
4+
`BackendType` without pulling in the model base class tree (which would cause
5+
a cycle — base classes depend on `weights_providers.entities.RecommendedParameters`).
6+
"""
7+
8+
from enum import Enum
9+
10+
11+
class BackendType(str, Enum):
12+
TORCH = "torch"
13+
TORCH_SCRIPT = "torch-script"
14+
ONNX = "onnx"
15+
TRT = "trt"
16+
HF = "hugging-face"
17+
ULTRALYTICS = "ultralytics"
18+
MEDIAPIPE = "mediapipe"
19+
CUSTOM = "custom"

0 commit comments

Comments (0)