Skip to content

Commit a88151d

Browse files
Add change to inform caller about model loading failures due to environment constraints being violated (#2180)
* Add change to inform caller about model loading failures due to environment constraints being violated * Add initial tests and fix mistake * Add tests and bump dependencies in inference
1 parent 57b58c0 commit a88151d

File tree

24 files changed

+716
-50
lines changed

24 files changed

+716
-50
lines changed

inference/core/exceptions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,10 @@ class CannotInitialiseModelError(Exception):
215215
pass
216216

217217

218+
class CannotInitialiseModelDueToInputSizeError(CannotInitialiseModelError):
219+
pass
220+
221+
218222
class RetryRequestError(Exception):
219223

220224
def __init__(self, message: str, inner_error: Exception):

inference/core/interfaces/http/error_handlers.py

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from inference.core import logger
66
from inference.core.entities.responses.workflows import WorkflowErrorResponse
77
from inference.core.exceptions import (
8+
CannotInitialiseModelDueToInputSizeError,
89
ContentTypeInvalid,
910
ContentTypeMissing,
1011
CreditsExceededError,
@@ -59,6 +60,7 @@
5960
NotSupportedExecutionEngineError,
6061
ReferenceTypeError,
6162
RuntimeInputError,
63+
RuntimeLimitsCausedStepExecutionError,
6264
StepExecutionError,
6365
StepInputDimensionalityError,
6466
WorkflowBlockError,
@@ -77,7 +79,9 @@
7779
ModelInputError,
7880
ModelLoadingError,
7981
ModelNotFoundError,
82+
ModelPackageAlternativesExhaustedError,
8083
ModelPackageNegotiationError,
84+
ModelPackageRestrictedError,
8185
ModelRetrievalError,
8286
UnauthorizedModelAccessError,
8387
UntrustedFileError,
@@ -309,6 +313,40 @@ def wrapped_route(*args, **kwargs):
309313
resp = JSONResponse(
310314
status_code=500, content={"message": "Model package is broken."}
311315
)
316+
except (
317+
CannotInitialiseModelDueToInputSizeError,
318+
ModelPackageRestrictedError,
319+
) as error:
320+
logger.exception("%s: %s", type(error).__name__, error)
321+
resp = JSONResponse(
322+
status_code=507,
323+
content={
324+
"message": "Model loading failed due to restrictions of server configuration - "
325+
"usually due to excessive runtime memory requirement of the model (for instance "
326+
"caused by large input size).",
327+
},
328+
)
329+
except ModelPackageAlternativesExhaustedError as error:
330+
logger.exception("%s: %s", type(error).__name__, error)
331+
inner_errors = error.alternatives_errors or []
332+
if any(isinstance(e, ModelPackageRestrictedError) for e in inner_errors):
333+
resp = JSONResponse(
334+
status_code=507,
335+
content={
336+
"message": "Model loading failed due to restrictions of server configuration - "
337+
"usually due to excessive runtime memory requirement of the model (for instance "
338+
"caused by large input size).",
339+
"help_url": error.help_url,
340+
},
341+
)
342+
else:
343+
resp = JSONResponse(
344+
status_code=500,
345+
content={
346+
"message": f"Model loading failed: {error}",
347+
"help_url": error.help_url,
348+
},
349+
)
312350
except ModelLoadingError as error:
313351
logger.exception("%s: %s", type(error).__name__, error)
314352
resp = JSONResponse(
@@ -389,7 +427,10 @@ def wrapped_route(*args, **kwargs):
389427
"message": "Timeout when attempting to connect to Roboflow API."
390428
},
391429
)
392-
except ClientCausedStepExecutionError as error:
430+
except (
431+
ClientCausedStepExecutionError,
432+
RuntimeLimitsCausedStepExecutionError,
433+
) as error:
393434
logger.exception("%s: %s", type(error).__name__, error)
394435
content = WorkflowErrorResponse(
395436
message=str(error.public_message),
@@ -711,6 +752,40 @@ async def wrapped_route(*args, **kwargs):
711752
resp = JSONResponse(
712753
status_code=500, content={"message": "Model package is broken."}
713754
)
755+
except (
756+
CannotInitialiseModelDueToInputSizeError,
757+
ModelPackageRestrictedError,
758+
) as error:
759+
logger.exception("%s: %s", type(error).__name__, error)
760+
resp = JSONResponse(
761+
status_code=507,
762+
content={
763+
"message": "Model loading failed due to restrictions of server configuration - "
764+
"usually due to excessive runtime memory requirement of the model (for instance "
765+
"caused by large input size).",
766+
},
767+
)
768+
except ModelPackageAlternativesExhaustedError as error:
769+
logger.exception("%s: %s", type(error).__name__, error)
770+
inner_errors = error.alternatives_errors or []
771+
if any(isinstance(e, ModelPackageRestrictedError) for e in inner_errors):
772+
resp = JSONResponse(
773+
status_code=507,
774+
content={
775+
"message": "Model loading failed due to restrictions of server configuration - "
776+
"usually due to excessive runtime memory requirement of the model (for instance "
777+
"caused by large input size).",
778+
"help_url": error.help_url,
779+
},
780+
)
781+
else:
782+
resp = JSONResponse(
783+
status_code=500,
784+
content={
785+
"message": f"Model loading failed: {error}",
786+
"help_url": error.help_url,
787+
},
788+
)
714789
except ModelLoadingError as error:
715790
logger.exception("%s: %s", type(error).__name__, error)
716791
resp = JSONResponse(
@@ -791,7 +866,10 @@ async def wrapped_route(*args, **kwargs):
791866
"message": "Timeout when attempting to connect to Roboflow API."
792867
},
793868
)
794-
except ClientCausedStepExecutionError as error:
869+
except (
870+
ClientCausedStepExecutionError,
871+
RuntimeLimitsCausedStepExecutionError,
872+
) as error:
795873
logger.exception("%s: %s", type(error).__name__, error)
796874
content = WorkflowErrorResponse(
797875
message=str(error.public_message),

inference/core/workflows/errors.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,22 @@ def __init__(
189189
self.status_code = status_code
190190

191191

192+
class RuntimeLimitsCausedStepExecutionError(WorkflowExecutionEngineError):
193+
def __init__(
194+
self,
195+
block_id: str,
196+
status_code: int,
197+
public_message: str,
198+
context: str,
199+
inner_error: Optional[Exception] = None,
200+
):
201+
super().__init__(
202+
public_message=public_message, context=context, inner_error=inner_error
203+
)
204+
self.block_id = block_id
205+
self.status_code = status_code
206+
207+
192208
class ExecutionEngineRuntimeError(WorkflowExecutionEngineError):
193209
pass
194210

inference/core/workflows/execution_engine/v1/step_error_handlers.py

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from inference.core.exceptions import (
2+
CannotInitialiseModelDueToInputSizeError,
23
InferenceModelNotFound,
34
InvalidModelIDError,
45
ModelManagerLockAcquisitionError,
@@ -7,8 +8,16 @@
78
RoboflowAPINotAuthorizedError,
89
RoboflowAPINotNotFoundError,
910
)
10-
from inference.core.workflows.errors import ClientCausedStepExecutionError
11-
from inference_models.errors import ModelNotFoundError, UnauthorizedModelAccessError
11+
from inference.core.workflows.errors import (
12+
ClientCausedStepExecutionError,
13+
RuntimeLimitsCausedStepExecutionError,
14+
)
15+
from inference_models.errors import (
16+
ModelNotFoundError,
17+
ModelPackageAlternativesExhaustedError,
18+
ModelPackageRestrictedError,
19+
UnauthorizedModelAccessError,
20+
)
1221
from inference_sdk.http.errors import HTTPCallErrorError
1322

1423

@@ -27,6 +36,39 @@ def extended_roboflow_errors_handler(step_name: str, error: Exception) -> None:
2736
),
2837
):
2938
raise error
39+
if isinstance(error, CannotInitialiseModelDueToInputSizeError):
40+
raise RuntimeLimitsCausedStepExecutionError(
41+
block_id=step_name,
42+
status_code=507,
43+
public_message=f"Could not complete workflow execution due to configured runtime constraints. "
44+
f"Details: model input size causes runtime memory requirements exceed the limit "
45+
f"configured for the environment.",
46+
context="workflow_execution | step_execution",
47+
inner_error=error,
48+
) from error
49+
if isinstance(error, ModelPackageRestrictedError):
50+
raise RuntimeLimitsCausedStepExecutionError(
51+
block_id=step_name,
52+
status_code=507,
53+
public_message="Model loading failed due to restrictions of server configuration - "
54+
"usually due to excessive runtime memory requirement of the model (for instance "
55+
"caused by large input size).",
56+
context="workflow_execution | step_execution",
57+
inner_error=error,
58+
) from error
59+
if isinstance(error, ModelPackageAlternativesExhaustedError) and any(
60+
isinstance(e, ModelPackageRestrictedError)
61+
for e in (error.alternatives_errors or [])
62+
):
63+
raise RuntimeLimitsCausedStepExecutionError(
64+
block_id=step_name,
65+
status_code=507,
66+
public_message="Model loading failed due to restrictions of server configuration - "
67+
"usually due to excessive runtime memory requirement of the model (for instance "
68+
"caused by large input size).",
69+
context="workflow_execution | step_execution",
70+
inner_error=error,
71+
) from error
3072
if isinstance(error, InvalidModelIDError):
3173
raise ClientCausedStepExecutionError(
3274
block_id=step_name,
@@ -117,4 +159,13 @@ def extended_roboflow_errors_handler(step_name: str, error: Exception) -> None:
117159
context="workflow_execution | step_execution",
118160
inner_error=error,
119161
) from error
162+
if error.status_code == 507:
163+
raise RuntimeLimitsCausedStepExecutionError(
164+
block_id=step_name,
165+
status_code=507,
166+
public_message=f"Could not complete workflow execution due to configured runtime constraints. "
167+
f"Details: {error.api_message}",
168+
context="workflow_execution | step_execution",
169+
inner_error=error,
170+
) from error
120171
return None

inference/models/rfdetr/rfdetr.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
USE_PYTORCH_FOR_PREPROCESSING,
2323
)
2424
from inference.core.exceptions import (
25-
CannotInitialiseModelError,
25+
CannotInitialiseModelDueToInputSizeError,
2626
ModelArtefactError,
2727
OnnxProviderNotAvailable,
2828
)
@@ -450,7 +450,9 @@ def initialize_model(self, **kwargs) -> None:
450450
input_resolution,
451451
RFDETR_ONNX_MAX_RESOLUTION,
452452
)
453-
raise CannotInitialiseModelError(f"Resolution too high for RFDETR")
453+
raise CannotInitialiseModelDueToInputSizeError(
454+
f"Resolution too high for RFDETR"
455+
)
454456

455457
logger.debug("Creating inference session")
456458
if self.load_weights or not self.has_model_metadata:

inference_models/docs/changelog.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,15 @@
11
# Changelog
22

3+
## `0.24.1`
4+
5+
### Changed
6+
7+
- Added optional field `alternatives_errors` to `ModelPackageAlternativesExhaustedError`, making it possible
8+
to report to the caller what types of errors happened during the load - making it possible to deduce if
9+
problem with loading is recoverable.
10+
11+
---
12+
313
## `0.24.0`
414

515
### Added

inference_models/inference_models/errors.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Optional
1+
from typing import List, Optional
22

33

44
class BaseInferenceModelsError(Exception):
@@ -106,7 +106,19 @@ class ModelLoadingError(BaseInferenceModelsError):
106106

107107

108108
class ModelPackageAlternativesExhaustedError(ModelLoadingError):
109-
pass
109+
110+
def __init__(
111+
self,
112+
message: str,
113+
help_url: Optional[str] = None,
114+
alternatives_errors: Optional[List[Exception]] = None,
115+
):
116+
super().__init__(message, help_url)
117+
self._alternatives_errors = alternatives_errors
118+
119+
@property
120+
def alternatives_errors(self) -> Optional[List[Exception]]:
121+
return self._alternatives_errors
110122

111123

112124
class MissingModelInitParameterError(ModelLoadingError):

inference_models/inference_models/models/auto_loaders/core.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1203,6 +1203,7 @@ def attempt_loading_matching_model_packages(
12031203
f"the event. https://github.com/roboflow/inference/issues\n\n"
12041204
f"Here is the summary of errors for specific model packages:\n{summary_of_errors}\n\n",
12051205
help_url="https://inference-models.roboflow.com/errors/model-loading/#modelpackagealternativesexhaustederror",
1206+
alternatives_errors=[summary[1] for summary in failed_load_attempts],
12061207
)
12071208

12081209

inference_models/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "inference-models"
3-
version = "0.24.0"
3+
version = "0.24.1"
44
description = "The new inference engine for Computer Vision models"
55
readme = "README.md"
66
requires-python = ">=3.10,<3.13"

inference_models/uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)