@@ -109,15 +109,15 @@ def _fix_empty_assistant_content_in_messages(messages: Any) -> None:
109109 message ["content" ] = " "
110110
111111
112- def _get_ai_gateway_base_url (
112+ def _discover_ai_gateway_host (
113113 http_client : Client ,
114114 host : str ,
115115) -> str | None :
116- """Check if AI Gateway V2 is enabled and return its base URL .
116+ """Discover the AI Gateway host URL (scheme + netloc only).
117117
118118 Calls GET /api/ai-gateway/v2/endpoints. If successful and endpoints exist,
119- extracts the ai_gateway_url from the first endpoint response.
120- Returns None if gateway is not available.
119+ extracts the ai_gateway_url from the first endpoint response and returns
120+ just the scheme + netloc (no path). Returns None if gateway is not available.
121121 """
122122 try :
123123 response = http_client .get (f"{ host } /api/ai-gateway/v2/endpoints" )
@@ -131,24 +131,53 @@ def _get_ai_gateway_base_url(
131131 if not gateway_url :
132132 return None
133133 parsed = urlparse (gateway_url )
134- return f"{ parsed .scheme } ://{ parsed .netloc } /mlflow/v1 "
134+ return f"{ parsed .scheme } ://{ parsed .netloc } "
135135 except Exception :
136136 return None
137137
138138
def _get_ai_gateway_base_url(
    http_client: Client,
    host: str,
) -> str | None:
    """Return the MLflow-format AI Gateway base URL, if a gateway is found.

    Discovery is delegated to ``_discover_ai_gateway_host``; this wrapper only
    appends the ``/mlflow/v1`` path to the host that helper reports.

    Args:
        http_client: Client passed through unchanged to the discovery helper.
        host: Workspace host passed through unchanged to the discovery helper.

    Returns:
        ``<gateway host>/mlflow/v1``, or None when the discovery helper
        returns a falsy value (gateway not available).
    """
    discovered_host = _discover_ai_gateway_host(http_client, host)
    # Guard clause: without a discovered host there is nothing to append to.
    if not discovered_host:
        return None
    return f"{discovered_host}/mlflow/v1"
150+
151+
139152def _resolve_base_url (
140153 workspace_client : WorkspaceClient ,
141154 base_url : str | None ,
142155 use_ai_gateway : bool ,
143156 http_client : Client ,
157+ use_ai_gateway_native_api : bool ,
144158) -> str :
145159 """Resolve the target base URL for the OpenAI client."""
160+ if use_ai_gateway_native_api and base_url is not None :
161+ raise ValueError ("Cannot specify both 'use_ai_gateway_native_api' and 'base_url'." )
162+ if use_ai_gateway_native_api and use_ai_gateway :
163+ raise ValueError ("Cannot specify both 'use_ai_gateway_native_api' and 'use_ai_gateway'." )
164+
146165 if base_url is not None :
147166 if _DATABRICKS_APPS_DOMAIN in base_url :
148167 _validate_oauth_for_apps (workspace_client )
149168 return base_url
150169
151- # Prioritize using AI Gateway endpoints
170+ # Native provider API via AI Gateway (e.g. OpenAI-compatible /openai path)
171+ if use_ai_gateway_native_api :
172+ gateway_host = _discover_ai_gateway_host (http_client , workspace_client .config .host )
173+ if gateway_host :
174+ return f"{ gateway_host } /openai/v1"
175+ raise ValueError (
176+ "Please ensure AI Gateway V2 is enabled for the workspace "
177+ "when use_ai_gateway_native_api is set to True."
178+ )
179+
180+ # MLflow-format AI Gateway endpoints
152181 if use_ai_gateway :
153182 gateway_url = _get_ai_gateway_base_url (http_client , workspace_client .config .host )
154183 if gateway_url :
@@ -362,8 +391,12 @@ class DatabricksOpenAI(OpenAI):
362391 base_url: Optional base URL to override the default serving endpoints URL. When the URL
363392 points to a Databricks App (contains "databricksapps"), OAuth authentication is
364393 required.
394+ use_ai_gateway_native_api: If True, auto-detect AI Gateway V2 and route requests through
395+ its native OpenAI-compatible API (``<ai_gateway_url>/openai/v1``). This allows use of
396+ provider-native features not available through the MLflow API. Cannot be combined
397+ with ``base_url`` or ``use_ai_gateway``. Defaults to False.
365398 use_ai_gateway: If True, auto-detect AI Gateway V2 availability and route
366- requests through it. Defaults to False.
399+ requests through it using the MLflow API. Defaults to False.
367400
368401 Example - Query a serving or AI gateway endpoint:
369402 >>> client = DatabricksOpenAI()
@@ -372,6 +405,13 @@ class DatabricksOpenAI(OpenAI):
372405 ... messages=[{"role": "user", "content": "Hello!"}],
373406 ... )
374407
408+ Example - Query AI Gateway endpoints via the native OpenAI-compatible API:
409+ >>> client = DatabricksOpenAI(use_ai_gateway_native_api=True)
410+ >>> response = client.chat.completions.create(
411+ ... model="databricks-meta-llama-3-1-70b-instruct",
412+ ... messages=[{"role": "user", "content": "Hello!"}],
413+ ... )
414+
375415 Example - Query a Databricks App directly by URL (requires OAuth):
376416 >>> # WorkspaceClient must be configured with OAuth authentication
377417 >>> # See: https://docs.databricks.com/aws/en/dev-tools/auth/oauth-u2m.html
@@ -397,6 +437,7 @@ def __init__(
397437 self ,
398438 workspace_client : WorkspaceClient | None = None ,
399439 base_url : str | None = None ,
440+ use_ai_gateway_native_api : bool = False ,
400441 use_ai_gateway : bool = False ,
401442 ):
402443 if workspace_client is None :
@@ -405,7 +446,9 @@ def __init__(
405446 self ._workspace_client = workspace_client
406447
407448 http_client = _get_authorized_http_client (workspace_client )
408- target_base_url = _resolve_base_url (workspace_client , base_url , use_ai_gateway , http_client )
449+ target_base_url = _resolve_base_url (
450+ workspace_client , base_url , use_ai_gateway , http_client , use_ai_gateway_native_api
451+ )
409452
410453 # Authentication is handled via http_client, not api_key
411454 super ().__init__ (
@@ -510,8 +553,12 @@ class AsyncDatabricksOpenAI(AsyncOpenAI):
510553 base_url: Optional base URL to override the default serving endpoints URL. When the URL
511554 points to a Databricks App (contains "databricksapps"), OAuth authentication is
512555 required.
556+ use_ai_gateway_native_api: If True, auto-detect AI Gateway V2 and route requests through
557+ its native OpenAI-compatible API (``<ai_gateway_url>/openai/v1``). This allows use of
558+ provider-native features not available through the MLflow API. Cannot be combined
559+ with ``base_url`` or ``use_ai_gateway``. Defaults to False.
513560 use_ai_gateway: If True, auto-detect AI Gateway V2 availability and route
514- requests through it. Defaults to False.
561+ requests through it using the MLflow API. Defaults to False.
515562
516563 Example - Query a serving or AI gateway endpoint:
517564 >>> client = AsyncDatabricksOpenAI()
@@ -520,6 +567,13 @@ class AsyncDatabricksOpenAI(AsyncOpenAI):
520567 ... messages=[{"role": "user", "content": "Hello!"}],
521568 ... )
522569
570+ Example - Query AI Gateway endpoints via the native OpenAI-compatible API:
571+ >>> client = AsyncDatabricksOpenAI(use_ai_gateway_native_api=True)
572+ >>> response = await client.chat.completions.create(
573+ ... model="databricks-meta-llama-3-1-70b-instruct",
574+ ... messages=[{"role": "user", "content": "Hello!"}],
575+ ... )
576+
523577 Example - Query a Databricks App directly by URL (requires OAuth):
524578 >>> # WorkspaceClient must be configured with OAuth authentication
525579 >>> # See: https://docs.databricks.com/aws/en/dev-tools/auth/oauth-u2m.html
@@ -545,6 +599,7 @@ def __init__(
545599 self ,
546600 workspace_client : WorkspaceClient | None = None ,
547601 base_url : str | None = None ,
602+ use_ai_gateway_native_api : bool = False ,
548603 use_ai_gateway : bool = False ,
549604 ):
550605 if workspace_client is None :
@@ -554,7 +609,7 @@ def __init__(
554609
555610 sync_http_client = _get_authorized_http_client (workspace_client )
556611 target_base_url = _resolve_base_url (
557- workspace_client , base_url , use_ai_gateway , sync_http_client
612+ workspace_client , base_url , use_ai_gateway , sync_http_client , use_ai_gateway_native_api
558613 )
559614
560615 # Authentication is handled via http_client, not api_key
0 commit comments