Skip to content

Commit 5c891d1

Browse files
TaoChenOSU and moonbox3 authored
Python: Add support to new openai text to image model (#13651)
### Motivation and Context <!-- Thank you for your contribution to the semantic-kernel repo! Please help reviewers and future users, providing the following information: 1. Why is this change required? 2. What problem does it solve? 3. What scenario does it contribute to? 4. If it fixes an open issue, please link to the issue here. --> Dall-e-3 and Dall-e-2 have been deprecated. The newer gpt-1-image model returns base64 strings. ### Description <!-- Describe your changes, the overall approach, the underlying design. These notes will help understanding how your code works. Thanks! --> Return url if set, otherwise return the base64 string. ### Contribution Checklist <!-- Before submitting this PR, please make sure: --> - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [ ] I didn't break anyone 😄 --------- Co-authored-by: Evan Mattson <evan.mattson@microsoft.com>
1 parent c250b37 commit 5c891d1

File tree

4 files changed

+117
-13
lines changed

4 files changed

+117
-13
lines changed

python/samples/concepts/images/image_generation.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
# Copyright (c) Microsoft. All rights reserved.
22

33
import asyncio
4-
from urllib.request import urlopen
4+
import base64
5+
from io import BytesIO
56

67
from semantic_kernel.prompt_template import PromptTemplateConfig
78

@@ -20,16 +21,14 @@
2021

2122
async def main():
2223
kernel = Kernel()
23-
dalle3 = OpenAITextToImage()
24-
kernel.add_service(dalle3)
24+
service = OpenAITextToImage()
25+
kernel.add_service(service)
2526
kernel.add_service(OpenAIChatCompletion(service_id="default"))
2627

27-
image = await dalle3.generate_image(
28-
description="a painting of a flower vase", width=1024, height=1024, quality="hd", style="vivid"
29-
)
30-
print(image)
28+
image_b64 = await service.generate_image(description="a painting of a flower vase", width=1024, height=1024)
29+
3130
if pil_available:
32-
img = Image.open(urlopen(image)) # nosec
31+
img = Image.open(BytesIO(base64.b64decode(image_b64)))
3332
img.show()
3433

3534
result = await kernel.invoke_prompt(
@@ -42,7 +41,7 @@ async def main():
4241
role="user",
4342
items=[
4443
TextContent(text="What is in this image?"),
45-
ImageContent(uri=image),
44+
ImageContent(data=image_b64, data_format="base64", mime_type="image/png"),
4645
],
4746
)
4847
]

python/semantic_kernel/connectors/ai/open_ai/prompt_execution_settings/open_ai_text_to_image_execution_settings.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,7 @@ class OpenAITextToImageExecutionSettings(PromptExecutionSettings):
3838
prompt: str | None = None
3939
ai_model_id: str | None = Field(default=None, serialization_alias="model")
4040
size: ImageSize | None = None
41-
quality: str | None = None
42-
style: str | None = None
41+
quality: Literal["high", "medium", "low"] | None = None
4342
output_compression: int | None = None
4443
background: Literal["transparent", "opaque", "auto"] | None = None
4544
n: int | None = Field(default=1, ge=1, le=10)

python/semantic_kernel/connectors/ai/open_ai/services/open_ai_text_to_image_base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,10 +68,10 @@ async def generate_image(
6868
response = await self._send_request(settings)
6969

7070
assert isinstance(response, ImagesResponse) # nosec
71-
if not response.data or not response.data[0].url:
71+
if not response.data or not (response.data[0].url or response.data[0].b64_json):
7272
raise ServiceResponseException("Failed to generate image.")
7373

74-
return response.data[0].url
74+
return response.data[0].url or response.data[0].b64_json # type: ignore[return-value]
7575

7676
async def generate_images(
7777
self,

python/tests/unit/connectors/ai/open_ai/services/test_openai_text_to_image.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,3 +267,109 @@ async def test_edit_image_invalid_n_parameter():
267267
OpenAITextToImageExecutionSettings(n=0)
268268
with pytest.raises(pydantic.ValidationError):
269269
OpenAITextToImageExecutionSettings(n=11)
270+
271+
272+
@pytest.mark.asyncio
273+
async def test_generate_images_empty_prompt(openai_unit_test_env):
274+
"""Test that empty prompt raises ServiceInvalidRequestError."""
275+
service = OpenAITextToImage(ai_model_id=openai_unit_test_env["OPENAI_TEXT_TO_IMAGE_MODEL_ID"])
276+
with pytest.raises(ServiceInvalidRequestError):
277+
await service.generate_images("")
278+
279+
280+
@patch.object(OpenAITextToImageBase, "_send_request", new_callable=AsyncMock)
281+
async def test_generate_images_no_result(mock_generate, openai_unit_test_env):
282+
"""Test that empty response data raises ServiceResponseException."""
283+
mock_generate.return_value = ImagesResponse(created=0, data=[], usage=None)
284+
service = OpenAITextToImage(ai_model_id=openai_unit_test_env["OPENAI_TEXT_TO_IMAGE_MODEL_ID"])
285+
with pytest.raises(ServiceResponseException):
286+
await service.generate_images("prompt")
287+
288+
289+
@patch.object(OpenAITextToImageBase, "_send_request", new_callable=AsyncMock)
290+
async def test_generate_images_b64_json_response(mock_generate, openai_unit_test_env):
291+
"""Test that generate_images returns b64_json when url is not present."""
292+
mock_generate.return_value = ImagesResponse(created=1, data=[Image(b64_json="base64encodeddata")], usage=None)
293+
service = OpenAITextToImage(ai_model_id=openai_unit_test_env["OPENAI_TEXT_TO_IMAGE_MODEL_ID"])
294+
result = await service.generate_images("prompt")
295+
assert result == ["base64encodeddata"]
296+
297+
298+
@patch.object(OpenAITextToImageBase, "_send_request", new_callable=AsyncMock)
299+
async def test_generate_images_mixed_url_and_b64_response(mock_generate, openai_unit_test_env):
300+
"""Test that generate_images handles mixed url and b64_json responses."""
301+
mock_generate.return_value = ImagesResponse(
302+
created=2,
303+
data=[Image(url="http://example.com/img1.png"), Image(b64_json="base64data")],
304+
usage=None,
305+
)
306+
service = OpenAITextToImage(ai_model_id=openai_unit_test_env["OPENAI_TEXT_TO_IMAGE_MODEL_ID"])
307+
result = await service.generate_images("prompt")
308+
assert result == ["http://example.com/img1.png", "base64data"]
309+
310+
311+
@patch.object(OpenAITextToImageBase, "_send_request", new_callable=AsyncMock)
312+
async def test_generate_images_with_default_settings(mock_generate, openai_unit_test_env):
313+
"""Test that generate_images works when no settings are provided."""
314+
mock_generate.return_value = ImagesResponse(created=1, data=[Image(url="url")], usage=None)
315+
service = OpenAITextToImage(ai_model_id=openai_unit_test_env["OPENAI_TEXT_TO_IMAGE_MODEL_ID"])
316+
result = await service.generate_images("a beautiful sunset")
317+
assert result == ["url"]
318+
mock_generate.assert_awaited_once()
319+
320+
321+
@patch.object(OpenAITextToImageBase, "_send_request", new_callable=AsyncMock)
322+
async def test_generate_images_no_valid_image_data(mock_generate, openai_unit_test_env):
323+
"""Test that generate_images raises error when images have neither url nor b64_json."""
324+
mock_generate.return_value = ImagesResponse(created=1, data=[Image()], usage=None)
325+
service = OpenAITextToImage(ai_model_id=openai_unit_test_env["OPENAI_TEXT_TO_IMAGE_MODEL_ID"])
326+
with pytest.raises(ServiceResponseException, match="No valid image data found"):
327+
await service.generate_images("prompt")
328+
329+
330+
@pytest.mark.asyncio
331+
async def test_edit_image_neither_path_nor_file(openai_unit_test_env):
332+
"""Test that providing neither image_paths nor image_files raises ServiceInvalidRequestError."""
333+
service = OpenAITextToImage(ai_model_id=openai_unit_test_env["OPENAI_TEXT_TO_IMAGE_MODEL_ID"])
334+
with pytest.raises(ServiceInvalidRequestError):
335+
await service.edit_image(prompt="edit this")
336+
337+
338+
@patch.object(OpenAITextToImageBase, "_send_image_edit_request", new_callable=AsyncMock)
339+
async def test_edit_image_b64_json_response(mock_edit, openai_unit_test_env):
340+
"""Test editing an image returns b64_json when url is not present."""
341+
mock_edit.return_value = ImagesResponse(created=1, data=[Image(b64_json="edited_b64")], usage=None)
342+
service = OpenAITextToImage(ai_model_id=openai_unit_test_env["OPENAI_TEXT_TO_IMAGE_MODEL_ID"])
343+
result = await service.edit_image(
344+
prompt="edit this image",
345+
image_paths=[sample_img],
346+
)
347+
assert result == ["edited_b64"]
348+
349+
350+
@patch.object(OpenAITextToImageBase, "_send_image_edit_request", new_callable=AsyncMock)
351+
async def test_edit_image_mixed_response(mock_edit, openai_unit_test_env):
352+
"""Test editing images handles mixed b64_json and url responses."""
353+
mock_edit.return_value = ImagesResponse(
354+
created=2,
355+
data=[Image(b64_json="b64data"), Image(url="http://example.com/edited.png")],
356+
usage=None,
357+
)
358+
service = OpenAITextToImage(ai_model_id=openai_unit_test_env["OPENAI_TEXT_TO_IMAGE_MODEL_ID"])
359+
result = await service.edit_image(
360+
prompt="edit these images",
361+
image_paths=[sample_img],
362+
)
363+
assert result == ["b64data", "http://example.com/edited.png"]
364+
365+
366+
@patch.object(OpenAITextToImageBase, "_send_image_edit_request", new_callable=AsyncMock)
367+
async def test_edit_image_response_no_data_attribute(mock_edit, openai_unit_test_env):
368+
"""Test that edit_image raises error when response has no valid data."""
369+
mock_edit.return_value = ImagesResponse(created=1, data=None, usage=None)
370+
service = OpenAITextToImage(ai_model_id=openai_unit_test_env["OPENAI_TEXT_TO_IMAGE_MODEL_ID"])
371+
with pytest.raises(ServiceResponseException):
372+
await service.edit_image(
373+
prompt="edit",
374+
image_paths=[sample_img],
375+
)

0 commit comments

Comments (0)