Skip to content

Commit cc96f44

Browse files
feat(api): custom voices
1 parent 6d26c2e commit cc96f44

16 files changed

Lines changed: 372 additions & 77 deletions

.stats.yml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
11
configured_endpoints: 151
2-
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-3e207c26eea3b15837c78ef2fe0e1c68937708fd0763971ce749c0bdb7db6376.yml
3-
openapi_spec_hash: 626982004d5a594a822fa7883422efb4
4-
config_hash: 0dda4b3af379312c9c55467a5e1e1ec0
2+
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-cb3e4451108eed58d59cff25bf77ec0dc960ec9c6f3dba68f90e7a9847c09d21.yml
3+
openapi_spec_hash: dec6d9be64a5ba8f474a1f2a7a4fafef
4+
config_hash: e922f01e25accd07d8fd3641c37fbd62

lib/openai/models/audio/speech_create_params.rb

Lines changed: 32 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -24,11 +24,12 @@ class SpeechCreateParams < OpenAI::Internal::Type::BaseModel
2424
# @!attribute voice
2525
# The voice to use when generating the audio. Supported built-in voices are
2626
# `alloy`, `ash`, `ballad`, `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`,
27-
# `shimmer`, `verse`, `marin`, and `cedar`. Previews of the voices are available
28-
# in the
27+
# `shimmer`, `verse`, `marin`, and `cedar`. You may also provide a custom voice
28+
# object with an `id`, for example `{ "id": "voice_1234" }`. Previews of the
29+
# voices are available in the
2930
# [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options).
3031
#
31-
# @return [String, Symbol, OpenAI::Models::Audio::SpeechCreateParams::Voice]
32+
# @return [String, Symbol, OpenAI::Models::Audio::SpeechCreateParams::Voice::ID, OpenAI::Models::Audio::SpeechCreateParams::Voice]
3233
required :voice, union: -> { OpenAI::Audio::SpeechCreateParams::Voice }
3334

3435
# @!attribute instructions
@@ -67,7 +68,7 @@ class SpeechCreateParams < OpenAI::Internal::Type::BaseModel
6768
#
6869
# @param model [String, Symbol, OpenAI::Models::Audio::SpeechModel] One of the available [TTS models](https://platform.openai.com/docs/models#tts):
6970
#
70-
# @param voice [String, Symbol, OpenAI::Models::Audio::SpeechCreateParams::Voice] The voice to use when generating the audio. Supported built-in voices are `alloy
71+
# @param voice [String, Symbol, OpenAI::Models::Audio::SpeechCreateParams::Voice::ID, OpenAI::Models::Audio::SpeechCreateParams::Voice] The voice to use when generating the audio. Supported built-in voices are `alloy
7172
#
7273
# @param instructions [String] Control the voice of your generated audio with additional instructions. Does not
7374
#
@@ -95,8 +96,9 @@ module Model
9596

9697
# The voice to use when generating the audio. Supported built-in voices are
9798
# `alloy`, `ash`, `ballad`, `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`,
98-
# `shimmer`, `verse`, `marin`, and `cedar`. Previews of the voices are available
99-
# in the
99+
# `shimmer`, `verse`, `marin`, and `cedar`. You may also provide a custom voice
100+
# object with an `id`, for example `{ "id": "voice_1234" }`. Previews of the
101+
# voices are available in the
100102
# [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options).
101103
module Voice
102104
extend OpenAI::Internal::Type::Union
@@ -123,11 +125,33 @@ module Voice
123125

124126
variant const: -> { OpenAI::Models::Audio::SpeechCreateParams::Voice::CEDAR }
125127

128+
# Custom voice reference.
129+
variant -> { OpenAI::Audio::SpeechCreateParams::Voice::ID }
130+
131+
class ID < OpenAI::Internal::Type::BaseModel
132+
# @!attribute id
133+
# The custom voice ID, e.g. `voice_1234`.
134+
#
135+
# @return [String]
136+
required :id, String
137+
138+
# @!method initialize(id:)
139+
# Custom voice reference.
140+
#
141+
# @param id [String] The custom voice ID, e.g. `voice_1234`.
142+
end
143+
126144
# @!method self.variants
127-
# @return [Array(String, Symbol)]
145+
# @return [Array(String, Symbol, OpenAI::Models::Audio::SpeechCreateParams::Voice::ID)]
128146

129147
define_sorbet_constant!(:Variants) do
130-
T.type_alias { T.any(String, OpenAI::Audio::SpeechCreateParams::Voice::TaggedSymbol) }
148+
T.type_alias do
149+
T.any(
150+
String,
151+
OpenAI::Audio::SpeechCreateParams::Voice::TaggedSymbol,
152+
OpenAI::Audio::SpeechCreateParams::Voice::ID
153+
)
154+
end
131155
end
132156

133157
# @!group

lib/openai/models/chat/chat_completion_audio_param.rb

Lines changed: 30 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -14,9 +14,10 @@ class ChatCompletionAudioParam < OpenAI::Internal::Type::BaseModel
1414
# @!attribute voice
1515
# The voice the model uses to respond. Supported built-in voices are `alloy`,
1616
# `ash`, `ballad`, `coral`, `echo`, `fable`, `nova`, `onyx`, `sage`, `shimmer`,
17-
# `marin`, and `cedar`.
17+
# `marin`, and `cedar`. You may also provide a custom voice object with an `id`,
18+
# for example `{ "id": "voice_1234" }`.
1819
#
19-
# @return [String, Symbol, OpenAI::Models::Chat::ChatCompletionAudioParam::Voice]
20+
# @return [String, Symbol, OpenAI::Models::Chat::ChatCompletionAudioParam::Voice::ID, OpenAI::Models::Chat::ChatCompletionAudioParam::Voice]
2021
required :voice, union: -> { OpenAI::Chat::ChatCompletionAudioParam::Voice }
2122

2223
# @!method initialize(format_:, voice:)
@@ -29,7 +30,7 @@ class ChatCompletionAudioParam < OpenAI::Internal::Type::BaseModel
2930
#
3031
# @param format_ [Symbol, OpenAI::Models::Chat::ChatCompletionAudioParam::Format] Specifies the output audio format. Must be one of `wav`, `mp3`, `flac`,
3132
#
32-
# @param voice [String, Symbol, OpenAI::Models::Chat::ChatCompletionAudioParam::Voice] The voice the model uses to respond. Supported built-in voices are `alloy`, `ash
33+
# @param voice [String, Symbol, OpenAI::Models::Chat::ChatCompletionAudioParam::Voice::ID, OpenAI::Models::Chat::ChatCompletionAudioParam::Voice] The voice the model uses to respond. Supported built-in voices are
3334

3435
# Specifies the output audio format. Must be one of `wav`, `mp3`, `flac`, `opus`,
3536
# or `pcm16`.
@@ -51,7 +52,8 @@ module Format
5152

5253
# The voice the model uses to respond. Supported built-in voices are `alloy`,
5354
# `ash`, `ballad`, `coral`, `echo`, `fable`, `nova`, `onyx`, `sage`, `shimmer`,
54-
# `marin`, and `cedar`.
55+
# `marin`, and `cedar`. You may also provide a custom voice object with an `id`,
56+
# for example `{ "id": "voice_1234" }`.
5557
#
5658
# @see OpenAI::Models::Chat::ChatCompletionAudioParam#voice
5759
module Voice
@@ -79,11 +81,33 @@ module Voice
7981

8082
variant const: -> { OpenAI::Models::Chat::ChatCompletionAudioParam::Voice::CEDAR }
8183

84+
# Custom voice reference.
85+
variant -> { OpenAI::Chat::ChatCompletionAudioParam::Voice::ID }
86+
87+
class ID < OpenAI::Internal::Type::BaseModel
88+
# @!attribute id
89+
# The custom voice ID, e.g. `voice_1234`.
90+
#
91+
# @return [String]
92+
required :id, String
93+
94+
# @!method initialize(id:)
95+
# Custom voice reference.
96+
#
97+
# @param id [String] The custom voice ID, e.g. `voice_1234`.
98+
end
99+
82100
# @!method self.variants
83-
# @return [Array(String, Symbol)]
101+
# @return [Array(String, Symbol, OpenAI::Models::Chat::ChatCompletionAudioParam::Voice::ID)]
84102

85103
define_sorbet_constant!(:Variants) do
86-
T.type_alias { T.any(String, OpenAI::Chat::ChatCompletionAudioParam::Voice::TaggedSymbol) }
104+
T.type_alias do
105+
T.any(
106+
String,
107+
OpenAI::Chat::ChatCompletionAudioParam::Voice::TaggedSymbol,
108+
OpenAI::Chat::ChatCompletionAudioParam::Voice::ID
109+
)
110+
end
87111
end
88112

89113
# @!group

lib/openai/models/realtime/realtime_audio_config_output.rb

Lines changed: 34 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -25,10 +25,12 @@ class RealtimeAudioConfigOutput < OpenAI::Internal::Type::BaseModel
2525
# @!attribute voice
2626
# The voice the model uses to respond. Supported built-in voices are `alloy`,
2727
# `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and
28-
# `cedar`. Voice cannot be changed during the session once the model has responded
29-
# with audio at least once. We recommend `marin` and `cedar` for best quality.
28+
# `cedar`. You may also provide a custom voice object with an `id`, for example
29+
# `{ "id": "voice_1234" }`. Voice cannot be changed during the session once the
30+
# model has responded with audio at least once. We recommend `marin` and `cedar`
31+
# for best quality.
3032
#
31-
# @return [String, Symbol, OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice, nil]
33+
# @return [String, Symbol, OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::ID, OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice, nil]
3234
optional :voice, union: -> { OpenAI::Realtime::RealtimeAudioConfigOutput::Voice }
3335

3436
# @!method initialize(format_: nil, speed: nil, voice: nil)
@@ -39,12 +41,14 @@ class RealtimeAudioConfigOutput < OpenAI::Internal::Type::BaseModel
3941
#
4042
# @param speed [Float] The speed of the model's spoken response as a multiple of the original speed.
4143
#
42-
# @param voice [String, Symbol, OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice] The voice the model uses to respond. Supported built-in voices are `alloy`, `ash
44+
# @param voice [String, Symbol, OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::ID, OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice] The voice the model uses to respond. Supported built-in voices are
4345

4446
# The voice the model uses to respond. Supported built-in voices are `alloy`,
4547
# `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and
46-
# `cedar`. Voice cannot be changed during the session once the model has responded
47-
# with audio at least once. We recommend `marin` and `cedar` for best quality.
48+
# `cedar`. You may also provide a custom voice object with an `id`, for example
49+
# `{ "id": "voice_1234" }`. Voice cannot be changed during the session once the
50+
# model has responded with audio at least once. We recommend `marin` and `cedar`
51+
# for best quality.
4852
#
4953
# @see OpenAI::Models::Realtime::RealtimeAudioConfigOutput#voice
5054
module Voice
@@ -72,11 +76,33 @@ module Voice
7276

7377
variant const: -> { OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::CEDAR }
7478

79+
# Custom voice reference.
80+
variant -> { OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::ID }
81+
82+
class ID < OpenAI::Internal::Type::BaseModel
83+
# @!attribute id
84+
# The custom voice ID, e.g. `voice_1234`.
85+
#
86+
# @return [String]
87+
required :id, String
88+
89+
# @!method initialize(id:)
90+
# Custom voice reference.
91+
#
92+
# @param id [String] The custom voice ID, e.g. `voice_1234`.
93+
end
94+
7595
# @!method self.variants
76-
# @return [Array(String, Symbol)]
96+
# @return [Array(String, Symbol, OpenAI::Models::Realtime::RealtimeAudioConfigOutput::Voice::ID)]
7797

7898
define_sorbet_constant!(:Variants) do
79-
T.type_alias { T.any(String, OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::TaggedSymbol) }
99+
T.type_alias do
100+
T.any(
101+
String,
102+
OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::TaggedSymbol,
103+
OpenAI::Realtime::RealtimeAudioConfigOutput::Voice::ID
104+
)
105+
end
80106
end
81107

82108
# @!group

lib/openai/models/realtime/realtime_response_create_audio_output.rb

Lines changed: 34 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -25,10 +25,12 @@ class Output < OpenAI::Internal::Type::BaseModel
2525
# @!attribute voice
2626
# The voice the model uses to respond. Supported built-in voices are `alloy`,
2727
# `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and
28-
# `cedar`. Voice cannot be changed during the session once the model has responded
29-
# with audio at least once.
28+
# `cedar`. You may also provide a custom voice object with an `id`, for example
29+
# `{ "id": "voice_1234" }`. Voice cannot be changed during the session once the
30+
# model has responded with audio at least once. We recommend `marin` and `cedar`
31+
# for best quality.
3032
#
31-
# @return [String, Symbol, OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice, nil]
33+
# @return [String, Symbol, OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::ID, OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice, nil]
3234
optional :voice, union: -> { OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice }
3335

3436
# @!method initialize(format_: nil, voice: nil)
@@ -38,12 +40,14 @@ class Output < OpenAI::Internal::Type::BaseModel
3840
#
3941
# @param format_ [OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCM, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMU, OpenAI::Models::Realtime::RealtimeAudioFormats::AudioPCMA] The format of the output audio.
4042
#
41-
# @param voice [String, Symbol, OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice] The voice the model uses to respond. Supported built-in voices are `alloy`, `ash
43+
# @param voice [String, Symbol, OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::ID, OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice] The voice the model uses to respond. Supported built-in voices are
4244

4345
# The voice the model uses to respond. Supported built-in voices are `alloy`,
4446
# `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer`, `verse`, `marin`, and
45-
# `cedar`. Voice cannot be changed during the session once the model has responded
46-
# with audio at least once.
47+
# `cedar`. You may also provide a custom voice object with an `id`, for example
48+
# `{ "id": "voice_1234" }`. Voice cannot be changed during the session once the
49+
# model has responded with audio at least once. We recommend `marin` and `cedar`
50+
# for best quality.
4751
#
4852
# @see OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output#voice
4953
module Voice
@@ -71,11 +75,33 @@ module Voice
7175

7276
variant const: -> { OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::CEDAR }
7377

78+
# Custom voice reference.
79+
variant -> { OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::ID }
80+
81+
class ID < OpenAI::Internal::Type::BaseModel
82+
# @!attribute id
83+
# The custom voice ID, e.g. `voice_1234`.
84+
#
85+
# @return [String]
86+
required :id, String
87+
88+
# @!method initialize(id:)
89+
# Custom voice reference.
90+
#
91+
# @param id [String] The custom voice ID, e.g. `voice_1234`.
92+
end
93+
7494
# @!method self.variants
75-
# @return [Array(String, Symbol)]
95+
# @return [Array(String, Symbol, OpenAI::Models::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::ID)]
7696

7797
define_sorbet_constant!(:Variants) do
78-
T.type_alias { T.any(String, OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::TaggedSymbol) }
98+
T.type_alias do
99+
T.any(
100+
String,
101+
OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::TaggedSymbol,
102+
OpenAI::Realtime::RealtimeResponseCreateAudioOutput::Output::Voice::ID
103+
)
104+
end
79105
end
80106

81107
# @!group

lib/openai/resources/audio/speech.rb

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ class Speech
1818
#
1919
# @param model [String, Symbol, OpenAI::Models::Audio::SpeechModel] One of the available [TTS models](https://platform.openai.com/docs/models#tts):
2020
#
21-
# @param voice [String, Symbol, OpenAI::Models::Audio::SpeechCreateParams::Voice] The voice to use when generating the audio. Supported built-in voices are `alloy
21+
# @param voice [String, Symbol, OpenAI::Models::Audio::SpeechCreateParams::Voice::ID, OpenAI::Models::Audio::SpeechCreateParams::Voice] The voice to use when generating the audio. Supported built-in voices are `alloy
2222
#
2323
# @param instructions [String] Control the voice of your generated audio with additional instructions. Does not
2424
#

0 commit comments

Comments
 (0)