2828_VOICE_ENDPOINT = '/v1/ai/voice/{id}'
2929_CREATE_VOICE_ENDPOINT = '/v1/ai/voice'
3030_SPEECH_ENDPOINT = '/v1/ai/speech'
31+ _CONVERT_ENDPOINT = '/v1/ai/speech/convert'
3132_ACCOUNT_ENDPOINT = '/v1/account'
3233
3334
@@ -158,6 +159,7 @@ async def list_voices(self, starred: bool = False, owner: str = 'all'):
158159 if not isinstance (starred , bool ):
159160 raise ValueError (f'Invalid starred: { starred } ' )
160161 self ._lazy_init ()
162+ assert self ._session is not None , 'Session was not initialized'
161163 url = f'{ self ._base_url } { _LIST_VOICES_ENDPOINT } ?starred={ starred } &owner={ owner } '
162164
163165 async with self ._session .get (url , headers = self ._build_headers ()) as resp :
@@ -174,13 +176,14 @@ async def voice_info(self, voice_id: str):
174176 Returns a dictionary containing details of the voice.
175177 """
176178 self ._lazy_init ()
179+ assert self ._session is not None , 'Session was not initialized'
177180 url = f'{ self ._base_url } { _VOICE_ENDPOINT } ' .format (id = voice_id )
178181
179182 async with self ._session .get (url , headers = self ._build_headers ()) as resp :
180183 await self ._handle_response_errors (resp , 'Speech.voice_info' )
181184 return await resp .json ()
182185
183- async def create_voice (self , name : str , enhance : bool , filenames : List [str ], type : str = 'instant' , gender : str = None , description : str = None ):
186+ async def create_voice (self , name : str , enhance : bool , filenames : List [str ], type : str = 'instant' , gender : Optional [ str ] = None , description : Optional [ str ] = None ):
184187 """
185188 Creates a new voice from a set of audio files. Returns the voice metadata object.
186189
@@ -217,6 +220,7 @@ async def create_voice(self, name: str, enhance: bool, filenames: List[str], typ
217220 raise ValueError ('[Speech.create_voice] Enhance must not be None.' )
218221
219222 self ._lazy_init ()
223+ assert self ._session is not None , 'Session was not initialized'
220224
221225 metadata = json .dumps ({
222226 'name' : name ,
@@ -256,6 +260,7 @@ async def update_voice(self, voice_id: str, **kwargs):
256260 - `description` (str): A description of the voice.
257261 """
258262 self ._lazy_init ()
263+ assert self ._session is not None , 'Session was not initialized'
259264 url = f'{ self ._base_url } { _VOICE_ENDPOINT } ' .format (id = voice_id )
260265
261266 data = {
@@ -298,6 +303,7 @@ async def delete_voice(self, voice_id: str):
298303 - `voice_id` (str): The id of the voice to delete. If you don't know the id, you can get it from `list_voices()`.
299304 """
300305 self ._lazy_init ()
306+ assert self ._session is not None , 'Session was not initialized'
301307 url = f'{ self ._base_url } { _VOICE_ENDPOINT } ' .format (id = voice_id )
302308
303309 async with self ._session .delete (url , headers = self ._build_headers ()) as resp :
@@ -343,6 +349,7 @@ async def synthesize(self, text: str, voice: str, **kwargs):
343349 assert len (voice ) > 0 , '[Speech.synthesize] `voice` must be non-empty.'
344350
345351 self ._lazy_init ()
352+ assert self ._session is not None , 'Session was not initialized'
346353 url = f'{ self ._base_url } { _SPEECH_ENDPOINT } '
347354
348355 model = kwargs .get ('model' , 'aurora' )
@@ -384,13 +391,53 @@ async def synthesize(self, text: str, voice: str, **kwargs):
384391 synthesis_result ['seed' ] = response_data ['seed' ]
385392 return synthesis_result
386393
async def convert(self, audio: bytes, voice: str, **kwargs) -> bytes:
    """
    Converts speech from one voice to another.

    Required parameters:
    - `audio` (bytes): The audio file to be converted into a new voice. Max file size: 1 MB.
    - `voice` (str): The voice id to convert the speech into. Voice ids can be retrieved from `list_voices()` or `voice_info()`.

    Optional parameters:
    - `format` (str): The audio format to use for conversion. Defaults to `mp3`.
    - `sample_rate` (int): 8000, 16000, or 24000 - the desired output sample rate. Defaults to 24000.
    - `language` (str): The language of the source audio. Two letter ISO 639-1 code. Defaults to `en`.

    Optional parameters that are omitted, or passed explicitly as `None`, are not sent with the request.

    Returns:
    - bytes: The binary audio data of the converted speech.

    Raises:
    - AssertionError: If `audio` or `voice` is `None` or empty.
    """
    assert audio is not None, '[Speech.convert] `audio` must not be None.'
    assert len(audio) > 0, '[Speech.convert] `audio` must be non-empty.'
    assert voice is not None, '[Speech.convert] `voice` must not be None.'
    assert len(voice) > 0, '[Speech.convert] `voice` must be non-empty.'

    self._lazy_init()
    assert self._session is not None, 'Session was not initialized'
    url = f'{self._base_url}{_CONVERT_ENDPOINT}'

    form_data = aiohttp.FormData()
    form_data.add_field('audio', audio)
    form_data.add_field('voice', voice)

    # The bytes `audio` field makes this a multipart form, and aiohttp cannot
    # serialize non-str/bytes multipart values (an int `sample_rate` raises
    # TypeError when the request is built), so every option is sent as text.
    for option in ('format', 'sample_rate', 'language'):
        value = kwargs.get(option)
        if value is not None:
            form_data.add_field(option, str(value))

    async with self._session.post(url, data=form_data, headers=self._build_headers()) as resp:
        await self._handle_response_errors(resp, 'Speech.convert')
        return await resp.read()
433+
387434 async def synthesize_streaming (self , voice : str , return_extras : bool = False , ** kwargs ):
388435 """
389436 Initiates a full-duplex streaming connection with the server that allows you to send text and receive audio in real-time.
390437
391438 Parameters:
392- - `format` (str): `mp3`, `raw`, or `ulaw` – the desired output format. Defaults to `mp3`.
393- - `sample_rate` (int): 8000, 16000, or 24000 – the desired output sample rate. Defaults to 24000.
439+ - `format` (str): `mp3`, `raw`, or `ulaw` - the desired output format. Defaults to `mp3`.
440+ - `sample_rate` (int): 8000, 16000, or 24000 - the desired output sample rate. Defaults to 24000.
394441 - `voice` (str): The voice id to use for this connection.
395442 - `speed` (float): The speed to use for synthesis. Defaults to 1.0.
396443 - `return_extras` (bool): If `True`, the response will include word durations detail. Defaults to `False`.
@@ -404,6 +451,7 @@ async def synthesize_streaming(self, voice: str, return_extras: bool = False, **
404451 raise ValueError ('[Speech.synthesize_streaming] `voice` must not be None.' )
405452
406453 self ._lazy_init ()
454+ assert self ._session is not None , 'Session was not initialized'
407455
408456 init_msg = {
409457 'X-API-Key' : self ._api_key ,
@@ -431,6 +479,7 @@ async def account_info(self):
431479 Returns details about your account.
432480 """
433481 self ._lazy_init ()
482+ assert self ._session is not None , 'Session was not initialized'
434483 url = f'{ self ._base_url } { _ACCOUNT_ENDPOINT } '
435484
436485 async with self ._session .get (url , headers = self ._build_headers ()) as resp :
@@ -441,7 +490,7 @@ def _lazy_init(self):
441490 if self ._session is None :
442491 self ._session = aiohttp .ClientSession (connector = self ._connector )
443492
444- def _build_headers (self , type : str = None ):
493+ def _build_headers (self , type : Optional [ str ] = None ):
445494 headers = {'X-API-Key' : self ._api_key }
446495 if type is not None :
447496 headers ['Content-Type' ] = type
0 commit comments