# Listing metadata left over from the file viewer (not program code): 32 lines, 1.5 KiB, Python.
import os
|
|
from openai import OpenAI
|
|
|
|
def text_to_speech_api(text_to_synthesize: str, *, voice: str = "alloy"):
    """Convert text to MP3 speech audio with the OpenAI speech endpoint.

    Args:
        text_to_synthesize: Text sent as the ``input`` of the speech request.
        voice: Voice name forwarded to the API. Defaults to ``"alloy"``;
            other voices include echo, fable, onyx, nova and shimmer.

    Returns:
        A ``(audio_data_bytes, sample_rate)`` tuple, where
        ``audio_data_bytes`` is ``response.content`` (the encoded MP3 data)
        and ``sample_rate`` is the fixed value 24000.

    Raises:
        Nothing is caught here: any error raised by the OpenAI client
        (for example for a missing or invalid API key) propagates to the
        caller unchanged.
    """
    # The key is read from the environment on every call; if the variable is
    # unset, ``None`` is passed through to the client.
    client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

    response = client.audio.speech.create(
        model="gpt-4o-mini-tts",
        voice=voice,
        input=text_to_synthesize,
        # Other formats: opus, aac, flac. MP3 is widely supported.
        response_format="mp3",
    )

    # response.content is already bytes (the audio data).
    # Alternatively, for streaming and saving to a file:
    # response.stream_to_file("output.mp3")
    audio_data_bytes = response.content

    # NOTE(review): 24 kHz was originally documented here for the tts-1 model;
    # it is assumed to hold for gpt-4o-mini-tts as well -- confirm against the
    # API documentation.
    sample_rate = 24000
    return audio_data_bytes, sample_rate
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: synthesize one sentence and save it next to the script.
    print("Testing Text-to-Speech API...")

    # No explicit API-key check, to keep this minimal; the OpenAI client is
    # expected to raise if the key is missing or invalid.
    sample_text = "Hello from PocketFlow! This is a test of the text-to-speech functionality."
    audio_bytes, rate = text_to_speech_api(sample_text)

    if not (audio_bytes and rate):
        print("Failed to convert text to speech (API returned empty data).")
    else:
        print(f"Successfully converted text to speech. Audio data length: {len(audio_bytes)} bytes, Sample rate: {rate} Hz.")
        with open('tts_output.mp3', 'wb') as mp3_file:
            mp3_file.write(audio_bytes)
        print("Saved TTS output to tts_output.mp3")