# sci/test_tts.py

from datetime import datetime
from pathlib import Path

import torch
from TTS.api import TTS
from TTS.tts.utils.speakers import SpeakerManager

# Get device
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"


# Init TTS: load the multilingual XTTS v2 model and move it onto `device`.
# The script intentionally continues past this point so the synthesis
# steps below actually run.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)


# English sample passage: three paragraphs separated by blank lines, with a
# trailing newline. Built from adjacent literals so each sentence sits on
# its own source line.
text = (
    "Hi there, thanks for having me! "
    "My interest in electric cars really started back when I was a teenager. "
    "I remember learning about the history of EVs and how they've been around since the late 1800s, even before gasoline cars took over. "
    "The fact that these vehicles could run on electricity instead of fossil fuels just fascinated me.\n"
    "\n"
    "Then, in the 90s, General Motors introduced the EV1 - it was a real game-changer. "
    "It showed that electric cars could be practical and enjoyable to drive. "
    "And when Tesla came along with their Roadster in 2007, proving that EVs could have a long range, I was hooked.\n"
    "\n"
    "But what really sealed my interest was learning about the environmental impact of EVs. "
    "They produce zero tailpipe emissions, which means they can help reduce air pollution and greenhouse gas emissions. "
    "That's something I'm really passionate about.\n"
)

# Swedish sample passage: three lines, no trailing newline.
text_se = (
    "Antalet bilar ger dock bara en del av bilden. "
    "För att förstå bilberoendet bör vi framför allt titta på hur mycket bilarna faktiskt används.\n"
    "Stockholmarnas genomsnittliga körsträcka med bil har minskat sedan millennieskiftet. "
    "Den är dock lägre i Göteborg och i Malmö.\n"
    "I procent har bilanvändningen sedan år 2000 minskat lika mycket i Stockholm och Malmö, 9 procent. "
    "I Göteborg är minskningen 13 procent, i riket är minskningen 7 procent."
)
# Run TTS
# ❗ This is a multilingual voice-cloning model, so both the reference
# recording (speaker_wav) and the target language must be supplied.
# To get the audio as a list of amplitude values instead of a file:
#   wav = tts.tts(text=text, speaker_wav="my/cloning/audio.wav", language="en")

# Text to speech to a file. The second-resolution timestamp in the name gives
# separate runs distinct output files.
time_now = datetime.now().strftime("%Y%m%d%H%M%S")
output_path = f"output/tts_{time_now}.wav"
# Make sure the target directory exists before synthesis starts; do not rely
# on tts_to_file to create it.
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
tts.tts_to_file(text=text, speaker_wav='voices/test/test_en.wav', language="en", file_path=output_path)


# api = TTS("tts_models/se/fairseq/vits")

# api.tts_with_vc_to_file(
#     text_se,
#     speaker_wav="test_audio_se.wav",
#     file_path="output_se.wav"
# )