You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 

33 lines
1.1 KiB

from TTS.tts.configs.tortoise_config import TortoiseConfig
from TTS.tts.models.tortoise import Tortoise
import torch
import os
import torchaudio
# Script: compute Tortoise conditioning latents from a speaker's reference
# clips and save them to "conditioning_latents.pth" in the working directory.

# Build the Tortoise model from its default config and load the checkpoint
# with eval=True.
config = TortoiseConfig()
model = Tortoise.init_from_config(config)
model.load_checkpoint(config, checkpoint_dir="tts_models/en/multi-dataset/tortoise-v2", eval=True)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model.to(device)

# Prompt text and location of the reference recordings.
# NOTE(review): `text` is not used anywhere in this script — confirm whether
# it is still needed.
text = "There is, therefore, an increasing need to understand BEVs from a systems perspective."
voice_dir = "voices"
speaker = "test"

# Load every reference clip found in <voice_dir>/<speaker>/.
speaker_dir = os.path.join(voice_dir, speaker)
voice_samples = []
# os.listdir() returns entries in arbitrary order; sort them so the clips are
# always handed to the model in the same order.
for file_name in sorted(os.listdir(speaker_dir)):
    file_path = os.path.join(speaker_dir, file_name)
    # Skip sub-directories and other non-file entries, which cannot be loaded.
    if not os.path.isfile(file_path):
        continue
    # NOTE(review): sample_rate is read but never used, so clips are passed on
    # at their native rate — confirm whether the model expects a specific
    # sample rate / channel layout.
    waveform, sample_rate = torchaudio.load(file_path)
    voice_samples.append(waveform)

# Fail early with a clear message instead of passing an empty list to the model.
if not voice_samples:
    raise FileNotFoundError(f"No voice samples found in {speaker_dir!r}")

# Compute the conditioning latents from the loaded clips.
conditioning_latents = model.get_conditioning_latents(voice_samples)

# Persist the latents so they can be reloaded later with torch.load().
torch.save(conditioning_latents, "conditioning_latents.pth")