You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
33 lines
1.1 KiB
33 lines
1.1 KiB
# Compute Tortoise conditioning latents from the audio clips found in one
# speaker folder (voice_dir/speaker) and write them to
# "conditioning_latents.pth" with torch.save.

import os

import torch
import torchaudio
from TTS.tts.configs.tortoise_config import TortoiseConfig
from TTS.tts.models.tortoise import Tortoise

# Initialize Tortoise model
config = TortoiseConfig()
model = Tortoise.init_from_config(config)
model.load_checkpoint(config, checkpoint_dir="tts_models/en/multi-dataset/tortoise-v2", eval=True)

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model.to(device)

# Define the text and voice directory
# NOTE(review): `text` is not referenced anywhere else in this script; it is
# kept so the module-level name stays available.
text = "There is, therefore, an increasing need to understand BEVs from a systems perspective."
voice_dir = "voices"
speaker = "test"

# Load voice samples
speaker_dir = os.path.join(voice_dir, speaker)
voice_samples = []
# os.listdir returns entries in arbitrary order; sorting keeps the sample
# list (and therefore the run) reproducible across machines.
for file_name in sorted(os.listdir(speaker_dir)):
    file_path = os.path.join(speaker_dir, file_name)
    # Skip sub-directories and other non-file entries instead of handing
    # them to torchaudio.load.
    if not os.path.isfile(file_path):
        continue
    # NOTE(review): sample_rate is read but never used, so clips are passed
    # on at whatever rate they were recorded - confirm which sample rate and
    # input format get_conditioning_latents expects.
    waveform, sample_rate = torchaudio.load(file_path)
    voice_samples.append(waveform)

# Fail early with a clear message rather than passing an empty list on.
if not voice_samples:
    raise FileNotFoundError("No voice sample files found in " + repr(speaker_dir))

# Get conditioning latents
conditioning_latents = model.get_conditioning_latents(voice_samples)

# Save conditioning latents to a file
torch.save(conditioning_latents, "conditioning_latents.pth")