55 lines
1.3 KiB
YAML
55 lines
1.3 KiB
YAML
|
|
# Dataset parameters
dataset_conf:
  # Training batch size
  batch_size: 256
  # Number of speakers, i.e. the number of classification classes
  num_speakers: 3242
  # Number of threads used to read data
  num_workers: 12
  # Minimum audio length; shorter audio is filtered out
  # (unit is presumably seconds -- confirm against the data loader)
  min_duration: 0.5
  # Maximum audio length; audio longer than this is cropped
  max_duration: 6
  # Whether to trim silent segments
  do_vad: false
  # Audio sample rate
  sample_rate: 16000
  # Whether to apply volume normalization to the audio
  use_dB_normalization: false
  # Target volume, in decibels, used when volume normalization is applied
  target_dB: -20
  # Path to the training data list
  train_list: 'dataset/train_list.txt'
  # Path to the test data list
  test_list: 'dataset/test_list.txt'
  # Path to the label list
  label_list_path: 'dataset/label_list.txt'
|
|||
|
|
|
|||
|
|
# Data preprocessing parameters
preprocess_conf:
  # Audio preprocessing (feature extraction) method.
  # Supported: MelSpectrogram, Spectrogram, MFCC, Fbank
  feature_method: 'Fbank'
|
|||
|
|
|
|||
|
|
# Feature extraction settings
# NOTE(review): presumably forwarded to the extractor chosen by
# preprocess_conf.feature_method -- confirm against the consuming code.
feature_conf:
  # Same value as dataset_conf.sample_rate (16000); keep the two in sync
  sample_frequency: 16000
  # Presumably the number of mel filterbank bins -- confirm
  num_mel_bins: 80
|
|||
|
|
|
|||
|
|
# Optimizer parameters
optimizer_conf:
  # Optimization method. Supported: Adam, AdamW, SGD
  optimizer: 'Adam'
  # Initial learning rate
  learning_rate: 0.001
  # Weight decay coefficient.
  # Written with an explicit decimal point on purpose: YAML 1.1 loaders such
  # as PyYAML resolve a bare `1e-6` as the string '1e-6', not as a float.
  weight_decay: 1.0e-6
|
|||
|
|
|
|||
|
|
# Model parameters
model_conf:
  # Presumably the size of the output speaker embedding -- confirm
  embd_dim: 192
  # Presumably the channel width of the model's layers -- confirm
  channels: 512
|
|||
|
|
|
|||
|
|
# Training parameters
train_conf:
  # Number of training epochs
  max_epoch: 30
  # Presumably how often (in training steps) progress is logged -- confirm
  log_interval: 100
|
|||
|
|
|
|||
|
|
# Which model to use
use_model: 'ecapa_tdnn'
|