58 lines
2.5 KiB
Python
58 lines
2.5 KiB
Python
|
|
"""Contains the noise perturb augmentation model."""
|
|||
|
|
import os
|
|||
|
|
import random
|
|||
|
|
|
|||
|
|
import numpy as np
|
|||
|
|
|
|||
|
|
from mvector.data_utils.augmentor.base import AugmentorBase
|
|||
|
|
from mvector.data_utils.audio import AudioSegment
|
|||
|
|
|
|||
|
|
|
|||
|
|
class NoisePerturbAugmentor(AugmentorBase):
|
|||
|
|
"""用于添加背景噪声的增强模型
|
|||
|
|
|
|||
|
|
:param min_snr_dB: Minimal signal noise ratio, in decibels.
|
|||
|
|
:type min_snr_dB: float
|
|||
|
|
:param max_snr_dB: Maximal signal noise ratio, in decibels.
|
|||
|
|
:type max_snr_dB: float
|
|||
|
|
:param repetition: repetition noise sum
|
|||
|
|
:type repetition: int
|
|||
|
|
:param noise_dir: noise audio file dir.
|
|||
|
|
:type noise_dir: str
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
def __init__(self, min_snr_dB, max_snr_dB, repetition, noise_dir):
|
|||
|
|
self._min_snr_dB = min_snr_dB
|
|||
|
|
self._max_snr_dB = max_snr_dB
|
|||
|
|
self.repetition = repetition
|
|||
|
|
self.noises_path = []
|
|||
|
|
if os.path.exists(noise_dir):
|
|||
|
|
for file in os.listdir(noise_dir):
|
|||
|
|
self.noises_path.append(os.path.join(noise_dir, file))
|
|||
|
|
|
|||
|
|
def transform_audio(self, audio_segment: AudioSegment):
|
|||
|
|
"""Add background noise audio.
|
|||
|
|
|
|||
|
|
Note that this is an in-place transformation.
|
|||
|
|
|
|||
|
|
:param audio_segment: Audio segment to add effects to.
|
|||
|
|
:type audio_segment: AudioSegmenet
|
|||
|
|
"""
|
|||
|
|
if len(self.noises_path) > 0:
|
|||
|
|
for _ in range(random.randint(1, self.repetition)):
|
|||
|
|
# 随机选择一个noises_path中的一个
|
|||
|
|
noise_path = random.sample(self.noises_path, 1)[0]
|
|||
|
|
# 读取噪声音频
|
|||
|
|
noise_segment = AudioSegment.from_file(noise_path)
|
|||
|
|
# 如果噪声采样率不等于audio_segment的采样率,则重采样
|
|||
|
|
if noise_segment.sample_rate != audio_segment.sample_rate:
|
|||
|
|
noise_segment.resample(audio_segment.sample_rate)
|
|||
|
|
# 随机生成snr_dB的值
|
|||
|
|
snr_dB = random.uniform(self._min_snr_dB, self._max_snr_dB)
|
|||
|
|
# 如果噪声的长度小于audio_segment的长度,则将噪声的前面的部分填充噪声末尾补长
|
|||
|
|
if noise_segment.duration < audio_segment.duration:
|
|||
|
|
diff_duration = audio_segment.num_samples - noise_segment.num_samples
|
|||
|
|
noise_segment._samples = np.pad(noise_segment.samples, (0, diff_duration), 'wrap')
|
|||
|
|
# 将噪声添加到audio_segment中,并将snr_dB调整到最小值和最大值之间
|
|||
|
|
audio_segment.add_noise(noise_segment, snr_dB)
|