58 lines
2.5 KiB
Python
58 lines
2.5 KiB
Python
"""Contains the noise perturb augmentation model."""
|
||
import os
|
||
import random
|
||
|
||
import numpy as np
|
||
|
||
from mvector.data_utils.augmentor.base import AugmentorBase
|
||
from mvector.data_utils.audio import AudioSegment
|
||
|
||
|
||
class NoisePerturbAugmentor(AugmentorBase):
    """Augmentation model that mixes background noise into an audio segment.

    The candidate noise files are collected once, at construction time, from
    ``noise_dir``. When that directory is missing or holds no regular files,
    :meth:`transform_audio` leaves the audio untouched.

    :param min_snr_dB: Minimal signal noise ratio, in decibels.
    :type min_snr_dB: float
    :param max_snr_dB: Maximal signal noise ratio, in decibels.
    :type max_snr_dB: float
    :param repetition: Upper bound on the number of noise clips mixed into one
                       segment; the actual count is drawn uniformly from
                       ``1..repetition`` on every call.
    :type repetition: int
    :param noise_dir: Directory holding the noise audio files.
    :type noise_dir: str
    """

    def __init__(self, min_snr_dB, max_snr_dB, repetition, noise_dir):
        self._min_snr_dB = min_snr_dB
        self._max_snr_dB = max_snr_dB
        self.repetition = repetition
        self.noises_path = []
        # isdir rather than exists: a regular file passed as noise_dir would
        # make os.listdir raise. A falsy value (None, "") means "no noise".
        if noise_dir and os.path.isdir(noise_dir):
            # Sorted so the candidate order does not depend on the arbitrary
            # order in which the file system lists entries.
            candidates = (os.path.join(noise_dir, name)
                          for name in sorted(os.listdir(noise_dir)))
            # Keep regular files only; sub-directories cannot be loaded as audio.
            self.noises_path = [path for path in candidates if os.path.isfile(path)]

    def transform_audio(self, audio_segment: AudioSegment):
        """Add background noise audio.

        Note that this is an in-place transformation. Nothing happens when no
        noise files were found or when ``repetition`` is smaller than 1.

        :param audio_segment: Audio segment to add effects to.
        :type audio_segment: AudioSegment
        """
        # Guard clauses: no noise files means nothing to mix, and a repetition
        # below 1 would make random.randint(1, repetition) raise ValueError.
        if not self.noises_path or self.repetition < 1:
            return
        for _ in range(random.randint(1, self.repetition)):
            # Pick one noise file at random.
            noise_path = random.choice(self.noises_path)
            # Load the noise audio.
            noise_segment = AudioSegment.from_file(noise_path)
            # Bring the noise to the sample rate of the target segment.
            if noise_segment.sample_rate != audio_segment.sample_rate:
                noise_segment.resample(audio_segment.sample_rate)
            # Draw the signal-to-noise ratio for this clip.
            snr_dB = random.uniform(self._min_snr_dB, self._max_snr_dB)
            # A noise clip shorter than the target is extended by repeating it
            # from its start ('wrap') until it has as many samples as the target.
            # NOTE(review): presumably add_noise needs the noise to be at least
            # as long as the signal - confirm against AudioSegment.add_noise.
            if noise_segment.duration < audio_segment.duration:
                if noise_segment.num_samples == 0:
                    # An empty clip has nothing to repeat; np.pad would raise.
                    continue
                missing_samples = audio_segment.num_samples - noise_segment.num_samples
                noise_segment._samples = np.pad(noise_segment.samples, (0, missing_samples), 'wrap')
            # Mix the noise into the segment at the drawn SNR.
            audio_segment.add_noise(noise_segment, snr_dB)
|