Python:是否有比这段代码更快的方法来提取音频特征并保存?
我有一段 PyTorch 代码,用于提取大约 60 万个音频特征。为了节省时间,我需要尽可能快地完成提取。我尝试了几种方法,最后得到了下面的方案。目前,我使用 PyTorch 的 DataLoader 循环处理数据。我想知道是否有更快的方法来完成这一切。是否有某种方法可以在我的数据加载循环中找到最大值?同样,我不允许使用内置算法进行搜索或排序。任何建议都会有帮助,多谢各位。(标签:python, algorithm, performance, loops, pytorch)
# Extract features for every utterance listed by AudioLoader, save them with
# save_features(), and optionally accumulate per-dimension statistics
# (mean / standard deviation) for CMVN.
start_time = time.time()
dataset = AudioLoader(
    base_root=BASE_ROOT,
    date=DATE,
    data_type=DATA_TYPE,
    feat_type=FEAT_TYPE,
    params=PARAMS,
    samp_rate=SAMP_RATE,
)
loader = DataLoader(
    dataset,
    batch_size=1,
    shuffle=False,
    num_workers=4,
)

total_len = 0
# Accumulate in float64: summing values (and squared values) over a very large
# number of frames in lower precision loses accuracy, and the later
# E[x^2] - E[x]^2 subtraction amplifies that error.
mean = torch.zeros(FEAT_DIM, dtype=torch.float64)
sq_mean = torch.zeros(FEAT_DIM, dtype=torch.float64)
std = torch.zeros(FEAT_DIM)

for k, data in enumerate(loader):
    # batch_size is 1, so every collated field holds exactly one element.
    utt_id = data['ID'][0]
    if 'feat' in data:
        input_file = data['feat'][0]
        output_file = data['output'][0]
        if CAL_CMVN:
            total_len += input_file.shape[0]
            feat64 = input_file.to(torch.float64)
            mean += torch.sum(feat64, 0)
            sq_mean += torch.sum(torch.square(feat64), 0)
        save_features(input_file, WAV_PATH, output_file, OUTPUT_PATH, utt_id)
    else:
        # AudioLoader returns only {'ID': ...} when extraction fails; skip the
        # item instead of aborting the whole run with a KeyError.
        print("Skipping {}: no features were extracted".format(utt_id))
    if k > 0 and k % 1000 == 0:
        print("-----{}s seconds-----".format(time.time() - start_time))

if CAL_CMVN:
    if total_len > 0:
        mean /= total_len
        sq_mean /= total_len
        # Rounding can push the variance slightly below zero, which would make
        # sqrt() return NaN; clamp it to zero first.
        std = torch.sqrt(torch.clamp(sq_mean - mean * mean, min=0.0))
        # Store the statistics in the default dtype, as torch.zeros(FEAT_DIM)
        # would have produced.
        out_dtype = torch.get_default_dtype()
        mean = mean.to(out_dtype)
        sq_mean = sq_mean.to(out_dtype)
        std = std.to(out_dtype)
        # Make sure the target directory exists so the run does not fail at
        # the very last step.
        os.makedirs('./feat', exist_ok=True)
        torch.save(mean, os.path.join('./feat', 'mean.pt'))
        torch.save(std, os.path.join('./feat', 'std.pt'))
    else:
        # Nothing was accumulated; dividing by zero would only yield NaNs.
        print("No frames were accumulated; CMVN statistics were not saved.")
print("Total consumed time : {}s seconds".format(time.time() - start_time))
`save_features` 函数用于把特征保存到指定路径。特征文件是输入文件,输出文件是神经网络训练时与输入特征对应的目标。另外,下面给出 AudioLoader 的详细实现:
class AudioLoader(Dataset):
    """Dataset that loads audio files and extracts features with torchaudio.

    Two list files are read from ``<base_root>/<date>/data/``:

    * ``<data_type>.scp`` - one entry per line; the first whitespace-separated
      token is the utterance ID and the last token is the audio file path.
    * ``<data_type>.txt`` - one entry per line; the first token is the
      utterance ID and everything after the first space is the target text.

    Line ``i`` of both files must refer to the same utterance ID.
    """

    # Feature-extraction options used when the caller does not pass ``params``.
    _DEFAULT_PARAMS = {
        "channel": -1,
        "dither": 0.0,
        "window_type": "hamming",
        "sample_frequency": 16000,
    }

    def __init__(
        self,
        base_root=BASE_ROOT,
        date="210223",
        data_type="train",
        feat_type="mfcc",
        params=None,
        samp_rate=16000
    ):
        """Read the list files and store the extraction settings.

        Args:
            base_root: Root directory that contains the ``<date>`` folder.
            date: Name of the sub-directory under ``base_root``.
            data_type: Base name of the ``.scp`` / ``.txt`` list files.
            feat_type: ``"fbank"`` or ``"mfcc"``; any other value selects the
                spectrogram extractor.
            params: Keyword arguments forwarded to the feature extractor.
                ``None`` selects the built-in defaults.
            samp_rate: Sampling rate the waveform is resampled to before
                feature extraction.
        """
        list_stem = os.path.join(base_root, date, "data", data_type)
        self.wavlist = self._read_lines(list_stem + ".scp")
        self.txtlist = self._read_lines(list_stem + ".txt")
        self.feat_type = feat_type
        self.samp_rate = samp_rate
        # Copy so that instances never share (or mutate) one dict object.
        self.params = dict(self._DEFAULT_PARAMS if params is None else params)

    @staticmethod
    def _read_lines(path):
        """Return the lines of a UTF-8 text file, closing the file afterwards."""
        with open(path, "r", encoding="UTF8") as handle:
            return handle.read().splitlines()

    def _feature_fn(self):
        """Return the torchaudio extractor selected by ``self.feat_type``."""
        kaldi = torchaudio.compliance.kaldi
        if self.feat_type == "fbank":
            return kaldi.fbank
        if self.feat_type == "mfcc":
            return kaldi.mfcc
        return kaldi.spectrogram

    def __len__(self):
        """Return the number of entries in the ``.scp`` list."""
        return len(self.wavlist)

    def __getitem__(self, idx):
        """Load one utterance and extract its features.

        Returns:
            ``{'ID', 'feat', 'output'}`` on success. When the audio file is
            missing or extraction fails, only ``{'ID': wav_id}`` is returned,
            so callers must check for the ``'feat'`` key.

        Raises:
            ValueError: If the IDs on line ``idx`` of the two lists differ.
        """
        wav_fields = self.wavlist[idx].split()
        txt_line = self.txtlist[idx]
        wav_id = wav_fields[0]
        txt_id = txt_line.split()[0]
        if wav_id != txt_id:
            raise ValueError('Both id are unmatched. You should sort again.')
        wav_path = wav_fields[-1]
        output_txt = txt_line.split(' ', 1)[1]

        if not os.path.isfile(wav_path):
            print('Extraction failed: {} (file not found)'.format(wav_path))
            return {'ID': wav_id}

        extract = self._feature_fn()
        try:
            waveform, samp = torchaudio.load(wav_path)
            resamp_waveform = torchaudio.compliance.kaldi.resample_waveform(
                waveform, samp, self.samp_rate
            )
            feature = extract(resamp_waveform, **self.params)
        except Exception as err:
            # Best effort: one unreadable file must not stop the whole run,
            # but report why it failed. Unlike a bare ``except``, this lets
            # KeyboardInterrupt / SystemExit propagate.
            print('Extraction failed: {} ({})'.format(wav_path, err))
            return {'ID': wav_id}
        return {'ID': wav_id, 'feat': feature, 'output': output_txt}