Python 高斯混合模型给出负值分数

Python 高斯混合模型给出负值分数,python,gmm,Python,Gmm,我尝试用不同的训练词训练多个GMM模型。然后我试着用一个看不见的测试词来测试我的模型,我得到了负值。知道我做错了什么吗 from python_speech_features import mfcc from python_speech_features import delta from sklearn.mixture import GaussianMixture import pandas as pd import scipy.io.wavfile as wav import os, gl

我尝试用不同的训练词训练多个GMM模型。然后我试着用一个看不见的测试词来测试我的模型,我得到了负值。知道我做错了什么吗

from python_speech_features import mfcc
from python_speech_features import delta
from sklearn.mixture import GaussianMixture 
import pandas as pd
import scipy.io.wavfile as wav
import os, glob
import numpy as np
读取所有训练文件

# Read every training recording, keeping the sample rate and the raw samples
# in two parallel lists (rate[k] belongs to sig[k]).
rate = []
sig = []
# Raw string: the pattern contains backslashes ('\T', '\*') that are not valid
# escape sequences in a normal string literal; the pattern text is unchanged.
# sorted() makes the file order -- and therefore the "Model i" numbering
# printed when scoring -- reproducible, because glob.glob() returns its
# matches in arbitrary order.
for filename in sorted(glob.glob(r'Data\Training\*.wav')):
    sr_value, x_value = wav.read(filename)
    rate.append(sr_value)
    sig.append(x_value)
计算每个信号的mfcc

# NOTE(review): training assumes 16 kHz recordings (value taken from the
# original hard-coded call); confirm against the rates collected in `rate`.
TRAINING_SAMPLE_RATE = 16000


def _frame_features(signal, samplerate):
    """Return one feature row per frame: MFCCs, deltas and double deltas.

    The three blocks are stacked column-wise, so a signal yielding 13
    cepstral coefficients per frame produces rows of 39 values.

    Training and test signals both go through this helper so that their
    features are extracted with identical settings.
    NOTE(review): the original training call passed appendEnergy=False
    explicitly while the test call relied on the library defaults; the
    training settings are used for both here.
    """
    cepstra = mfcc(signal=signal, samplerate=samplerate, winlen=0.025,
                   winstep=0.01, nfilt=26, nfft=512, numcep=13,
                   preemph=0.97, ceplifter=22, appendEnergy=False)
    first_delta = delta(cepstra, 2)
    # The double delta is the delta of *this* signal's first delta.
    second_delta = delta(first_delta, 2)
    # A single column-wise stack replaces the per-row DataFrame.append loop,
    # which was quadratic and no longer exists in pandas 2.0.
    return np.hstack([cepstra, first_delta, second_delta])


# One feature matrix per training recording. The loop variable is deliberately
# not called `mfcc`: rebinding that name would hide the imported function and
# break every later feature-extraction call.
training_feat = [_frame_features(audio, TRAINING_SAMPLE_RATE) for audio in sig]

# Raw string: '\T' and '\w' are not valid escape sequences.
(sr_valueX, x_valueX) = wav.read(r'Data\Testing\wiehedT.wav')
testingFeat = _frame_features(x_valueX, sr_valueX)

# Fit one GMM (library-default settings) per training recording.
allmodels = []
for feat in training_feat:
    gmm = GaussianMixture()  # default weights and means
    gmm.fit(feat)
    allmodels.append(gmm)

# score() is a log-likelihood (per-sample average), so negative values are
# expected; the model with the highest, i.e. least negative, score fits best.
for model_number, gmm in enumerate(allmodels, 1):
    print('Model  %d' % model_number)
    scores = gmm.score(testingFeat)
    print(scores)
计算每个信号的增量

# NOTE(review): training assumes 16 kHz recordings (value taken from the
# original hard-coded call); confirm against the rates collected in `rate`.
TRAINING_SAMPLE_RATE = 16000


def _frame_features(signal, samplerate):
    """Return one feature row per frame: MFCCs, deltas and double deltas.

    The three blocks are stacked column-wise, so a signal yielding 13
    cepstral coefficients per frame produces rows of 39 values.

    Training and test signals both go through this helper so that their
    features are extracted with identical settings.
    NOTE(review): the original training call passed appendEnergy=False
    explicitly while the test call relied on the library defaults; the
    training settings are used for both here.
    """
    cepstra = mfcc(signal=signal, samplerate=samplerate, winlen=0.025,
                   winstep=0.01, nfilt=26, nfft=512, numcep=13,
                   preemph=0.97, ceplifter=22, appendEnergy=False)
    first_delta = delta(cepstra, 2)
    # The double delta is the delta of *this* signal's first delta.
    second_delta = delta(first_delta, 2)
    # A single column-wise stack replaces the per-row DataFrame.append loop,
    # which was quadratic and no longer exists in pandas 2.0.
    return np.hstack([cepstra, first_delta, second_delta])


# One feature matrix per training recording. The loop variable is deliberately
# not called `mfcc`: rebinding that name would hide the imported function and
# break every later feature-extraction call.
training_feat = [_frame_features(audio, TRAINING_SAMPLE_RATE) for audio in sig]

# Raw string: '\T' and '\w' are not valid escape sequences.
(sr_valueX, x_valueX) = wav.read(r'Data\Testing\wiehedT.wav')
testingFeat = _frame_features(x_valueX, sr_valueX)

# Fit one GMM (library-default settings) per training recording.
allmodels = []
for feat in training_feat:
    gmm = GaussianMixture()  # default weights and means
    gmm.fit(feat)
    allmodels.append(gmm)

# score() is a log-likelihood (per-sample average), so negative values are
# expected; the model with the highest, i.e. least negative, score fits best.
for model_number, gmm in enumerate(allmodels, 1):
    print('Model  %d' % model_number)
    scores = gmm.score(testingFeat)
    print(scores)

代码按预期工作。函数
gmm.score(testingFeat)
返回输入数据的对数似然(对各样本的对数概率取平均),而不是普通的概率值。这一点在 scikit-learn 的 GaussianMixture.score 文档中有说明。

对数概率只是概率的对数;概率属于区间 (0,1),因此其对数是负数。要还原这一点,可以对分数应用指数函数,例如:
np.exp(gmm.score(testingFeat))


但是,这样得到的并不是百分比形式的概率分数,因为您的数据可能不是均匀分布的。关于这一点另有更详细的解释。

数据是什么样子的?你这是什么意思?什么?语法//