Python 理解高斯混合模型
我试图理解scikit学习高斯混合模型实现的结果。请看以下示例:Python 理解高斯混合模型,python,scikit-learn,Python,Scikit Learn,我试图理解scikit学习高斯混合模型实现的结果。请看以下示例: #!/opt/local/bin/python import numpy as np import matplotlib.pyplot as plt from sklearn.mixture import GaussianMixture # Define simple gaussian def gauss_function(x, amp, x0, sigma): return amp * np.exp(-(x - x0)
#!/opt/local/bin/python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture
# Define simple gaussian
def gauss_function(x, amp, x0, sigma):
return amp * np.exp(-(x - x0) ** 2. / (2. * sigma ** 2.))
# Generate sample from three gaussian distributions
samples = np.random.normal(-0.5, 0.2, 2000)
samples = np.append(samples, np.random.normal(-0.1, 0.07, 5000))
samples = np.append(samples, np.random.normal(0.2, 0.13, 10000))
# Fit GMM
gmm = GaussianMixture(n_components=3, covariance_type="full", tol=0.001)
gmm = gmm.fit(X=np.expand_dims(samples, 1))
# Evaluate GMM
gmm_x = np.linspace(-2, 1.5, 5000)
gmm_y = np.exp(gmm.score_samples(gmm_x.reshape(-1, 1)))
# Construct function manually as sum of gaussians
gmm_y_sum = np.full_like(gmm_x, fill_value=0, dtype=np.float32)
for m, c, w in zip(gmm.means_.ravel(), gmm.covariances_.ravel(),
gmm.weights_.ravel()):
gmm_y_sum += gauss_function(x=gmm_x, amp=w, x0=m, sigma=np.sqrt(c))
# Normalize so that integral is 1
gmm_y_sum /= np.trapz(gmm_y_sum, gmm_x)
# Make regular histogram
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[8, 5])
ax.hist(samples, bins=50, normed=True, alpha=0.5, color="#0070FF")
ax.plot(gmm_x, gmm_y, color="crimson", lw=4, label="GMM")
ax.plot(gmm_x, gmm_y_sum, color="black", lw=4, label="Gauss_sum")
# Annotate diagram
ax.set_ylabel("Probability density")
ax.set_xlabel("Arbitrary units")
# Draw legend
plt.legend()
plt.show()
在这里,我首先生成一个由高斯分布构造的样本分布,然后用高斯混合模型拟合这些数据。接下来,我想计算一些给定输入的概率。scikit实现方便地提供了
score\u samples
方法来实现这一点。现在我正在努力理解这些结果。我一直认为,我可以从GMM拟合中提取高斯函数的参数,通过对它们求和,然后将积分归一化为1来构造完全相同的分布。但是,如图中所示,从score\u samples
方法中提取的样本与原始数据(蓝色直方图)非常吻合(红线),而手动构建的分布(黑线)则不吻合。我想了解我的想法哪里出了问题,以及为什么我不能通过对GMM拟合给出的高斯分布求和来构造分布!?!非常感谢您的任何意见 以防万一,未来的任何人都会对同样的事情感到疑惑:我们必须将单个组件标准化,而不是求和:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture
# Define simple gaussian
def gauss_function(x, amp, x0, sigma):
return amp * np.exp(-(x - x0) ** 2. / (2. * sigma ** 2.))
# Generate sample from three gaussian distributions
samples = np.random.normal(-0.5, 0.2, 2000)
samples = np.append(samples, np.random.normal(-0.1, 0.07, 5000))
samples = np.append(samples, np.random.normal(0.2, 0.13, 10000))
# Fit GMM
gmm = GaussianMixture(n_components=3, covariance_type="full", tol=0.001)
gmm = gmm.fit(X=np.expand_dims(samples, 1))
# Evaluate GMM
gmm_x = np.linspace(-2, 1.5, 5000)
gmm_y = np.exp(gmm.score_samples(gmm_x.reshape(-1, 1)))
# Construct function manually as sum of gaussians
gmm_y_sum = np.full_like(gmm_x, fill_value=0, dtype=np.float32)
for m, c, w in zip(gmm.means_.ravel(), gmm.covariances_.ravel(), gmm.weights_.ravel()):
gauss = gauss_function(x=gmm_x, amp=1, x0=m, sigma=np.sqrt(c))
gmm_y_sum += gauss / np.trapz(gauss, gmm_x) * w
# Make regular histogram
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=[8, 5])
ax.hist(samples, bins=50, normed=True, alpha=0.5, color="#0070FF")
ax.plot(gmm_x, gmm_y, color="crimson", lw=4, label="GMM")
ax.plot(gmm_x, gmm_y_sum, color="black", lw=4, label="Gauss_sum", linestyle="dashed")
# Annotate diagram
ax.set_ylabel("Probability density")
ax.set_xlabel("Arbitrary units")
# Make legend
plt.legend()
plt.show()
感谢您发布答案这非常简洁,谢谢。我在将数据传递到
GaussianMixture.fit
时遇到了很多麻烦,因为我没有意识到形状需要np。扩展(样本,1)。形状而不是样本。形状
现在你如何计算新测试样本X的概率(因此,您可以估计数据点是否可能是新颖的)?根据我的理解,np.exp(gmm.score_samples(X))给出了PDF在X处的值,而不是X的概率。