Python 2.7 向TSNE中添加单选按钮

Python 2.7 向TSNE中添加单选按钮,python-2.7,Python 2.7,我是python编程新手。在下面的链接中,介绍了TSNE在python中的简单实现。 我想知道如何添加单选按钮来在列表中表示TSNE结果中的所有颜色,这样我就可以只选择一定数量的颜色,并查看它们是如何分布的 代码如下: # # tsne.py # # Implementation of t-SNE in Python. The implementation was tested on Python 2.7.10, and it requires a working # insta

我是python编程新手。在下面的链接中,介绍了TSNE在python中的简单实现。

我想知道如何添加一组单选按钮,把 t-SNE 结果中出现的所有颜色(即各个类别标签)列出来,这样我就可以只选中其中的几种颜色,并查看对应的数据点是如何分布的。

代码如下:

      #
#  tsne.py
#
# Implementation of t-SNE in Python. The implementation was tested on Python 2.7.10, and it requires a working
# installation of NumPy. The implementation comes with an example on the MNIST dataset. In order to plot the
# results of this example, a working installation of matplotlib is required.
#
# The example can be run by executing: `ipython tsne.py`
#
#
#  Created by Laurens van der Maaten on 20-12-08.
#  Copyright (c) 2008 Tilburg University. All rights reserved.

import numpy as Math
import pylab as Plot

def Hbeta(D=Math.array([]), beta=1.0):
    """Compute the entropy and the P-row for one precision of a Gaussian kernel.

    Parameters:
        D: 1-D array-like of distances from one point to the other points.
        beta: precision of the Gaussian kernel (a scalar or a 1-element array).

    Returns:
        A tuple ``(H, P)``. ``P`` is ``exp(-D * beta)`` normalised to sum to
        one. ``H`` is the Shannon entropy (in nats) of that distribution, i.e.
        the logarithm of its perplexity.
    """
    D = Math.asarray(D, dtype=float)

    # Shift the distances so the smallest one is zero. Both H and the
    # normalised P are invariant under this shift, but without it exp() can
    # underflow to zero for every entry (large distances or a large beta),
    # which makes sumP zero and turns H and P into NaN.
    shift = D.min() if D.size else 0.0
    shifted = D - shift

    # Compute P-row and corresponding entropy
    P = Math.exp(-shifted * beta)
    sumP = Math.sum(P)
    H = Math.log(sumP) + beta * Math.sum(shifted * P) / sumP
    P = P / sumP
    return H, P


def x2p(X=Math.array([]), tol=1e-5, perplexity=30.0):
    """Binary-search per-point precisions so every P-row has the same perplexity.

    Parameters:
        X: NxD array of data points (one point per row).
        tol: tolerance on the difference between the entropy of a row and
            ``log(perplexity)`` at which the search for that row stops.
        perplexity: target perplexity of every conditional distribution.

    Returns:
        An NxN array whose row ``i`` holds the conditional probabilities of
        point ``i`` with respect to all other points; the diagonal is zero.
    """
    # Initialize some variables
    print("Computing pairwise distances...")
    X = Math.asarray(X)
    (n, d) = X.shape
    sum_X = Math.sum(Math.square(X), 1)
    # Squared Euclidean distances: |xi|^2 - 2 xi.xj + |xj|^2
    D = Math.add(Math.add(-2 * Math.dot(X, X.T), sum_X).T, sum_X)
    P = Math.zeros((n, n))
    beta = Math.ones((n, 1))
    logU = Math.log(perplexity)

    # Loop over all datapoints
    for i in range(n):

        # Print progress
        if i % 500 == 0:
            print("Computing P-values for point  %d  of  %d ..." % (i, n))

        # Indices of every point except i (a point is not its own neighbour)
        others = Math.concatenate((Math.r_[0:i], Math.r_[i + 1:n]))
        Di = D[i, others]

        # Compute the Gaussian kernel and entropy for the current precision
        betamin = -Math.inf
        betamax = Math.inf
        b = 1.0
        (H, thisP) = Hbeta(Di, b)

        # Evaluate whether the perplexity is within tolerance
        Hdiff = H - logU
        tries = 0
        while abs(Hdiff) > tol and tries < 50:

            # If not, increase or decrease precision. While one side of the
            # bracket is still infinite, double/halve; afterwards bisect.
            if Hdiff > 0:
                betamin = b
                if Math.isinf(betamax):
                    b = b * 2
                else:
                    b = (b + betamax) / 2
            else:
                betamax = b
                if Math.isinf(betamin):
                    b = b / 2
                else:
                    b = (b + betamin) / 2

            # Recompute the values
            (H, thisP) = Hbeta(Di, b)
            Hdiff = H - logU
            tries += 1

        # Store the precision found and set the final row of P
        beta[i] = b
        P[i, others] = thisP

    # Return final P-matrix
    print("Mean value of sigma:  %s" % Math.mean(Math.sqrt(1 / beta)))
    return P


def pca(X=Math.array([]), no_dims=50):
    """Project the NxD array X onto its ``no_dims`` leading principal components.

    Parameters:
        X: NxD array-like of data points (one point per row).
        no_dims: number of principal components to keep.

    Returns:
        An array with N rows and ``min(no_dims, D)`` columns holding the
        centred data projected onto the eigenvectors of ``X.T @ X`` with the
        largest eigenvalues, in decreasing order of eigenvalue. The sign of
        each column is arbitrary.
    """
    print("Preprocessing the data using PCA...")
    X = Math.asarray(X)
    (n, d) = X.shape

    # Centre every column on zero
    X = X - Math.mean(X, 0)

    # X.T @ X is symmetric, so eigh applies and yields real eigenpairs.
    # The eigenvalues are sorted explicitly: a general eigensolver gives no
    # ordering guarantee, so taking "the first no_dims columns" unsorted
    # may not select the directions of largest variance.
    (eigvals, eigvecs) = Math.linalg.eigh(Math.dot(X.T, X))
    order = Math.argsort(eigvals)[::-1]
    Y = Math.dot(X, eigvecs[:, order[:no_dims]])
    return Y


def tsne(X=Math.array([]), no_dims=2, initial_dims=50, perplexity=30.0):
    """Run t-SNE on the NxD array X to reduce it to ``no_dims`` dimensions.

    Parameters:
        X: NxD NumPy array of data points (one point per row).
        no_dims: dimensionality of the embedding; must be a whole number.
        initial_dims: number of dimensions X is reduced to with PCA before
            the pairwise affinities are computed.
        perplexity: target perplexity passed on to ``x2p``.

    Returns:
        An N x no_dims array with the embedded points, or ``-1`` (after
        printing an error message) when ``no_dims`` is not a whole number.
        The embedding starts from a random initialisation, so results differ
        between runs unless NumPy's random state is seeded by the caller.
    """
    # Check inputs. Integral floats such as 2.0 are accepted and converted,
    # since no_dims is used below as an array dimension.
    if round(no_dims) != no_dims:
        print("Error: number of dimensions should be an integer.")
        return -1
    no_dims = int(no_dims)

    # Initialize variables
    X = pca(X, initial_dims).real
    n = X.shape[0]
    max_iter = 1000
    initial_momentum = 0.5
    final_momentum = 0.8
    eta = 500
    min_gain = 0.01
    Y = Math.random.randn(n, no_dims)
    dY = Math.zeros((n, no_dims))
    iY = Math.zeros((n, no_dims))
    gains = Math.ones((n, no_dims))

    # Compute P-values
    P = x2p(X, 1e-5, perplexity)
    P = P + Math.transpose(P)
    P = P / Math.sum(P)
    P = P * 4                                   # early exaggeration
    P = Math.maximum(P, 1e-12)

    # Run iterations
    diagonal = Math.arange(n)
    for step in range(max_iter):

        # Compute pairwise affinities (Student-t kernel, zero on the diagonal)
        sum_Y = Math.sum(Math.square(Y), 1)
        num = 1 / (1 + Math.add(Math.add(-2 * Math.dot(Y, Y.T), sum_Y).T, sum_Y))
        num[diagonal, diagonal] = 0
        Q = num / Math.sum(num)
        Q = Math.maximum(Q, 1e-12)

        # Compute gradient: dY[i] = sum_j W[j, i] * (Y[i] - Y[j])
        W = (P - Q) * num
        for i in range(n):
            dY[i, :] = Math.dot(W[:, i], Y[i, :] - Y)

        # Perform the update
        if step < 20:
            momentum = initial_momentum
        else:
            momentum = final_momentum
        # Shrink the gain where gradient and previous step have the same
        # sign indicator, grow it where they differ.
        same_sign = (dY > 0) == (iY > 0)
        gains = Math.where(same_sign, gains * 0.8, gains + 0.2)
        gains[gains < min_gain] = min_gain
        iY = momentum * iY - eta * (gains * dY)
        Y = Y + iY
        Y = Y - Math.mean(Y, 0)                 # keep the embedding centred

        # Compute current value of cost function
        if (step + 1) % 10 == 0:
            C = Math.sum(P * Math.log(P / Q))
            print("Iteration  %d : error is  %s" % (step + 1, C))

        # Stop lying about P-values (undo the early exaggeration)
        if step == 100:
            P = P / 4

    # Return solution
    return Y


if __name__ == "__main__":
    # Example run: embed the MNIST sample in two dimensions and show a
    # scatter plot coloured by the labels. Both text files are read from
    # the current working directory.
    print("Run Y = tsne.tsne(X, no_dims, perplexity) to perform t-SNE on your dataset.")
    print("Running example on 2,500 MNIST digits...")
    X = Math.loadtxt("mnist2500_X.txt")
    labels = Math.loadtxt("mnist2500_labels.txt")
    Y = tsne(X, 2, 50, 20.0)
    Plot.scatter(Y[:, 0], Y[:, 1], 20, labels)
    Plot.show()
#
#tsne.py
#
#t-SNE 的 Python 实现。该实现在 Python 2.7.10 上测试通过,运行它需要已正确
#安装 NumPy。该实现附带了一个基于 MNIST 数据集的示例。若要绘制
#该示例的结果,还需要已正确安装 matplotlib。
#
#可以通过执行 `ipython tsne.py` 来运行该示例
#
#
#由劳伦斯·范德马腾于2008年12月20日创作。
#版权所有(c)2008蒂尔堡大学。版权所有。
将numpy导入为数学
导入pylab作为绘图
def Hbeta(D=数学数组([]),beta=1.0):
“”“计算高斯分布精度的特定值的困惑度和P行。”“”
#计算P-row及相应的困惑
P=Math.exp(-D.copy()*beta);
集水坑=总和(P);
H=数学对数(集水坑)+β*数学和(D*P)/集水坑;
P=P/集水坑;
返回H,P;
def x2p(X=数学数组([]),tol=1e-5,困惑=30.0):
“”“执行二进制搜索以获取P值,使每个条件高斯函数具有相同的复杂性。”“”
#初始化一些变量
打印“计算成对距离…”
(n,d)=X.形状;
sum_X=数学sum(数学平方(X),1);
D=Math.add(Math.add(-2*Math.dot(X,X.T),sum_X).T,sum_X);
P=数学零((n,n));
β=数学一((n,1));
logU=Math.log(困惑);
#在所有数据点上循环
对于范围(n)中的i:
#打印进度
如果i%500==0:
打印“计算点的P值”,i,“of”,n,“…”
#计算当前精度的高斯核和熵
betamin=-Math.inf;
betamax=Math.inf;
Di=D[i,Math.concatenate((Math.r[0:i],Math.r[i+1:n]));
(H,thisP)=Hbeta(Di,beta[i]);
#评估困惑是否在容许范围内
Hdiff=H-logU;
尝试=0;
而Math.abs(Hdiff)>tol并尝试<50:
#如果不是,则增加或减少精度
如果Hdiff>0:
betamin=beta[i].copy();
如果betamax==Math.inf或betamax==Math.inf:
β[i]=β[i]*2;
其他:
β[i]=(β[i]+βmax)/2;
其他:
betamax=beta[i]。复制();
如果betamin==Math.inf或betamin===Math.inf:
β[i]=β[i]/2;
其他:
β[i]=(β[i]+β胺)/2;
#重新计算值
(H,thisP)=Hbeta(Di,beta[i]);
Hdiff=H-logU;
尝试=尝试+1;
#设置P的最后一行
P[i,Math.concatenate((Math.r[0:i],Math.r[i+1:n])]=thisP;
#返回最终P-矩阵
打印“西格玛平均值:”,数学平均值(数学sqrt(1/beta));
返回P;
def pca(X=Math.array([]),无尺寸=50):
“”“在NxD数组X上运行PCA,以便将其维数降低到零维数。”“”
打印“使用PCA预处理数据…”
(n,d)=X.形状;
X=X-Math.tile(数学平均值(X,0),(n,1));
(l,M)=Math.linalg.eig(Math.dot(X.T,X));
Y=数学点(X,M[:,0:no_dims]);
返回Y;
def tsne(X=Math.array([]),无尺寸=2,初始尺寸=50,困惑=30.0):
“”“在NxD数组X中的数据集上运行t-SNE,以将其维数降低到零维。”。
函数的语法是Y=tsne.tsne(X,无模糊,复杂),其中X是NxD NumPy数组
#检查输入
如果isinstance(无阴影,浮动):
打印“错误:数组X应具有浮点型。”;
返回-1;
如果为圆形(无阴影)!=无阴影:
打印“错误:维度数应为整数。”;
返回-1;
#初始化变量
X=主成分分析(X,初始尺寸)。真实值;
(n,d)=X.形状;
最大电阻=1000;
初始动量=0.5;
最终动量=0.8;
eta=500;
最小增益=0.01;
Y=Math.random.randn(n,无dims);
dY=数学上的零((n,无_dims));
iY=数学零点((n,无零点));
增益=数学一((n,无二次方));
#计算P值
P=x2p(X,1e-5,困惑);
P=P+数学转置(P);
P=P/数学和(P);
P=P*4;#早期夸张
P=数学最大值(P,1e-12);
#运行迭代
对于范围内的iter(最大iter):
#计算两两相似性
sum_Y=Math.sum(Math.square(Y),1);
num=1/(1+Math.add(Math.add(-2*Math.dot(Y,Y.T),sum_Y).T,sum_Y));
num[范围(n),范围(n)]=0;
Q=num/Math.sum(num);
Q=数学最大值(Q,1e-12);
#计算梯度
PQ=P-Q;
对于范围(n)中的i:
dY[i,:]=Math.sum(Math.tile(PQ[:,i]*num[:,i],(no_dims,1)).T*(Y[i,:]-Y),0);
#执行更新
如果iter<20:
动量=初始动量
其他:
动量=最终动量
增益=(增益+0.2)*((dY>0)!=(iY>0))+(增益*0.8)*((dY>0)=(iY>0));
增益[增益<最小增益]=最小增益;
iY=动量*iY-预计到达时间*(增益*dY);
Y=Y+iY;
Y=Y-Math.tile(Math.mean(Y,0),(n,1));
#计算成本函数的现值
如果(iter+1)%10==0:
C=数学和(P*Math.log(P/Q));
打印“迭代”(iter+1),“:错误为”,C
#不要在P值上撒谎
如果iter==100:
P=P/4;
#返回溶液
返回Y;
如果名称=“\uuuuu main\uuuuuuuu”:
打印“运行Y=tsne.tsne(X,无阴影,困惑)以在数据集上执行t-SNE”
打印“在2500 MNIST数字上运行示例…”
X=Math.loadtxt(“mnist2500_X.txt”);
labels=Math.loadtxt(“mnist2500_labels.txt”);
Y=tsne(X,2,50,20.0);
散点图(Y[:,0],Y[:,1],20,标签);
Plot.show();