如何使用hyperopt为python中的内核PCA选择hyperparameters?

如何使用hyperopt为python中的内核PCA选择hyperparameters?,python,scikit-learn,pca,hyperparameters,hyperopt,Python,Scikit Learn,Pca,Hyperparameters,Hyperopt,我正在研究应用核主成分分析(KPCA)来降低特征矩阵集的维数,以获得数据点的聚类。我在scikit学习包中浏览了KPCA中使用的参数,了解到如果选择了其中一个参数(例如,如果选择了gamma,则不使用度和系数),则有些参数应该有效。此外,我还浏览了以下链接,了解用于分类模型的超参数方法: 我试图编码hyperopt代码并将其与KPCA相结合,但是,在处理PCA模型评分方面,我不断出现错误。我知道KPCA没有分数来确定PCA模型的准确性,因此,我如何克服这个错误?我尝试了几种计分方法,

我正在研究应用核主成分分析（KPCA）来降低特征矩阵的维数，以获得数据点的聚类。我查阅了scikit-learn包中KernelPCA所使用的参数，了解到其中一些参数只有在选择了特定kernel时才会生效（例如，如果选择了gamma，则不使用degree和coef0）。此外，我还浏览了以下链接，了解用于分类模型的超参数优化方法：

我尝试编写hyperopt代码并将其与KPCA结合使用，但是在对PCA模型进行评分时不断出现错误。我知道KPCA没有score方法来衡量PCA模型的准确性，那么我该如何解决这个错误？我尝试了几种评分方法，错误要么出在逆变换（inverse transform）上，要么出在数组的尺寸不匹配上。请在下面查看代码和错误消息。

代码:

错误消息:

错误消息(1):

错误消息(2):

import time

import numpy as np
import pandas as pd

from hyperopt import hp, tpe, atpe, fmin, Trials, rand, STATUS_OK
from sklearn.decomposition import PCA, KernelPCA, SparsePCA, IncrementalPCA
from sklearn.metrics import r2_score

# Implementing Hyperparamater method:
# Lookup table mapping a short model key to its scikit-learn
# decomposition class; obj_fnc() uses it to instantiate the model
# selected by the search space.
models = {
    'pca':  PCA,
    'kpca': KernelPCA,
    'spca': SparsePCA,
    # 'ipca': IncrementalPCA,
}

def search_space(model):
    """Build the hyperopt search space for the chosen decomposition model.

    Parameters
    ----------
    model : str
        One of 'pca', 'kpca' or 'spca' (case-insensitive).

    Returns
    -------
    dict
        Hyperopt space; always contains a 'model' key with the
        lower-cased model name so obj_fnc() can dispatch on it.
    """
    # Initialising variables:
    model = model.lower()
    space = {}

    # Calling the models:
    if model == 'pca':
        space = {'svd_solver'        : hp.choice('svd_solver', ["auto", "full", "arpack", "randomized"]),
                 }

    elif model == 'kpca':
        # BUG FIX 1: 'precomputed' removed from the kernel choices.  It
        # requires X itself to be an (n_samples, n_samples) kernel matrix,
        # which is not what we pass — it was the cause of
        # "Precomputed metric requires shape (n_queries, n_indexed)".
        # BUG FIX 2: fit_inverse_transform=True is required so that
        # inverse_transform() (used for reconstruction-error scoring in
        # get_acc_status) exists on the fitted KernelPCA.
        space = {'kernel'            : hp.choice('kernel', ['linear', 'poly', 'rbf', 'sigmoid', 'cosine']),
                 'gamma'             : hp.choice('gamma', np.arange(0.03, 0.05, 0.002)),
                 'degree'            : hp.choice('degree', range(1, 10, 1)),
                 'coef0'             : hp.choice('coef0', np.arange(1, 10, 0.2)),
                 'fit_inverse_transform': True,
                 }

    elif model == 'spca':
        space = {'alpha'             : hp.choice('alpha', np.arange(1.0, 15.0, 0.2)),
                 'ridge_alpha'       : hp.choice('ridge_alpha', np.linspace(0.01, 0.3, 30)),
                 'method'            : hp.choice('method', ['lars', 'cd']),
                 'max_iter'          : hp.choice('max_iter', [1000, 1500, 2000, 2500, 3000])
                 }

    space['model'] = model
    return space

def obj_fnc(params):
    """Hyperopt objective: instantiate the chosen model and score it on X.

    `params` is one sample from the search space; its 'model' key selects
    the estimator class and the remaining keys are constructor kwargs.
    """
    # Work on a copy so the dict hyperopt handed us is not mutated
    # (the original `del params['model']` altered the caller's dict).
    params = dict(params)
    model = params.pop('model').lower()
    # X_ = scale_normalize(params, X[:])
    clf = models[model](**params)
    return get_acc_status(clf, X)

def get_acc_status(clf, X):
    """Fit-transform X, reconstruct it, and return a hyperopt result dict.

    Scores the model by the R^2 of the reconstruction (1.0 = perfect,
    may be negative for bad fits).  fmin() MINIMIZES 'loss', so we return
    loss = -r2 — a better reconstruction gives a lower loss.

    BUG FIX: the original computed acc = -1 * r2_score(...) and then
    loss = -acc, i.e. loss = +r2_score — so fmin() was selecting the
    WORST reconstruction instead of the best.

    Requires clf to support inverse_transform (for KernelPCA this means
    fit_inverse_transform=True at construction time).
    """
    X_reduced = clf.fit_transform(X)
    X_prereduced = clf.inverse_transform(X_reduced)
    # Reconstruction quality — alternatives (MSE/MAE/CV) were tried:
    # acc = -1 * mean_squared_error(X, X_prereduced)
    # acc = -1 * mean_absolute_error(X, X_prereduced)
    acc = r2_score(X, X_prereduced)
    return {'loss': -acc, 'status': STATUS_OK}

##### Hyperparameter optimisation:
# Bayesian optimisation (TPE) over the selected model's search space.
start = time.time()

# Search algorithm (alternatives: rand.suggest, atpe.suggest):
tpe_algo = tpe.suggest

# Model to tune:
model = 'kpca'

# Trials object records every evaluated configuration:
hypopt_trials = Trials()

# Run the optimisation and report the best configuration found:
best_params = fmin(obj_fnc, search_space(model), algo=tpe_algo,
                   max_evals=500, trials=hypopt_trials)
print("Best params: ", best_params)
print('Best accuracy: ', hypopt_trials.best_trial['result']['loss'])
print("[INFO] Baye. Opt. search took {:.2f} seconds".format(time.time() - start))

# Calling parameters:
# These lookup lists MUST mirror the hp.choice option lists inside
# search_space(), because hyperopt returns best_params as *indices*
# into those lists.
## PCA:
svd_solver = ["auto", "full", "arpack", "randomized"]
## KPCA:
kernel =  ["linear", "poly", "rbf", "sigmoid", "cosine", "precomputed"]
# BUG FIX: this array must match the search space
# (np.arange(0.03, 0.05, 0.002)); the old np.arange(0.1, 0.9, 0.01)
# made best_params['gamma'] index into the wrong set of values.
gamma = np.arange(0.03, 0.05, 0.002)
degree = range(1, 10, 1)
coef0 = np.arange(1, 10, 0.2)
kernel_gamma = ["poly", "rbf", "sigmoid"]   # kernels that use gamma
kernel_degree = "poly"                       # kernel that uses degree (and coef0)
kernel_coef0 = "sigmoid"                     # kernel that uses coef0 only
## SPCA:
alpha = np.arange(1.0, 15.0, 0.2)
ridge_alpha = np.linspace(0.01, 0.3, 30)
method = ['lars', 'cd']
max_iter = [1000, 1500, 2000, 2500, 3000]

# Creating the PCA models:
# pca = PCA(n_components=2, svd_solver=svd_solver[best_params['svd_solver'])
# BUG FIXES:
#  * `any(x in best_params for x in kernel_gamma)` tested whether kernel
#    *names* were dict *keys* of best_params (never true), and iterating
#    the string "poly" tested single characters — compare the CHOSEN
#    kernel value instead.
#  * gamma/degree/coef0 were passed as '{0}'.format(...) STRINGS;
#    scikit-learn expects numbers.
#  * For 'linear'/'cosine' kernels `pca` was never assigned (NameError);
#    now the model is always constructed.
best_kernel = kernel[best_params['kernel']]
kpca_kwargs = {'n_components': 2, 'kernel': best_kernel}
if best_kernel in kernel_gamma:
    kpca_kwargs['gamma'] = float(gamma[best_params['gamma']])
if best_kernel == kernel_degree:
    kpca_kwargs['degree'] = int(degree[best_params['degree']])
    kpca_kwargs['coef0'] = float(coef0[best_params['coef0']])
if best_kernel == kernel_coef0:
    kpca_kwargs['coef0'] = float(coef0[best_params['coef0']])
pca = KernelPCA(**kpca_kwargs)
# pca = SparsePCA(n_components=2, alpha=alpha[best_params['alpha']], ridge_alpha=ridge_alpha[best_params['ridge_alpha']], method=method[best_params['method']], max_iter=max_iter[best_params['max_iter']])
# pca = IncrementalPCA(n_components=2)
print('Model: ', pca)
PrincipalComponents = pca.fit_transform(X_std)
principalDf = pd.DataFrame(data = PrincipalComponents, columns = ['principal component 1', 'principal component 2'])
finalDf = pd.concat([principalDf, dataframe[['Label']]], axis = 1)
print('Principal Component Analysis: ')
print(principalDf)
ValueError: There are significant negative eigenvalues (1.11715 of the maximum positive). Either the matrix is not PSD, or there was an issue while computing the eigendecomposition of the matrix.
ValueError: Precomputed metric requires shape (n_queries, n_indexed). Got (50, 14) for 50 indexed.