Python Neuraxle AutoML——为什么会出错?
标签:python, scikit-learn, neuraxle
我正在按照 AutoML 示例试用 Neuraxle。未经修改的示例运行良好。但当我修改它、在 ChooseOneStepOf(classifiers) 之前加入自己的管道组件时,它失败了,我不明白为什么。
from neuraxle.base import BaseTransformer
from neuraxle.pipeline import Pipeline
from neuraxle.hyperparams.space import HyperparameterSpace
from neuraxle.steps.numpy import NumpyRavel
from neuraxle.steps.output_handlers import OutputTransformerWrapper
from typing import List
from sklearn.preprocessing import OneHotEncoder
from neuraxle.pipeline import Pipeline
from neuraxle.union import FeatureUnion
from sklearn.impute import SimpleImputer
# sklearn classifiers, and sklearn wrapper for neuraxle
from neuraxle.steps.sklearn import SKLearnWrapper
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.linear_model import RidgeClassifier, LogisticRegression
# neuraxle distributions
from neuraxle.hyperparams.distributions import Choice, RandInt, Boolean, LogUniform
from neuraxle.steps.flow import ChooseOneStepOf
from neuraxle.base import BaseTransformer, ForceHandleMixin
from neuraxle.metaopt.auto_ml import ValidationSplitter
from neuraxle.metaopt.callbacks import ScoringCallback
from sklearn.metrics import accuracy_score
from neuraxle.metaopt.callbacks import MetricCallback
from sklearn.metrics import f1_score, precision_score, recall_score
from neuraxle.metaopt.auto_ml import InMemoryHyperparamsRepository
from neuraxle.plotting import TrialMetricsPlottingObserver
from neuraxle.metaopt.tpe import TreeParzenEstimatorHyperparameterSelectionStrategy
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
from neuraxle.metaopt.auto_ml import AutoML
import os
# Candidate classifiers offered to ChooseOneStepOf. Every entry carries its own
# hyperparameter search space and an explicit name set through set_name().
classifiers: List[BaseTransformer] = [
    SKLearnWrapper(DecisionTreeClassifier(), HyperparameterSpace({
        'criterion': Choice(['gini', 'entropy']),
        'splitter': Choice(['best', 'random']),
        'min_samples_leaf': RandInt(2, 5),
        # scikit-learn rejects an integer min_samples_split below 2, so the
        # sampled range starts at 2 rather than 1.
        'min_samples_split': RandInt(2, 3)
    })).set_name('DecisionTreeClassifier'),
    # The two linear models are each preceded by a NumpyRavel step wrapped in
    # OutputTransformerWrapper (presumably flattening the expected outputs --
    # confirm against the Neuraxle docs).
    Pipeline([
        OutputTransformerWrapper(NumpyRavel()),
        SKLearnWrapper(RidgeClassifier(), HyperparameterSpace({
            'alpha': Choice([(0.0, 1.0, 10.0), (0.0, 10.0, 100.0)]),
            'fit_intercept': Boolean(),
            'normalize': Boolean()
        }))
    ]).set_name('RidgeClassifier'),
    Pipeline([
        OutputTransformerWrapper(NumpyRavel()),
        # NOTE(review): with LogisticRegression's default solver, some sampled
        # combinations of 'penalty' and 'dual' may be rejected by scikit-learn;
        # verify which combinations the installed version accepts.
        SKLearnWrapper(LogisticRegression(), HyperparameterSpace({
            'C': LogUniform(0.01, 10.0),
            'fit_intercept': Boolean(),
            'dual': Boolean(),
            'penalty': Choice(['l1', 'l2']),
            'max_iter': RandInt(20, 200)
        }))
    ]).set_name('LogisticRegression')
]
class ColumnSelectTransformer(BaseTransformer, ForceHandleMixin):
    """Pipeline step that keeps only a fixed set of columns from its input.

    Input that is not already a ``pandas.DataFrame`` is wrapped in one before
    the columns are selected.
    """

    def __init__(self, required_columns):
        """Store the column labels to select.

        :param required_columns: labels used to index the input frame
        """
        BaseTransformer.__init__(self)
        ForceHandleMixin.__init__(self)
        self.required_columns = required_columns

    def inverse_transform(self, processed_outputs):
        """Do nothing; the selection is not reversed and ``None`` is returned."""
        return None

    def fit(self, X, y=None):
        """Return this step unchanged; nothing is learned from the data."""
        return self

    def transform(self, X):
        """Return the ``required_columns`` subset of ``X`` as a DataFrame slice.

        :param X: a DataFrame, or anything ``pandas.DataFrame`` can wrap
        :return: ``X`` indexed by ``self.required_columns``
        """
        frame = X if isinstance(X, pd.DataFrame) else pd.DataFrame(X)
        return frame[self.required_columns]
# Columns that are selected and then mean-imputed where values are NaN.
columns = [
    'BEDCERT', 'RESTOT', 'INHOSP',
    'CCRC_FACIL', 'SFF', 'CHOW_LAST_12MOS',
    'SPRINKLER_STATUS', 'EXP_TOTAL', 'ADJ_TOTAL',
]

simple_features = Pipeline([
    ColumnSelectTransformer(columns),
    SimpleImputer(missing_values=np.nan, strategy='mean'),
])

# Columns that are selected and then one-hot encoded into a dense array.
# NOTE(review): the recorded failure is a JSON-serialization TypeError raised
# while dumping the trial hyperparams. If raw scikit-learn estimators expose
# their constructor params as hyperparams here, OneHotEncoder's `dtype` (a type
# object) is a plausible culprit -- confirm before relying on this.
categorical_features = Pipeline([
    ColumnSelectTransformer(['OWNERSHIP', 'CERTIFICATION']),
    OneHotEncoder(sparse=False),
])

# Both feature branches are combined, then fed to one of the candidate
# classifiers chosen by ChooseOneStepOf.
business_features = FeatureUnion([simple_features, categorical_features])

p: Pipeline = Pipeline([business_features, ChooseOneStepOf(classifiers)])
# Hold out 20% of the training data for validation in each trial.
validation_splitter = ValidationSplitter(test_size=0.20)

# Main metric used to score trials. Accuracy is a higher-is-better metric, so
# it must be declared as such; declaring it lower-is-better would rank the
# worst trial as the best one.
scoring_callback = ScoringCallback(
    metric_function=accuracy_score,
    name='accuracy',
    higher_score_is_better=True,
    print_metrics=False
)

# Additional metrics recorded alongside the main score.
callbacks = [
    MetricCallback('f1', metric_function=f1_score, higher_score_is_better=True, print_metrics=False),
    MetricCallback('precision', metric_function=precision_score, higher_score_is_better=True, print_metrics=False),
    MetricCallback('recall', metric_function=recall_score, higher_score_is_better=True, print_metrics=False)
]
# In-memory trial repository with a plotting observer subscribed to it. Per its
# flags, the observer plots all trials on completion and does not save plots.
hyperparams_repository = InMemoryHyperparamsRepository(cache_folder='cache')
trial_plotter = TrialMetricsPlottingObserver(
    plotting_folder_name='metric_results',
    save_plots=False,
    plot_trial_on_next=False,
    plot_all_trials_on_complete=True,
    plot_individual_trials_on_complete=False,
)
hyperparams_repository.subscribe(trial_plotter)

# Settings for the Tree Parzen Estimator hyperparameter selection strategy.
tpe_settings = {
    'number_of_initial_random_step': 10,
    'quantile_threshold': 0.3,
    'number_good_trials_max_cap': 25,
    'number_possible_hyperparams_candidates': 100,
    'prior_weight': 0.,
    'use_linear_forgetting_weights': False,
    'number_recent_trial_at_full_weights': 25,
}
hyperparams_optimizer = TreeParzenEstimatorHyperparameterSelectionStrategy(**tpe_settings)
# Cache directory handed to AutoML; exist_ok avoids the check-then-create race.
tmpdir = 'cache'
os.makedirs(tmpdir, exist_ok=True)

n_trials = 10
n_epochs = 10

# Assemble the AutoML loop. The configured hyperparameter selection strategy is
# passed explicitly; it was previously built but never handed to AutoML.
auto_ml = AutoML(
    pipeline=p,
    hyperparams_optimizer=hyperparams_optimizer,
    validation_splitter=validation_splitter,
    refit_trial=True,
    n_trials=n_trials,
    epochs=n_epochs,
    cache_folder_when_no_handle=str(tmpdir),
    scoring_callback=scoring_callback,
    callbacks=callbacks,
    hyperparams_repository=hyperparams_repository
)
def generate_classification_data(csv_path='./ml-data/providers-train.csv',
                                 test_size=0.20, random_state=None):
    """Load the provider CSV and split it into train and test parts.

    The label is whether ``FINE_CNT`` is greater than 1. ``FINE_CNT``,
    ``FINE_TOT`` and ``CYCLE_2_TOTAL_SCORE`` are removed from the features.

    :param csv_path: path of the CSV file to read (latin1-encoded)
    :param test_size: fraction of rows placed in the test split
    :param random_state: optional seed forwarded to ``train_test_split``
    :return: ``(X_train, y_train, X_test, y_test)``
    :raises KeyError: if one of the three removed columns is missing
    """
    data = pd.read_csv(csv_path, encoding='latin1')
    fine_counts = data.pop('FINE_CNT')
    # Removed from the features; their values are not used any further.
    data = data.drop(columns=['FINE_TOT', 'CYCLE_2_TOTAL_SCORE'])
    X_train, X_test, y_train, y_test = train_test_split(
        data,
        fine_counts > 1,
        test_size=test_size,
        random_state=random_state
    )
    # Note the return order differs from train_test_split's own order.
    return X_train, y_train, X_test, y_test
# Load the split data; only the training part is used below.
X_train, y_train, X_test, y_test = generate_classification_data()
# Run the AutoML search on the training data and rebind auto_ml to the object
# returned by fit().
auto_ml = auto_ml.fit(X_train, y_train)
Output as follows:-
/Users/simon/venvs/wqu_q4/bin/python/Users/simon/Dev/wqu_q4/main.py
新试验:{“ChooseOneStepOf”:{“Choose”:“RidgeClassifier”}
trial 1/10
Traceback (most recent call last):
File
“/Users/simon/venvs/wqu_q4/lib/python3.9/site packages/neuraxix/metaopt/auto_ml.py”,
第785行,内装数据容器回购试验拆分=
自我培训师执行试验(文件
“/Users/simon/venvs/wqu_q4/lib/python3.9/site packages/neuraxe/metaopt/trial.py”,
第290行,在退出文件中
“/Users/simon/venvs/wqu_q4/lib/python3.9/site packages/neuraxix/metaopt/auto_ml.py”,
第785行,内装数据容器回购试验拆分=
自我培训师执行试验(文件
“/Users/simon/venvs/wqu_q4/lib/python3.9/site packages/neuraxix/metaopt/auto_ml.py”,
第595行,在execute_-trial self.print_-func('success-trial{}分数:
{}.格式(文件)
“/Users/simon/venvs/wqu_q4/lib/python3.9/site packages/neuraxe/metaopt/trial.py”,
第570行,在退出文件中
“/Users/simon/venvs/wqu_q4/lib/python3.9/site packages/neuraxix/metaopt/auto_ml.py”,
第574行,执行\试验\分割\说明=
_获取试用版拆分说明(文件“/Users/simon/venvs/wqu\u q4/lib/python3.9/site packages/neuraxix/metaopt/auto\u ml.py”,
第876行,在“获取、试用、拆分”描述中
json.dumps(repo_-trial.hyperparams,sort_-keys=True,indent=4)文件
“/usr/本地/酒窖/python@3.9/3.9.0_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/init.py“,
第234行,在转储中返回cls(文件
“/usr/本地/酒窖/python@3.9/3.9.0_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py“,
第201行,在encode chunks=列表(chunks)文件中
“/usr/本地/酒窖/python@3.9/3.9.0_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py“,
第431行,从第(o)行,
_当前缩进级别)文件“/usr/local/cillar/python@3.9/3.9.0_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py“,
第405行,in_iterencode_dict yield from chunks File
“/usr/本地/酒窖/python@3.9/3.9.0_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py“,
第438行,在_iterencodeo=_default(o)文件中
“/usr/本地/酒窖/python@3.9/3.9.0_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/json/encoder.py“,
第179行,默认情况下,raise TypeError(f'类型的对象
{o.类名称}类型错误:类型的对象不是JSON
在处理上述异常期间可序列化,另一个异常
发生:回溯(最近一次调用上次):文件
“/Users/simon/Dev/wqu_q4/main.py”,第210行,在auto_ml中=
自动拟合(X列,y列)文件
“/Users/simon/venvs/wqu_q4/lib/python3.9/site packages/neuraxix/base.py”,
第3475行,在fit new\u self=self.handle\u fit中(数据容器,上下文)
文件
“/Users/simon/venvs/wqu_q4/lib/python3.9/site packages/neuraxix/base.py”,
第980行,在手柄中安装新的=
self.\u fit\u data\u容器(数据容器,上下文)文件
“/Users/simon/venvs/wqu_q4/lib/python3.9/site packages/neuraxix/metaopt/auto_ml.py”,
第802行,in_fit_data_container repo_trial_split=repo_trial_split,
UnboundLocalError: local variable 'repo_trial_split' referenced before assignment
Process finished with exit code 1
以下几点可以帮助您解决当前问题:
评论 1:附言:我将在未来几天发布 0.5.7 版本。你使用的 Neuraxle 是哪个版本?可以尝试更新一下吗?我不清楚失败的原因:它似乎试图使用一个尚未赋值的变量,这很奇怪。也许可以清除 .pyc 预编译文件,或者尝试重新创建你的 venv?
评论 2:PyCharm 中显示的是 0.5.6,这很奇怪,因为 GitHub 上显示你们仅发布到 0.5.5。我想知道这是否与我的 ColumnTransformer 没有以正确的格式返回数据有关:它需要返回 numpy 数组,还是可以返回 pandas DataFrame?这个虚拟环境是我专门为此创建的,不过我可以重建后再试一次。感谢您的回复,我会进一步调查。