Python：无法解决错误消息“Expected 2D array, got 1D array instead”（应为2D数组，却得到1D数组）？_Python_Scikit Learn

Python：无法解决错误消息“Expected 2D array, got 1D array instead”（应为2D数组，却得到1D数组）？

python scikit-learn

Python：无法解决错误消息“Expected 2D array, got 1D array instead”（应为2D数组，却得到1D数组）？,python,scikit-learn,Python,Scikit Learn,我不知道该怎么做才能让这个模型工作。它说要重塑，但我已经这样做了，但我得到了一个不一致的样本数据错误。我不知道这是怎么发生的。我运行过其他模型，但我不明白为什么现在会发生这种情况 import pandas as pd from sklearn.ensemble import RandomForestRegressor from sklearn.linear_model import LassoLarsCV from sklearn.model_selection import train_te

我不知道该怎么做才能让这个模型工作。它说要重塑，但我已经这样做了，但我得到了一个不一致的样本数据错误。我不知道这是怎么发生的。我运行过其他模型，但我不明白为什么现在会发生这种情况

import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LassoLarsCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline, make_union
from sklearn.preprocessing import Normalizer
from tpot.builtins import StackingEstimator
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
# Question script: fit a TPOT-exported pipeline and report the mean absolute
# error on a held-out split.
# NOTE(review): `data` and `np` are not defined or imported in the visible
# snippet -- presumably set up earlier in the notebook; confirm.
y = data['y1']
x = data[['x1','x2','x3','x4','x5']]


# NOTE(review): the result of ravel() is not assigned to anything, so this
# statement leaves `x` unchanged.
x.values.ravel()
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.2,random_state=0)


# Average CV score on the training set was: -0.1116338317020572
exported_pipeline = make_pipeline(
    Normalizer(norm="max"),
    StackingEstimator(estimator=LassoLarsCV(normalize=True)),
    RandomForestRegressor(bootstrap=False, max_features=0.5, min_samples_leaf=1, min_samples_split=19, n_estimators=100)
)

exported_pipeline.fit(x_train, y_train)
# NOTE(review): this passes the target `y_test` to predict() instead of the
# feature matrix `x_test`; this is the call that raises
# "Expected 2D array, got 1D array instead" in the traceback that follows.
results = exported_pipeline.predict(y_test)
print(np.mean(abs(y_test-results)))

这个代码给了我这个错误

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-93-5e4ac0c63791> in <module>
     28 
     29 exported_pipeline.fit(x_train, y_train)
---> 30 results = exported_pipeline.predict(y_test)
     31 print(np.mean(abs(y_test-results)))

~/anaconda3/lib/python3.8/site-packages/sklearn/utils/metaestimators.py in <lambda>(*args, **kwargs)
    114 
    115         # lambda, but not partial, allows help() to work with update_wrapper
--> 116         out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
    117         # update the docstring of the returned function
    118         update_wrapper(out, self.fn)

~/anaconda3/lib/python3.8/site-packages/sklearn/pipeline.py in predict(self, X, **predict_params)
    417         Xt = X
    418         for _, name, transform in self._iter(with_final=False):
--> 419             Xt = transform.transform(Xt)
    420         return self.steps[-1][-1].predict(Xt, **predict_params)
    421 

~/anaconda3/lib/python3.8/site-packages/sklearn/preprocessing/_data.py in transform(self, X, copy)
   1827         """
   1828         copy = copy if copy is not None else self.copy
-> 1829         X = check_array(X, accept_sparse='csr')
   1830         return normalize(X, norm=self.norm, axis=1, copy=copy)
   1831 

~/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
    550             # If input is 1D raise error
    551             if array.ndim == 1:
--> 552                 raise ValueError(
    553                     "Expected 2D array, got 1D array instead:\narray={}.\n"
    554                     "Reshape your data either using array.reshape(-1, 1) if "

ValueError: Expected 2D array, got 1D array instead:
array=[-0.54719445  0.01222733  0.89720391 ... -1.22633808 -0.19243653
 -0.1420281 ].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

我按照提示把 x_train 用 reshape(-1,1) 重塑后再拟合，结果得到了下面这个错误

ValueError                                Traceback (most recent call last)
<ipython-input-96-fb56ff22e193> in <module>
     27 )
     28 
---> 29 exported_pipeline.fit(x_train.values.reshape(-1,1), y_train)
     30 results = exported_pipeline.predict(y_test)
     31 print(np.mean(abs(y_test-results)))

~/anaconda3/lib/python3.8/site-packages/sklearn/pipeline.py in fit(self, X, y, **fit_params)
    348             This estimator
    349         """
--> 350         Xt, fit_params = self._fit(X, y, **fit_params)
    351         with _print_elapsed_time('Pipeline',
    352                                  self._log_message(len(self.steps) - 1)):

~/anaconda3/lib/python3.8/site-packages/sklearn/pipeline.py in _fit(self, X, y, **fit_params)
    309                 cloned_transformer = clone(transformer)
    310             # Fit or load from cache the current transformer
--> 311             X, fitted_transformer = fit_transform_one_cached(
    312                 cloned_transformer, X, y, None,
    313                 message_clsname='Pipeline',

~/.local/lib/python3.8/site-packages/joblib/memory.py in __call__(self, *args, **kwargs)
    350 
    351     def __call__(self, *args, **kwargs):
--> 352         return self.func(*args, **kwargs)
    353 
    354     def call_and_shelve(self, *args, **kwargs):

~/anaconda3/lib/python3.8/site-packages/sklearn/pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
    726     with _print_elapsed_time(message_clsname, message):
    727         if hasattr(transformer, 'fit_transform'):
--> 728             res = transformer.fit_transform(X, y, **fit_params)
    729         else:
    730             res = transformer.fit(X, y, **fit_params).transform(X)

~/anaconda3/lib/python3.8/site-packages/sklearn/base.py in fit_transform(self, X, y, **fit_params)
    572         else:
    573             # fit method of arity 2 (supervised transformation)
--> 574             return self.fit(X, y, **fit_params).transform(X)
    575 
    576 

~/anaconda3/lib/python3.8/site-packages/tpot/builtins/stacking_estimator.py in fit(self, X, y, **fit_params)
     65             Returns a copy of the estimator
     66         """
---> 67         self.estimator.fit(X, y, **fit_params)
     68         return self
     69 

~/anaconda3/lib/python3.8/site-packages/sklearn/linear_model/_least_angle.py in fit(self, X, y)
   1378             returns an instance of self.
   1379         """
-> 1380         X, y = check_X_y(X, y, y_numeric=True)
   1381         X = as_float_array(X, copy=self.copy_X)
   1382         y = as_float_array(y, copy=self.copy_X)

~/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)
    763         y = y.astype(np.float64)
    764 
--> 765     check_consistent_length(X, y)
    766 
    767     return X, y

~/anaconda3/lib/python3.8/site-packages/sklearn/utils/validation.py in check_consistent_length(*arrays)
    209     uniques = np.unique(lengths)
    210     if len(uniques) > 1:
--> 211         raise ValueError("Found input variables with inconsistent numbers of"
    212                          " samples: %r" % [int(l) for l in lengths])
    213 

ValueError: Found input variables with inconsistent numbers of samples: [120498, 5738]

我试过这个

# Second attempt from the question: reshape before splitting, then fit the
# same pipeline.
# NOTE(review): neither reshape() result is assigned, so `x` and `y` are
# passed to train_test_split unchanged.
x.values.reshape(-1,1)
y.values.reshape(-1,1)

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.2,random_state=0)


# Average CV score on the training set was: -0.1116338317020572
exported_pipeline = make_pipeline(
    Normalizer(norm="max"),
    StackingEstimator(estimator=LassoLarsCV(normalize=True)),
    RandomForestRegressor(bootstrap=False, max_features=0.5, min_samples_leaf=1, min_samples_split=19, n_estimators=100)
)

exported_pipeline.fit(x_train, y_train)
# NOTE(review): predict() is still given the target `y_test` rather than the
# features `x_test`, so the original error is unchanged by the reshapes above.
results = exported_pipeline.predict(y_test)
print(np.mean(abs(y_test-results)))

非常感谢你的帮助。我似乎不明白为什么会发生这种情况，所以非常感谢任何帮助或评论。该模型由tpot制作。

预测通常基于x值而不是y值。因此，我认为正确的代码行应该是：

results = exported_pipeline.predict(x_test)

换句话说，你从x预测y，而不是相反。正如错误消息所示，x通常是一个2D数组。

哦，我的天哪！非常感谢你。我最近太累了，真不敢相信我犯了这样一个概念上的错误。现在可以了！如果这个评论不应该出现，我将删除它。

# Repeat of the question's reshape attempt, followed by the corrected
# predict() call proposed in the answer.
# NOTE(review): neither reshape() result is assigned, so `x` and `y` are
# passed to train_test_split unchanged.
x.values.reshape(-1,1)
y.values.reshape(-1,1)

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.2,random_state=0)


# Average CV score on the training set was: -0.1116338317020572
exported_pipeline = make_pipeline(
    Normalizer(norm="max"),
    StackingEstimator(estimator=LassoLarsCV(normalize=True)),
    RandomForestRegressor(bootstrap=False, max_features=0.5, min_samples_leaf=1, min_samples_split=19, n_estimators=100)
)

exported_pipeline.fit(x_train, y_train)
# NOTE(review): faulty call from the question -- it predicts from the target
# `y_test`. If this snippet is run top to bottom, presumably this line raises
# before the corrected call at the end is reached.
results = exported_pipeline.predict(y_test)
print(np.mean(abs(y_test-results)))

# Corrected call from the answer: predict from the held-out features.
results = exported_pipeline.predict(x_test)