如何修复 sklearn fit_transform 报错“Use X.toarray() to convert to a dense numpy array”?
代码 我一直遇到这样的错误-如何修复 sklearn fit_transform 报错“Use X.toarray() to convert to a dense numpy array”?,numpy,machine-learning,scikit-learn,regression,data-science,Numpy,Machine Learning,Scikit Learn,Regression,Data Science,代码 我一直遇到这样的错误- from sklearn.preprocessing import LabelEncoder,OneHotEncoder from sklearn.compose import ColumnTransformer ct = ColumnTransformer([('State', OneHotEncoder(), [3])], remainder='passthrough') X = np.array(ct.fit_transform(X), dtype=np.float)
from sklearn.preprocessing import LabelEncoder,OneHotEncoder
from sklearn.compose import ColumnTransformer
# Question snippet: one-hot encode the column at index 3 with OneHotEncoder;
# remainder='passthrough' keeps every other column in the output unchanged.
ct = ColumnTransformer([('State', OneHotEncoder(), [3])], remainder='passthrough')
# Fit the transformer on X, transform X, and wrap the result in a NumPy array.
# NOTE(review): `X` and `np` are not defined in this excerpt; they are assumed
# to come from earlier code that is not shown.
X = np.array(ct.fit_transform(X), dtype=np.float)
TypeError Traceback (most recent call last)
<ipython-input-36-17f64bed7e4c> in <module>
3
4 ct = ColumnTransformer([('State', OneHotEncoder(), [3])], remainder='passthrough')
----> 5 X = np.array(ct.fit_transform(X), dtype=object)
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\compose\_column_transformer.py in fit_transform(self, X, y)
516 self._validate_remainder(X)
517
--> 518 result = self._fit_transform(X, y, _fit_transform_one)
519
520 if not result:
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\compose\_column_transformer.py in _fit_transform(self, X, y, func, fitted)
446 self._iter(fitted=fitted, replace_strings=True))
447 try:
--> 448 return Parallel(n_jobs=self.n_jobs)(
449 delayed(func)(
450 transformer=clone(trans) if not fitted else trans,
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
1002 # remaining jobs.
1003 self._iterating = False
-> 1004 if self.dispatch_one_batch(iterator):
1005 self._iterating = self._original_iterator is not None
1006
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
833 return False
834 else:
--> 835 self._dispatch(tasks)
836 return True
837
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
752 with self._lock:
753 job_idx = len(self._jobs)
--> 754 job = self._backend.apply_async(batch, callback=cb)
755 # A job can complete so quickly than its callback is
756 # called before we get here, causing self._jobs to
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
207 def apply_async(self, func, callback=None):
208 """Schedule a func to be run"""
--> 209 result = ImmediateResult(func)
210 if callback:
211 callback(result)
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch)
588 # Don't delay the application, to avoid keeping the input
589 # arguments in memory
--> 590 self.results = batch()
591
592 def get(self):
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\parallel.py in __call__(self)
253 # change the default number of processes to -1
254 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 255 return [func(*args, **kwargs)
256 for func, args, kwargs in self.items]
257
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\parallel.py in <listcomp>(.0)
253 # change the default number of processes to -1
254 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 255 return [func(*args, **kwargs)
256 for func, args, kwargs in self.items]
257
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
726 with _print_elapsed_time(message_clsname, message):
727 if hasattr(transformer, 'fit_transform'):
--> 728 res = transformer.fit_transform(X, y, **fit_params)
729 else:
730 res = transformer.fit(X, y, **fit_params).transform(X)
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\preprocessing\_encoders.py in fit_transform(self, X, y)
370 """
371 self._validate_keywords()
--> 372 return super().fit_transform(X, y)
373
374 def transform(self, X):
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\base.py in fit_transform(self, X, y, **fit_params)
569 if y is None:
570 # fit method of arity 1 (unsupervised transformation)
--> 571 return self.fit(X, **fit_params).transform(X)
572 else:
573 # fit method of arity 2 (supervised transformation)
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\preprocessing\_encoders.py in fit(self, X, y)
345 """
346 self._validate_keywords()
--> 347 self._fit(X, handle_unknown=self.handle_unknown)
348 self.drop_idx_ = self._compute_drop_idx()
349 return self
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\preprocessing\_encoders.py in _fit(self, X, handle_unknown)
72
73 def _fit(self, X, handle_unknown='error'):
---> 74 X_list, n_samples, n_features = self._check_X(X)
75
76 if self.categories != 'auto':
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\preprocessing\_encoders.py in _check_X(self, X)
41 if not (hasattr(X, 'iloc') and getattr(X, 'ndim', 0) == 2):
42 # if not a dataframe, do normal check_array validation
---> 43 X_temp = check_array(X, dtype=None)
44 if (not hasattr(X, 'dtype')
45 and np.issubdtype(X_temp.dtype, np.str_)):
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
from sklearn.preprocessing import LabelEncoder,OneHotEncoder
from sklearn.compose import ColumnTransformer
# Question snippet: one-hot encode the column at index 3 with OneHotEncoder;
# remainder='passthrough' keeps every other column in the output unchanged.
ct = ColumnTransformer([('State', OneHotEncoder(), [3])], remainder='passthrough')
# Fit the transformer on X, transform X, and wrap the result in a NumPy array.
# NOTE(review): `X` and `np` are not defined in this excerpt; they are assumed
# to come from earlier code that is not shown.
X = np.array(ct.fit_transform(X), dtype=np.float)
TypeError Traceback (most recent call last)
<ipython-input-36-17f64bed7e4c> in <module>
3
4 ct = ColumnTransformer([('State', OneHotEncoder(), [3])], remainder='passthrough')
----> 5 X = np.array(ct.fit_transform(X), dtype=object)
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\compose\_column_transformer.py in fit_transform(self, X, y)
516 self._validate_remainder(X)
517
--> 518 result = self._fit_transform(X, y, _fit_transform_one)
519
520 if not result:
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\compose\_column_transformer.py in _fit_transform(self, X, y, func, fitted)
446 self._iter(fitted=fitted, replace_strings=True))
447 try:
--> 448 return Parallel(n_jobs=self.n_jobs)(
449 delayed(func)(
450 transformer=clone(trans) if not fitted else trans,
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
1002 # remaining jobs.
1003 self._iterating = False
-> 1004 if self.dispatch_one_batch(iterator):
1005 self._iterating = self._original_iterator is not None
1006
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
833 return False
834 else:
--> 835 self._dispatch(tasks)
836 return True
837
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
752 with self._lock:
753 job_idx = len(self._jobs)
--> 754 job = self._backend.apply_async(batch, callback=cb)
755 # A job can complete so quickly than its callback is
756 # called before we get here, causing self._jobs to
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
207 def apply_async(self, func, callback=None):
208 """Schedule a func to be run"""
--> 209 result = ImmediateResult(func)
210 if callback:
211 callback(result)
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch)
588 # Don't delay the application, to avoid keeping the input
589 # arguments in memory
--> 590 self.results = batch()
591
592 def get(self):
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\parallel.py in __call__(self)
253 # change the default number of processes to -1
254 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 255 return [func(*args, **kwargs)
256 for func, args, kwargs in self.items]
257
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\parallel.py in <listcomp>(.0)
253 # change the default number of processes to -1
254 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 255 return [func(*args, **kwargs)
256 for func, args, kwargs in self.items]
257
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
726 with _print_elapsed_time(message_clsname, message):
727 if hasattr(transformer, 'fit_transform'):
--> 728 res = transformer.fit_transform(X, y, **fit_params)
729 else:
730 res = transformer.fit(X, y, **fit_params).transform(X)
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\preprocessing\_encoders.py in fit_transform(self, X, y)
370 """
371 self._validate_keywords()
--> 372 return super().fit_transform(X, y)
373
374 def transform(self, X):
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\base.py in fit_transform(self, X, y, **fit_params)
569 if y is None:
570 # fit method of arity 1 (unsupervised transformation)
--> 571 return self.fit(X, **fit_params).transform(X)
572 else:
573 # fit method of arity 2 (supervised transformation)
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\preprocessing\_encoders.py in fit(self, X, y)
345 """
346 self._validate_keywords()
--> 347 self._fit(X, handle_unknown=self.handle_unknown)
348 self.drop_idx_ = self._compute_drop_idx()
349 return self
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\preprocessing\_encoders.py in _fit(self, X, handle_unknown)
72
73 def _fit(self, X, handle_unknown='error'):
---> 74 X_list, n_samples, n_features = self._check_X(X)
75
76 if self.categories != 'auto':
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\preprocessing\_encoders.py in _check_X(self, X)
41 if not (hasattr(X, 'iloc') and getattr(X, 'ndim', 0) == 2):
42 # if not a dataframe, do normal check_array validation
---> 43 X_temp = check_array(X, dtype=None)
44 if (not hasattr(X, 'dtype')
45 and np.issubdtype(X_temp.dtype, np.str_)):
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
506 if sp.issparse(array):
507 _ensure_no_complex_data(array)
--> 508 array = _ensure_sparse_format(array, accept_sparse=accept_sparse,
509 dtype=dtype, copy=copy,
510 force_all_finite=force_all_finite,
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\utils\validation.py in _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, force_all_finite, accept_large_sparse)
304
305 if accept_sparse is False:
--> 306 raise TypeError('A sparse matrix was passed, but dense '
307 'data is required. Use X.toarray() to '
308 'convert to a dense numpy array.')
TypeError: A sparse matrix was passed, but dense data is required. Use X.toarray() to convert to a dense numpy array.
from sklearn.preprocessing import LabelEncoder,OneHotEncoder
from sklearn.compose import ColumnTransformer
import pandas as pd
import numpy as np
# Minimal reproducible example: one numeric column and one categorical column.
# The state spellings are kept exactly as given (including "New Tork"), so the
# encoder sees four distinct categories and emits four one-hot columns.
X = pd.DataFrame({
    "R&D": [1, 2, 3, 4],
    "State": ["New Tork", "Florida", "New York", "California"],
})

# sparse_threshold=0 tells ColumnTransformer to always return a dense array,
# which avoids handing a sparse matrix to code that requires dense data.
ct = ColumnTransformer(
    [('State', OneHotEncoder(), [0])],
    sparse_threshold=0,
    remainder='passthrough',
)

# Only the 'State' column is passed in, so index 0 above refers to it.
# `np.float` was merely an alias of the builtin `float`; it was deprecated in
# NumPy 1.20 and removed in 1.24, so the builtin is used to get the same dtype.
np.array(ct.fit_transform(X[['State']]), dtype=float)
array([[0., 0., 1., 0.],
[0., 1., 0., 0.],
[0., 0., 0., 1.],
[1., 0., 0., 0.]])