如何修复 sklearn fit_transform 报错"使用 X.toarray() 转换为密集 numpy 数组"?

如何修复 sklearn fit_transform 报错"使用 X.toarray() 转换为密集 numpy 数组"?,numpy,machine-learning,scikit-learn,regression,data-science,Numpy,Machine Learning,Scikit Learn,Regression,Data Science,代码 我不断遇到这样的错误: from sklearn.preprocessing import LabelEncoder,OneHotEncoder from sklearn.compose import ColumnTransformer ct = ColumnTransformer([('State', OneHotEncoder(), [3])], remainder='passthrough') X = np.array(ct.fit_transform(X), dtype=np.fl

代码

我不断遇到这样的错误:

# Snippet from the question: one-hot encode column index 3 of X, pass the
# remaining columns through unchanged, then wrap the result in a NumPy array.
# NOTE(review): `np` and `X` are not defined in this snippet - presumably they
# were set up earlier in the asker's session. The traceback that follows quotes
# the same call with dtype=object, so the cell was likely edited between runs.
from sklearn.preprocessing import LabelEncoder,OneHotEncoder
from sklearn.compose import ColumnTransformer
ct = ColumnTransformer([('State', OneHotEncoder(), [3])], remainder='passthrough')
X = np.array(ct.fit_transform(X), dtype=np.float)
TypeError                   Traceback (most recent call last)
<ipython-input-36-17f64bed7e4c> in <module>
      3 
      4 ct = ColumnTransformer([('State', OneHotEncoder(), [3])], remainder='passthrough')
----> 5 X = np.array(ct.fit_transform(X), dtype=object)
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\compose\_column_transformer.py in fit_transform(self, X, y)
    516         self._validate_remainder(X)
    517 
--> 518         result = self._fit_transform(X, y, _fit_transform_one)
    519 
    520         if not result:
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\compose\_column_transformer.py in _fit_transform(self, X, y, func, fitted)
    446             self._iter(fitted=fitted, replace_strings=True))
    447         try:
--> 448             return Parallel(n_jobs=self.n_jobs)(
    449                 delayed(func)(
    450                     transformer=clone(trans) if not fitted else trans,
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
   1002             # remaining jobs.
   1003             self._iterating = False
-> 1004             if self.dispatch_one_batch(iterator):
   1005                 self._iterating = self._original_iterator is not None
   1006 
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
    833                 return False
    834             else:
--> 835                 self._dispatch(tasks)
    836                 return True
    837 
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
    752         with self._lock:
    753             job_idx = len(self._jobs)
--> 754             job = self._backend.apply_async(batch, callback=cb)
    755             # A job can complete so quickly than its callback is
    756             # called before we get here, causing self._jobs to
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
    207     def apply_async(self, func, callback=None):
    208         """Schedule a func to be run"""
--> 209         result = ImmediateResult(func)
    210         if callback:
    211             callback(result)
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch)
    588         # Don't delay the application, to avoid keeping the input
    589         # arguments in memory
--> 590         self.results = batch()
    591 
    592     def get(self):
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\parallel.py in __call__(self)
    253         # change the default number of processes to -1
    254         with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 255             return [func(*args, **kwargs)
    256                     for func, args, kwargs in self.items]
    257 
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\parallel.py in <listcomp>(.0)
    253         # change the default number of processes to -1
    254         with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 255             return [func(*args, **kwargs)
    256                     for func, args, kwargs in self.items]
    257 
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
    726     with _print_elapsed_time(message_clsname, message):
    727         if hasattr(transformer, 'fit_transform'):
--> 728             res = transformer.fit_transform(X, y, **fit_params)
    729         else:
    730             res = transformer.fit(X, y, **fit_params).transform(X)
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\preprocessing\_encoders.py in fit_transform(self, X, y)
    370         """
    371         self._validate_keywords()
--> 372         return super().fit_transform(X, y)
    373 
    374     def transform(self, X):
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\base.py in fit_transform(self, X, y, **fit_params)
    569         if y is None:
    570             # fit method of arity 1 (unsupervised transformation)
--> 571             return self.fit(X, **fit_params).transform(X)
    572         else:
    573             # fit method of arity 2 (supervised transformation)
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\preprocessing\_encoders.py in fit(self, X, y)
    345         """
    346         self._validate_keywords()
--> 347         self._fit(X, handle_unknown=self.handle_unknown)
    348         self.drop_idx_ = self._compute_drop_idx()
    349         return self
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\preprocessing\_encoders.py in _fit(self, X, handle_unknown)
     72 
     73     def _fit(self, X, handle_unknown='error'):
---> 74         X_list, n_samples, n_features = self._check_X(X)
     75 
     76         if self.categories != 'auto':
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\preprocessing\_encoders.py in _check_X(self, X)
     41         if not (hasattr(X, 'iloc') and getattr(X, 'ndim', 0) == 2):
     42             # if not a dataframe, do normal check_array validation
---> 43             X_temp = check_array(X, dtype=None)
     44             if (not hasattr(X, 'dtype')
     45                     and np.issubdtype(X_temp.dtype, np.str_)):
c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
# Snippet from the question: one-hot encode column index 3 of X, pass the
# remaining columns through unchanged, then wrap the result in a NumPy array.
# NOTE(review): `np` and `X` are not defined in this snippet - presumably they
# were set up earlier in the asker's session. The traceback that follows quotes
# the same call with dtype=object, so the cell was likely edited between runs.
from sklearn.preprocessing import LabelEncoder,OneHotEncoder
from sklearn.compose import ColumnTransformer
ct = ColumnTransformer([('State', OneHotEncoder(), [3])], remainder='passthrough')
X = np.array(ct.fit_transform(X), dtype=np.float)
TypeError                   Traceback (most recent call last)
<ipython-input-36-17f64bed7e4c> in <module>
      3 
      4 ct = ColumnTransformer([('State', OneHotEncoder(), [3])], remainder='passthrough')
----> 5 X = np.array(ct.fit_transform(X), dtype=object)

c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\compose\_column_transformer.py in fit_transform(self, X, y)
    516         self._validate_remainder(X)
    517 
--> 518         result = self._fit_transform(X, y, _fit_transform_one)
    519 
    520         if not result:

c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\compose\_column_transformer.py in _fit_transform(self, X, y, func, fitted)
    446             self._iter(fitted=fitted, replace_strings=True))
    447         try:
--> 448             return Parallel(n_jobs=self.n_jobs)(
    449                 delayed(func)(
    450                     transformer=clone(trans) if not fitted else trans,

c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
   1002             # remaining jobs.
   1003             self._iterating = False
-> 1004             if self.dispatch_one_batch(iterator):
   1005                 self._iterating = self._original_iterator is not None
   1006 

c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
    833                 return False
    834             else:
--> 835                 self._dispatch(tasks)
    836                 return True
    837 

c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
    752         with self._lock:
    753             job_idx = len(self._jobs)
--> 754             job = self._backend.apply_async(batch, callback=cb)
    755             # A job can complete so quickly than its callback is
    756             # called before we get here, causing self._jobs to

c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
    207     def apply_async(self, func, callback=None):
    208         """Schedule a func to be run"""
--> 209         result = ImmediateResult(func)
    210         if callback:
    211             callback(result)

c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch)
    588         # Don't delay the application, to avoid keeping the input
    589         # arguments in memory
--> 590         self.results = batch()
    591 
    592     def get(self):

c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\parallel.py in __call__(self)
    253         # change the default number of processes to -1
    254         with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 255             return [func(*args, **kwargs)
    256                     for func, args, kwargs in self.items]
    257 

c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\joblib\parallel.py in <listcomp>(.0)
    253         # change the default number of processes to -1
    254         with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 255             return [func(*args, **kwargs)
    256                     for func, args, kwargs in self.items]
    257 

c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
    726     with _print_elapsed_time(message_clsname, message):
    727         if hasattr(transformer, 'fit_transform'):
--> 728             res = transformer.fit_transform(X, y, **fit_params)
    729         else:
    730             res = transformer.fit(X, y, **fit_params).transform(X)

c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\preprocessing\_encoders.py in fit_transform(self, X, y)
    370         """
    371         self._validate_keywords()
--> 372         return super().fit_transform(X, y)
    373 
    374     def transform(self, X):

c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\base.py in fit_transform(self, X, y, **fit_params)
    569         if y is None:
    570             # fit method of arity 1 (unsupervised transformation)
--> 571             return self.fit(X, **fit_params).transform(X)
    572         else:
    573             # fit method of arity 2 (supervised transformation)

c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\preprocessing\_encoders.py in fit(self, X, y)
    345         """
    346         self._validate_keywords()
--> 347         self._fit(X, handle_unknown=self.handle_unknown)
    348         self.drop_idx_ = self._compute_drop_idx()
    349         return self

c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\preprocessing\_encoders.py in _fit(self, X, handle_unknown)
     72 
     73     def _fit(self, X, handle_unknown='error'):
---> 74         X_list, n_samples, n_features = self._check_X(X)
     75 
     76         if self.categories != 'auto':

c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\preprocessing\_encoders.py in _check_X(self, X)
     41         if not (hasattr(X, 'iloc') and getattr(X, 'ndim', 0) == 2):
     42             # if not a dataframe, do normal check_array validation
---> 43             X_temp = check_array(X, dtype=None)
     44             if (not hasattr(X, 'dtype')
     45                     and np.issubdtype(X_temp.dtype, np.str_)):

c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
    506     if sp.issparse(array):
    507         _ensure_no_complex_data(array)
--> 508         array = _ensure_sparse_format(array, accept_sparse=accept_sparse,
    509                                       dtype=dtype, copy=copy,
    510                                       force_all_finite=force_all_finite,

c:\users\dell\appdata\local\programs\python\python38\lib\site-packages\sklearn\utils\validation.py in _ensure_sparse_format(spmatrix, accept_sparse, dtype, copy, force_all_finite, accept_large_sparse)
    304 
    305     if accept_sparse is False:
--> 306         raise TypeError('A sparse matrix was passed, but dense '
    307                         'data is required. Use X.toarray() to '
    308                         'convert to a dense numpy array.')

TypeError: A sparse matrix was passed, but dense data is required. Use X.toarray() to convert to a dense numpy array.
# Answer: make ColumnTransformer return a dense array so the one-hot encoded
# result can be converted to a float NumPy array directly.
from sklearn.preprocessing import LabelEncoder,OneHotEncoder
from sklearn.compose import ColumnTransformer
import pandas as pd
import numpy as np

# Small demo frame. The "New Tork" spelling is kept exactly as in the original
# answer: it counts as a fourth distinct State value, which is why the printed
# result has four columns.
X = pd.DataFrame({"R&D":[1,2,3,4],
"State":["New Tork","Florida","New York","California"]})

# sparse_threshold=0 tells ColumnTransformer to always produce dense output
# instead of a scipy sparse matrix.
ct = ColumnTransformer([('State', OneHotEncoder(), [0])],
sparse_threshold=0,remainder='passthrough')
# np.float (an alias of the builtin float) was deprecated in NumPy 1.20 and
# removed in 1.24; np.float64 is the dtype it resolved to.
np.array(ct.fit_transform(X[['State']]), dtype=np.float64)

array([[0., 0., 1., 0.],
       [0., 1., 0., 0.],
       [0., 0., 0., 1.],
       [1., 0., 0., 0.]])