Pandas Jupyter:"TypeError: invalid type comparison"(类型比较无效)
好的,我刚开始一份新工作,我的任务是用jupyter写一个简单的笔记本。我真的很想给我的主管留下深刻的印象,我已经花了好几个小时来编写这个代码,但却无法让它正常工作,希望这里有人能帮助我 以下是我一直在研究的代码:Pandas Jupyter "TypeError: invalid type comparison"(类型比较无效),pandas,numpy,matplotlib,seaborn,jupyter,Pandas,Numpy,Matplotlib,Seaborn,Jupyter,好的,我刚开始一份新工作,我的任务是用jupyter写一个简单的笔记本。我真的很想给我的主管留下深刻的印象,我已经花了好几个小时来编写这个代码,但却无法让它正常工作,希望这里有人能帮助我 以下是我一直在研究的代码: import numpy as np import pandas as pd import matplotlib import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import seaborn
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
# Load only the four columns used below. GENDER is converted while reading:
# the string 'Male' becomes 1 and every other value becomes 0.
csv_path = r'C:\Users\jk2588\Documents\EDA\EDA Practice\top1000_dataset.csv'
df = pd.read_csv(
    csv_path,
    usecols=['MEMBER_ID', 'GENDER', 'Age', 'Dement'],
    converters={'GENDER': lambda x: int(x == 'Male')},
)

# Collapse to one row per MEMBER_ID: the mean of Dement, and the maximum of
# GENDER and Age, then join the two summaries back together on MEMBER_ID.
df_gp_1 = df[['MEMBER_ID', 'Dement']].groupby('MEMBER_ID').mean().reset_index()
df_gp_2 = df[['MEMBER_ID', 'GENDER', 'Age']].groupby('MEMBER_ID').max().reset_index()
df_gp = df_gp_1.merge(df_gp_2, on=['MEMBER_ID'])

# Notebook display of the first rows of the raw (ungrouped) frame.
df.head()
输出:
  MEMBER_ID Age Dement GENDER
0 00000000 2 01 36 NaN 0
1 00000000 2 01 36 NaN 0
2 00000000 2 01 36 NaN 0
3 00000000 2 01 36 NaN 0
4 00000000 2 01 36 NaN 0
# Replace missing Dement values with 0 and store the column as int64.
# This only touches `df`; frames derived from it earlier are not updated.
df['Dement'] = df['Dement'].fillna(0).astype('int64')

# Print the column dtypes / non-null counts to confirm the conversion.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 999 entries, 0 to 998
Data columns (total 4 columns):
MEMBER_ID 999 non-null object
Age 999 non-null int64
Dement 999 non-null int64
GENDER 999 non-null int64
dtypes: int64(3), object(1)
memory usage: 31.3+ KB
# Percentage of members for each distinct Age value, ordered by age.
# NOTE(review): sorting by 'index' relies on reset_index() naming the value
# column 'index', as pandas releases before 2.0 do -- confirm on upgrade.
age_share = df_gp.Age.value_counts(normalize=True).reset_index().sort_values(by='index')
freq = (age_share.Age * 100).tolist()

# Number of age-bucket rows drawn in the figure.
number_gp = 7
def ax_settings(ax, var_name, x_min, x_max):
    """Apply the shared look of one density row to *ax*.

    Sets the x-range, removes the y ticks, hides every spine except the
    bottom one (drawn dark grey, 2 pt wide) and writes *var_name* in bold
    near the lower-left corner of the axes.

    Parameters:
        ax: the axes object to style; it is modified in place.
        var_name: label text written inside the axes.
        x_min: lower x-axis limit.
        x_max: upper x-axis limit.

    Returns:
        None.
    """
    ax.set_xlim(x_min, x_max)
    ax.set_yticks([])

    # Only the bottom spine stays visible so stacked rows read as one chart.
    for side in ('left', 'right', 'top'):
        ax.spines[side].set_visible(False)
    ax.spines['bottom'].set_edgecolor('#444444')
    ax.spines['bottom'].set_linewidth(2)

    # transAxes places the label in axes-fraction coordinates, so it sits at
    # the same spot whatever the data limits are.
    ax.text(0.02, 0.05, var_name, fontsize=17, fontweight="bold", transform=ax.transAxes)
# Ridge-style figure: on the left, one density row per age bucket comparing the
# Dement distribution of male and female members; on the right, a horizontal
# bar chart with the share of members in each bucket.
features = ['0-17', '18-25', '26-35', '36-45', '46-50', '51-55', '55+']

# Age is a numeric column, so comparing it with the bucket labels raises
# "TypeError: invalid type comparison". Bin the ages into the labelled buckets
# first. Edges are right-inclusive (0-17, 18-25, ...), so '55+' starts at 56.
age_edges = [0, 17, 25, 35, 45, 50, 55, np.inf]
age_group = pd.cut(df_gp.Age, bins=age_edges, labels=features, include_lowest=True)

# GENDER is produced by the read_csv converter `int(x == 'Male')`, so it holds
# 1 for male and 0 otherwise -- never the strings 'M' / 'F'.
MALE, FEMALE = 1, 0

# Share (%) of members per bucket, in the same order as `features`, so that the
# bars line up with their labels (one value per bucket, not per distinct age).
bucket_freq = [(age_group == label).mean() * 100 for label in features]

fig = plt.figure(figsize=(12, 7))
gs = gridspec.GridSpec(
    nrows=number_gp,
    ncols=2,
    figure=fig,
    width_ratios=[3, 1],
    height_ratios=[1] * number_gp,
    wspace=0.2, hspace=0.05,
)

# ax[0:number_gp] are the density rows; ax[number_gp] is the bar chart.
ax = [None] * (number_gp + 1)

for i, label in enumerate(features[:number_gp]):
    ax[i] = fig.add_subplot(gs[i, 0])
    # NOTE(review): the x-range and the bandwidth below are kept unchanged from
    # the original script -- confirm they suit the scale of Dement.
    ax_settings(ax[i], 'Age: ' + label, -1000, 20000)

    in_bucket = age_group == label
    for gender_code, colour in ((MALE, "blue"), (FEMALE, "red")):
        # df_gp.Dement can still contain NaN (the fillna above was applied to
        # df, not df_gp), so drop missing values before estimating a density.
        values = df_gp.loc[in_bucket & (df_gp.GENDER == gender_code), 'Dement'].dropna()
        # A density estimate needs at least two observations; skip otherwise.
        if len(values) > 1:
            sns.kdeplot(data=values, ax=ax[i], shade=True, color=colour, bw=300, legend=False)

    # Only the bottom row keeps its x tick labels.
    if i < (number_gp - 1):
        ax[i].set_xticks([])

ax[0].legend(['Male', 'Female'], facecolor='w')

# Right-hand panel spanning all rows: share of members per age bucket.
ax[number_gp] = fig.add_subplot(gs[:, 1])
ax[number_gp].spines['right'].set_visible(False)
ax[number_gp].spines['top'].set_visible(False)
ax[number_gp].barh(features, bucket_freq, color='#004c99', height=0.4)
ax[number_gp].set_xlim(0, 100)
# Put the first bucket at the top so the bars match the row order on the left.
ax[number_gp].invert_yaxis()
ax[number_gp].text(1.09, -0.04, '(%)', fontsize=10, transform=ax[number_gp].transAxes)
ax[number_gp].tick_params(axis='y', labelsize=14)
plt.show()
df['Dement']=df['Dement'].fillna(0)
df['Dement']=df['Dement'].astype('int64')
df.info()
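<class 'pandas.core.frame.DataFrame'>
RangeIndex: 999 entries, 0 to 998
Data columns (total 4 columns):
MEMBER_ID 999 non-null object
Age 999 non-null int64
Dement 999 non-null int64
GENDER 999 non-null int64
dtypes: int64(3), object(1)
memory usage: 31.3+ KB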
freq = ((df_gp.Age.value_counts(normalize = True).reset_index().sort_values(by = 'index').Age)*100).tolist()
number_gp = 7
def ax_settings(ax, var_name, x_min, x_max):
    ax.set_xlim(x_min,x_max)
    ax.set_yticks([])
    ax.spines['left'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['bottom'].set_edgecolor('#444444')
    ax.spines['bottom'].set_linewidth(2)
    ax.text(0.02, 0.05, var_name, fontsize=17, fontweight="bold", transform = ax.transAxes)
    return None
fig = plt.figure(figsize=(12,7))
gs = gridspec.GridSpec(nrows=number_gp,
                       ncols=2,
                       figure=fig,
                       width_ratios= [3, 1],
                       height_ratios= [1]*number_gp,
                       wspace=0.2, hspace=0.05
                       )
ax = [None]*(number_gp + 1)
features = ['0-17', '18-25', '26-35', '36-45', '46-50', '51-55', '55+']
for i in range(number_gp):
    ax[i] = fig.add_subplot(gs[i, 0])
    ax_settings(ax[i], 'Age: ' + str(features[i]), -1000, 20000)
    sns.kdeplot(data=df_gp[(df_gp.GENDER == 'M') & (df_gp.Age == features[i])].Dement, ax=ax[i], shade=True, color="blue", bw=300, legend=False)
    sns.kdeplot(data=df_gp[(df_gp.GENDER == 'F') & (df_gp.Age == features[i])].Dement, ax=ax[i], shade=True, color="red", bw=300, legend=False)
    if i < (number_gp - 1): ax[i].set_xticks([])
ax[0].legend(['Male', 'Female'], facecolor='w')
ax[number_gp] = fig.add_subplot(gs[:, 1])
ax[number_gp].spines['right'].set_visible(False)
ax[number_gp].spines['top'].set_visible(False)
ax[number_gp].barh(features, freq, color='#004c99', height=0.4)
ax[number_gp].set_xlim(0,100)
ax[number_gp].invert_yaxis()
ax[number_gp].text(1.09, -0.04, '(%)', fontsize=10, transform = ax[number_gp].transAxes)
ax[number_gp].tick_params(axis='y', labelsize = 14)
plt.show()
然后,我遇到了以下错误:
C:\Users\jk2588\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\ops.py:1167: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
result = method(y)
--------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-38-8665030edb1c> in <module>()
24 ax[i] = fig.add_subplot(gs[i, 0])
25 ax_settings(ax[i], 'Age: ' + str(features[i]), -1000, 20000)
---> 26 sns.kdeplot(data=df_gp[(df_gp.GENDER == 'M') & (df_gp.Age == features[i])].Dement, ax=ax[i], shade=True, color="blue", bw=300, legend=False)
27 sns.kdeplot(data=df_gp[(df_gp.GENDER == 'F') & (df_gp.Age == features[i])].Dement, ax=ax[i], shade=True, color="red", bw=300, legend=False)
28 if i < (number_gp - 1): ax[i].set_xticks([])
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\ops.py in wrapper(self, other, axis)
1281
1282 with np.errstate(all='ignore'):
-> 1283 res = na_op(values, other)
1284 if is_scalar(res):
1285 raise TypeError('Could not compare {typ} type with Series'
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\ops.py in na_op(x, y)
1167 result = method(y)
1168 if result is NotImplemented:
-> 1169 raise TypeError("invalid type comparison")
1170 else:
1171 result = op(x, y)
TypeError: invalid type comparison
C:\Users\jk2588\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\ops.py:1167: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
result = method(y)
--------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-38-8665030edb1c> in <module>()
24 ax[i] = fig.add_subplot(gs[i, 0])
25 ax_settings(ax[i], 'Age: ' + str(features[i]), -1000, 20000)
---> 26 sns.kdeplot(data=df_gp[(df_gp.GENDER == 'M') & (df_gp.Age == features[i])].Dement, ax=ax[i], shade=True, color="blue", bw=300, legend=False)
27 sns.kdeplot(data=df_gp[(df_gp.GENDER == 'F') & (df_gp.Age == features[i])].Dement, ax=ax[i], shade=True, color="red", bw=300, legend=False)
28 if i < (number_gp - 1): ax[i].set_xticks([])
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\ops.py in wrapper(self, other, axis)
1281
1282 with np.errstate(all='ignore'):
-> 1283 res = na_op(values, other)
1284 if is_scalar(res):
1285 raise TypeError('Could not compare {typ} type with Series'
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\ops.py in na_op(x, y)
1167 result = method(y)
1168 if result is NotImplemented:
-> 1169 raise TypeError("invalid type comparison")
1170 else:
1171 result = op(x, y)
TypeError: invalid type comparison
请帮帮我,我这周遇到了大量荒谬的错误。
如果不知道数据是什么样子,很难说。也许
'age'
列是一个数字,而你正在将它与 features 中的字符串进行比较
?刚刚用 df.head() 和 df.info() 的输出更新了问题。我知道 'Dement' 列中有一些 NaN,所以我才用 .fillna() 把它们替换为零,然后用 .astype() 将该列转换为 'int64'。但我仍然看到 "invalid type comparison" 这个 TypeError。如果您需要更多信息,请告诉我。age
似乎是一个数字,features
只包含字符串。这样比较行不通。您需要用更复杂的方法来判断年龄落在哪个范围内,或者为每个人新增一列来存储对应的 feature?如果不知道数据是什么样子,很难说。可能'age'
列是一个数字,而您正在将它与 features 中的字符串进行比较。刚刚用 df.head() 和 df.info() 的输出更新了问题。我知道 'Dement' 列中有一些 NaN,所以我才用 .fillna() 把它们替换为零,然后用 .astype() 将该列转换为 'int64'。但我仍然看到 "invalid type comparison" 这个 TypeError。如果您需要更多信息,请告诉我。age
似乎是一个数字,features
只包含字符串。这样比较行不通。您需要用更复杂的方法来判断年龄落在哪个范围内,或者为每个人新增一列来存储对应的 feature?