Pandas Jupyter:"TypeError: invalid type comparison"(无效的类型比较)

Pandas Jupyter:"TypeError: invalid type comparison"(无效的类型比较),pandas,numpy,matplotlib,seaborn,jupyter,Pandas,Numpy,Matplotlib,Seaborn,Jupyter,好的,我刚开始一份新工作,我的任务是用jupyter写一个简单的笔记本。我真的很想给我的主管留下深刻的印象,我已经花了好几个小时来编写这个代码,但却无法让它正常工作,希望这里有人能帮助我 以下是我一直在研究的代码: import numpy as np import pandas as pd import matplotlib import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import seaborn

好的,我刚开始一份新工作,我的任务是用 Jupyter 写一个简单的笔记本。我很想给主管留下好印象,已经花了好几个小时编写这段代码,却始终无法让它正常运行,希望这里有人能帮帮我。

以下是我一直在研究的代码:

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns

# Load only the four columns used below. The GENDER converter turns the raw
# text into an integer flag: 1 when the value is exactly 'Male', 0 otherwise.
df = pd.read_csv(r'C:\Users\jk2588\Documents\EDA\EDA Practice\top1000_dataset.csv', converters={'GENDER': lambda x: int(x == 'Male')}, usecols = ['MEMBER_ID', 'GENDER', 'Age', 'Dement'])
# Per-member mean of Dement.
df_gp_1 = df[['MEMBER_ID', 'Dement']].groupby('MEMBER_ID').agg(np.mean).reset_index()
# Per-member maximum of GENDER and Age.
df_gp_2 = df[['MEMBER_ID', 'GENDER', 'Age']].groupby('MEMBER_ID').agg(max).reset_index()
# Join both aggregates on MEMBER_ID: Dement (mean) plus GENDER and Age (max).
df_gp = pd.merge(df_gp_1, df_gp_2, on = ['MEMBER_ID'])
# Notebook display of the first rows of the raw (un-aggregated) frame.
df.head()
输出:
   MEMBER_ID      Age  Dement  GENDER
0  00000000 2 01  36   NaN     0
1  00000000 2 01  36   NaN     0
2  00000000 2 01  36   NaN     0
3  00000000 2 01  36   NaN     0
4  00000000 2 01  36   NaN     0

# Replace missing Dement values with 0 so the column can be cast to int64.
# NOTE(review): as ordered here, df_gp was already built from df before this
# cleanup, so df_gp.Dement is not affected by it — confirm the intended cell
# execution order.
df['Dement'] = df['Dement'].fillna(0)
df['Dement'] = df['Dement'].astype('int64')

# Print the column dtypes and non-null counts of the raw frame.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 999 entries, 0 to 998
Data columns (total 4 columns):
MEMBER_ID    999 non-null object
Age          999 non-null int64
Dement       999 non-null int64
GENDER       999 non-null int64
dtypes: int64(3), object(1)
memory usage: 31.3+ KB

# Share of each distinct Age value in df_gp, as a percentage, ordered by the
# Age value itself. Relies on reset_index() putting the Age values in a column
# named 'index' and the normalized counts in a column named 'Age'.
# NOTE(review): this list is later drawn against the 7 labels in `features`;
# its length is the number of distinct ages, which need not be 7 — confirm.
freq = ((df_gp.Age.value_counts(normalize = True).reset_index().sort_values(by = 'index').Age)*100).tolist()
# Number of age groups; also the number of rows in the plotting grid.
number_gp = 7
def ax_settings(ax, var_name, x_min, x_max):
    """Apply the shared look of one density panel and label it.

    The x range is limited to ``[x_min, x_max]``, the y ticks are removed,
    the left/right/top spines are hidden and the bottom spine is drawn in
    ``#444444`` with a line width of 2. ``var_name`` is written in bold at
    (0.02, 0.05) in axes coordinates.

    Returns ``None``; the axis is modified in place.
    """
    ax.set_xlim(x_min, x_max)
    ax.set_yticks([])

    # Only the bottom spine stays visible.
    for side in ('left', 'right', 'top'):
        ax.spines[side].set_visible(False)

    baseline = ax.spines['bottom']
    baseline.set_edgecolor('#444444')
    baseline.set_linewidth(2)

    ax.text(
        0.02, 0.05, var_name,
        fontsize=17, fontweight="bold", transform=ax.transAxes,
    )
# One figure split into a number_gp x 2 grid: the left column (width ratio 3)
# holds one density panel per age group, the right column (width ratio 1) is
# later used for a single bar chart spanning all rows.
fig = plt.figure(figsize=(12,7))
gs = gridspec.GridSpec(nrows=number_gp, 
                   ncols=2, 
                   figure=fig, 
                   width_ratios= [3, 1],
                   height_ratios= [1]*number_gp,
                   wspace=0.2, hspace=0.05
                  )
# Slots 0..number_gp-1 hold the per-group axes; the last slot holds the bar chart.
ax = [None]*(number_gp + 1)
# Age-bracket labels, one per panel.
features = ['0-17', '18-25', '26-35', '36-45', '46-50', '51-55', '55+']  
# NOTE(review): df.info() above reports Age and GENDER as int64 (GENDER is
# converted to 0/1 when the CSV is read), but the filters in this loop compare
# them with the strings 'M'/'F' and with the bracket labels in `features`.
# That int-vs-string comparison is the statement the "invalid type comparison"
# traceback below points at.
for i in range(number_gp):
    ax[i] = fig.add_subplot(gs[i, 0])
    ax_settings(ax[i], 'Age: ' + str(features[i]), -1000, 20000)    
    # Two density curves of Dement on the same panel: blue for the 'M' filter,
    # red for the 'F' filter.
    sns.kdeplot(data=df_gp[(df_gp.GENDER == 'M') & (df_gp.Age == features[i])].Dement, ax=ax[i], shade=True, color="blue",  bw=300, legend=False)
    sns.kdeplot(data=df_gp[(df_gp.GENDER == 'F') & (df_gp.Age == features[i])].Dement, ax=ax[i], shade=True, color="red",  bw=300, legend=False)
    # Remove the x ticks on every panel except the last (bottom) one.
    if i < (number_gp - 1): ax[i].set_xticks([])
# Legend on the first panel only.
ax[0].legend(['Male', 'Female'], facecolor='w')
# Right-hand column: horizontal bars of `freq` per age label, spanning all rows.
ax[number_gp] = fig.add_subplot(gs[:, 1])
ax[number_gp].spines['right'].set_visible(False)
ax[number_gp].spines['top'].set_visible(False)
ax[number_gp].barh(features, freq, color='#004c99', height=0.4)
# freq holds percentages, so the x axis is fixed to 0-100.
ax[number_gp].set_xlim(0,100)
# Flip the y axis — presumably so the first bracket appears at the top.
ax[number_gp].invert_yaxis()
# Unit label placed just outside the lower-right corner (axes coordinates).
ax[number_gp].text(1.09, -0.04, '(%)', fontsize=10, transform = ax[number_gp].transAxes)   
ax[number_gp].tick_params(axis='y', labelsize = 14)
plt.show()
df['Dement']=df['Dement'].fillna(0)
df['Dement']=df['Dement'].aType('int64'))
df.info()
范围索引:999个条目,0到998
数据列(共4列):
成员ID 999非空对象
年龄999非空int64
Dement 999非空int64
性别999非空int64
数据类型:int64(3),对象(1)
内存使用率:31.3+KB
freq=((df_gp.Age.value_counts(normalize=True).reset_index().排序_值(by='index').Age)*100).tolist()
数字\u gp=7
def ax_设置(ax、var_名称、x_最小值、x_最大值):
最大设定值(最小x,最大x)
ax.set_-yticks([])
ax.脊椎['left'].set_可见(假)
ax.spines['右'].set_可见(假)
ax.spines['top'].set_可见(假)
斧形刺['bottom'].set_edgecolor('#4444'))
ax.脊椎['bottom'].设置线宽(2)
ax.text(0.02,0.05,变量名称,fontsize=17,fontwweight=“bold”,transform=ax.transAxes)
一无所获
图=plt.图(图尺寸=(12,7))
gs=gridspec.gridspec(nrows=number\u gp,
ncols=2,
图=图,
宽度比=[3,1],
高度比=[1]*数量比,
wspace=0.2,hspace=0.05
)
ax=[None]*(数字\u gp+1)
特征=['0-17','18-25','26-35','36-45','46-50','51-55','55+']
对于范围内的i(编号\总成):
ax[i]=图add_子图(gs[i,0])
ax_设置(ax[i],“年龄:”+str(特征[i]),-100020000)
sns.kdeplot(数据=df_gp[(df_gp.GENDER='M')和(df_gp.Age==features[i])。Dement,ax=ax[i],shade=True,color=“blue”,bw=300,legend=False)
sns.kdeplot(数据=df_gp[(df_gp.GENDER='F')和(df_gp.Age==features[i])。Dement,ax=ax[i],shade=True,color=“red”,bw=300,legend=False)
如果i<(数字\u gp-1):ax[i]。设置\u xticks([])
ax[0]。图例(['Male','Female'],facecolor='w')
ax[编号\u gp]=图添加\u子批次(gs[:,1])
ax[number_gp]。脊椎['right']。设置_可见(False)
ax[number_gp]。脊椎['top']。设置_可见(False)
ax[数字总成].barh(特征、频率、颜色='#004c99',高度=0.4)
ax[编号\总成].设置\ xlim(0100)
ax[编号\总成]。反转\亚克斯()
ax[number\u gp].文本(1.09,-0.04,(%)',fontsize=10,transform=ax[number\u gp].传输)
ax[编号总成].勾选参数(轴=y',标签大小=14)
plt.show()
然后,我遇到了下面的错误:

    C:\Users\jk2588\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\ops.py:1167: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
    result = method(y)
--------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last
<ipython-input-38-8665030edb1c> in <module>()
 24     ax[i] = fig.add_subplot(gs[i, 0])
 25     ax_settings(ax[i], 'Age: ' + str(features[i]), -1000, 20000)
---> 26     sns.kdeplot(data=df_gp[(df_gp.GENDER == 'M') & (df_gp.Age == features[i])].Dement, ax=ax[i], shade=True, color="blue",  bw=300, legend=False)
27     sns.kdeplot(data=df_gp[(df_gp.GENDER == 'F') & (df_gp.Age == features[i])].Dement, ax=ax[i], shade=True, color="red",  bw=300, legend=False)
28     if i < (number_gp - 1): ax[i].set_xticks([])

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\ops.py in wrapper(self, other, axis)
1281 
1282             with np.errstate(all='ignore'):
-> 1283                 res = na_op(values, other)
1284             if is_scalar(res):
1285                 raise TypeError('Could not compare {typ} type with Series'

~\AppData\Local\Continuum\anaconda3\lib\site-packages\pandas\core\ops.py in na_op(x, y)
1167                     result = method(y)
1168                 if result is NotImplemented:
-> 1169                     raise TypeError("invalid type comparison")
1170             else:
1171                 result = op(x, y)

TypeError: invalid type comparison
C:\Users\jk2588\AppData\Local\Continuum\anaconda3\lib\site packages\pandas\core\ops.py:1167:FutureWarning:elementwise比较失败;而是返回标量,但将来将执行元素级比较
结果=方法(y)
--------------------------------------------------------------------------
TypeError回溯(最近一次调用上次
在()
24 ax[i]=图add_子批次(gs[i,0])
25 ax_设置(ax[i],“年龄:”+str(特征[i]),-100020000)
--->26 sns.kdeplot(数据=df_gp[(df_gp.GENDER='M')和(df_gp.Age==features[i])。Dement,ax=ax[i],shade=True,color=“blue”,bw=300,legend=False)
27 sns.kdeplot(数据=df_gp[(df_gp.GENDER='F')和(df_gp.Age==features[i])。Dement,ax=ax[i],shade=True,color=“red”,bw=300,legend=False)
28如果i<(数字\u gp-1):ax[i]。设置\u xticks([])
包装器中的~\AppData\Local\Continuum\anaconda3\lib\site packages\pandas\core\ops.py(self、other、axis)
1281
1282,带有np.errstate(all='ignore'):
->1283 res=na_op(值,其他)
1284如果是标量(res):
1285 raise TypeError('无法将{typ}类型与序列进行比较'
na_op(x,y)中的~\AppData\Local\Continuum\anaconda3\lib\site packages\pandas\core\ops.py
1167结果=方法(y)
1168如果未执行结果:
->1169 raise TypeError(“无效类型比较”)
1170其他:
1171结果=op(x,y)
TypeError:类型比较无效

请帮帮我,我这周遇到了大量荒谬的错误

如果不知道数据是什么样子,很难判断。也许 'age' 列是数字,而你正在把它与 features 中的字符串进行比较?

刚刚用 df.head() 和 df.info() 的输出更新了问题。我知道 'Dement' 列中有一些 NaN,这就是为什么我先用 .fillna() 把 'Dement' 中的 NaN 替换为零,再用 .astype() 把它转换为 'int64'。但我仍然看到 "invalid type comparison" 的 TypeError。如果您需要更多信息,请告诉我。

age 似乎是数字,而 features 只包含字符串,这样比较是行不通的。您需要更复杂的方法来判断年龄是否落在某个范围内,或者为每个人新增一列来存储对应的 features 取值?

如果不知道数据是什么样子,很难判断。可能 'age' 列是数字,而您正在把它与 features 中的字符串进行比较。

刚刚用 df.head() 和 df.info() 的输出更新了问题。我知道 'Dement' 列中有一些 NaN,这就是为什么我先用 .fillna() 把 'Dement' 中的 NaN 替换为零,再用 .astype() 把它转换为 'int64'。但我仍然看到 "invalid type comparison" 的 TypeError。如果您需要更多信息,请告诉我。

age 似乎是数字,而 features 只包含字符串,这样比较是行不通的。您需要更复杂的方法来判断年龄是否落在某个范围内,或者为每个人新增一列来存储对应的 features 取值?