Python 如果dataframe中有字符串列,为什么得到NaN列

Python 如果dataframe中有字符串列,为什么得到NaN列,python,pandas,Python,Pandas,代码 如果我设置统计列emtry 它是工作价格栏可以输出值 输出 import gspread from oauth2client.service_account import ServiceAccountCredentials from gspread_dataframe import get_as_dataframe, set_with_dataframe scope = ["https://spreadsheets.google.com/feeds",'https

代码

如果我设置统计列emtry 它是工作价格栏可以输出值

输出

import gspread
from oauth2client.service_account import ServiceAccountCredentials
from gspread_dataframe import get_as_dataframe, set_with_dataframe

scope = ["https://spreadsheets.google.com/feeds",'https://www.googleapis.com/auth/spreadsheets',"https://www.googleapis.com/auth/drive.file","https://www.googleapis.com/auth/drive"]
creds = ServiceAccountCredentials.from_json_keyfile_name("API.json", scope)
gc = gspread.authorize(creds)

wsMap = gc.open("Data").worksheet('Test')
dfMap = get_as_dataframe(wsMap)

def check():
    for i, row in dfMap.iterrows():
        row['price'] = i
        print(row['price'])
        if i == 5:
            print(dfMap)
            dfMapp = dfMap.loc[:, ~dfMap.columns.str.contains('^Unnamed')]  
            set_with_dataframe(gc.open("Data").worksheet('Test'), dfMapp) 
            return

check()
0.0
1.0
2.0
3.0
4.0
5.0
     Number  price  Stat  ... 
0     0.190    0.0   NaN  ...  
1     0.195    1.0   NaN  ...  
2     0.200    2.0   NaN  ...   
3     0.205    3.0   NaN  ...  
4     0.210    4.0   NaN  ...   
0.0
1.0
2.0
3.0
4.0
5.0
     Number  price  Stat  ... 
0     0.190    0.0   9.0  ...        
1     0.195    1.0   9.0  ...        
2     0.200    2.0   9.0  ...         
3     0.205    3.0   9.0  ...     
4     0.210    4.0   9.0  ...   
0
1
2
3
4
5
     Number  price Stat  ...
0     0.190    NaN  buy  ...  
1     0.195    NaN  buy  ... 
2     0.200    NaN  buy  ... 
3     0.205    NaN  buy  ...  
4     0.210    NaN  buy  ...     
或者如果统计列为int,则浮动值价格列仍可以输出

输出

import gspread
from oauth2client.service_account import ServiceAccountCredentials
from gspread_dataframe import get_as_dataframe, set_with_dataframe

scope = ["https://spreadsheets.google.com/feeds",'https://www.googleapis.com/auth/spreadsheets',"https://www.googleapis.com/auth/drive.file","https://www.googleapis.com/auth/drive"]
creds = ServiceAccountCredentials.from_json_keyfile_name("API.json", scope)
gc = gspread.authorize(creds)

wsMap = gc.open("Data").worksheet('Test')
dfMap = get_as_dataframe(wsMap)

def check():
    for i, row in dfMap.iterrows():
        row['price'] = i
        print(row['price'])
        if i == 5:
            print(dfMap)
            dfMapp = dfMap.loc[:, ~dfMap.columns.str.contains('^Unnamed')]  
            set_with_dataframe(gc.open("Data").worksheet('Test'), dfMapp) 
            return

check()
0.0
1.0
2.0
3.0
4.0
5.0
     Number  price  Stat  ... 
0     0.190    0.0   NaN  ...  
1     0.195    1.0   NaN  ...  
2     0.200    2.0   NaN  ...   
3     0.205    3.0   NaN  ...  
4     0.210    4.0   NaN  ...   
0.0
1.0
2.0
3.0
4.0
5.0
     Number  price  Stat  ... 
0     0.190    0.0   9.0  ...        
1     0.195    1.0   9.0  ...        
2     0.200    2.0   9.0  ...         
3     0.205    3.0   9.0  ...     
4     0.210    4.0   9.0  ...   
0
1
2
3
4
5
     Number  price Stat  ...
0     0.190    NaN  buy  ...  
1     0.195    NaN  buy  ... 
2     0.200    NaN  buy  ... 
3     0.205    NaN  buy  ...  
4     0.210    NaN  buy  ...     
但如果我设置统计列的srting值为'buy'或数字与字母混合,如'x1' 价格列不能按返回值输出值

输出

import gspread
from oauth2client.service_account import ServiceAccountCredentials
from gspread_dataframe import get_as_dataframe, set_with_dataframe

scope = ["https://spreadsheets.google.com/feeds",'https://www.googleapis.com/auth/spreadsheets',"https://www.googleapis.com/auth/drive.file","https://www.googleapis.com/auth/drive"]
creds = ServiceAccountCredentials.from_json_keyfile_name("API.json", scope)
gc = gspread.authorize(creds)

wsMap = gc.open("Data").worksheet('Test')
dfMap = get_as_dataframe(wsMap)

def check():
    for i, row in dfMap.iterrows():
        row['price'] = i
        print(row['price'])
        if i == 5:
            print(dfMap)
            dfMapp = dfMap.loc[:, ~dfMap.columns.str.contains('^Unnamed')]  
            set_with_dataframe(gc.open("Data").worksheet('Test'), dfMapp) 
            return

check()
0.0
1.0
2.0
3.0
4.0
5.0
     Number  price  Stat  ... 
0     0.190    0.0   NaN  ...  
1     0.195    1.0   NaN  ...  
2     0.200    2.0   NaN  ...   
3     0.205    3.0   NaN  ...  
4     0.210    4.0   NaN  ...   
0.0
1.0
2.0
3.0
4.0
5.0
     Number  price  Stat  ... 
0     0.190    0.0   9.0  ...        
1     0.195    1.0   9.0  ...        
2     0.200    2.0   9.0  ...         
3     0.205    3.0   9.0  ...     
4     0.210    4.0   9.0  ...   
0
1
2
3
4
5
     Number  price Stat  ...
0     0.190    NaN  buy  ...  
1     0.195    NaN  buy  ... 
2     0.200    NaN  buy  ... 
3     0.205    NaN  buy  ...  
4     0.210    NaN  buy  ...     
输出

import gspread
from oauth2client.service_account import ServiceAccountCredentials
from gspread_dataframe import get_as_dataframe, set_with_dataframe

scope = ["https://spreadsheets.google.com/feeds",'https://www.googleapis.com/auth/spreadsheets',"https://www.googleapis.com/auth/drive.file","https://www.googleapis.com/auth/drive"]
creds = ServiceAccountCredentials.from_json_keyfile_name("API.json", scope)
gc = gspread.authorize(creds)

wsMap = gc.open("Data").worksheet('Test')
dfMap = get_as_dataframe(wsMap)

def check():
    for i, row in dfMap.iterrows():
        row['price'] = i
        print(row['price'])
        if i == 5:
            print(dfMap)
            dfMapp = dfMap.loc[:, ~dfMap.columns.str.contains('^Unnamed')]  
            set_with_dataframe(gc.open("Data").worksheet('Test'), dfMapp) 
            return

check()
0.0
1.0
2.0
3.0
4.0
5.0
     Number  price  Stat  ... 
0     0.190    0.0   NaN  ...  
1     0.195    1.0   NaN  ...  
2     0.200    2.0   NaN  ...   
3     0.205    3.0   NaN  ...  
4     0.210    4.0   NaN  ...   
0.0
1.0
2.0
3.0
4.0
5.0
     Number  price  Stat  ... 
0     0.190    0.0   9.0  ...        
1     0.195    1.0   9.0  ...        
2     0.200    2.0   9.0  ...         
3     0.205    3.0   9.0  ...     
4     0.210    4.0   9.0  ...   
0
1
2
3
4
5
     Number  price Stat  ...
0     0.190    NaN  buy  ...  
1     0.195    NaN  buy  ... 
2     0.200    NaN  buy  ... 
3     0.205    NaN  buy  ...  
4     0.210    NaN  buy  ...     

如何修复此错误?我想要价格列输出0 2 3 4,如果统计列是字符串值

您的问题很长,但我认为这一点是直截了当的。创建数据帧后

  • 删除名为Unnamed的列。我们使用了一个列表来理解这一点
  • 删除不相关的行(
    dropna(how=“all”)
  • 现在,无论我在统计中随机放置什么,价格仍然是浮动的。要获得工作表中的活动数据,所有的清理代码都过于复杂

    0
    1
    2
    3
    4
    5
         Number  price Stat  ...  
    0     0.190    NaN  xi1  ...          
    1     0.195    NaN  gc7  ...  
    2     0.200    NaN  ys0  ...       
    3     0.205    NaN    9  ...  
    4     0.210    NaN    9  ...
    
    输出

    import gspread
    from oauth2client.service_account import ServiceAccountCredentials
    from oauth2client.file import Storage
    from gspread_dataframe import get_as_dataframe, set_with_dataframe
    from pathlib import Path
    import random
    
    # different oauth setup
    f = Path("~/.credentials//sheets.googleapis.com-python-quickstart.json").expanduser()
    gc = gspread.authorize(Storage(str(f)).get())
    
    # open google sheet and read as data frame
    df = get_as_dataframe(gc.open("Data").worksheet("Test"))
    
    # cleanup the dataframe,  remove columns that don't have names and rows that are full of NaNs
    df = df.drop(columns=[c for c in df.columns if "Unnamed" in c]).dropna(how="all")
    
    # set Stat to string values...
    df = df.assign(Stat=[["buy","sell",1,np.nan][random.randint(0,3)] for x in df["Stat"].values])
    
    print(f"{df.dtypes}\n\n{df.to_string(index=False)}")