Python:检查 pandas DataFrame 中是否存在某一行
标签:python, pandas
我想检查 DataFrame 中是否存在某一行,下面是我的代码:
# Load only the four columns used below.
# NOTE(review): error_bad_lines was removed in pandas 2.0 (replacement:
# on_bad_lines='skip') -- confirm the pandas version this is run with.
df = pd.read_csv('dbo.Access_Stat_all.csv',error_bad_lines=False, usecols=['Name','Format','Resource_ID','Number'])
# Keep the rows that belong to a single resource.
df1 = df[df['Resource_ID'] == 30957]
df1 = df1[['Format','Name','Number']]
# Collapse to one row per (Format, Name) pair, taking the last 'Number' of each group.
df1 = df1.groupby(['Format','Name'], as_index=True).last()
# Print floats with a thousands separator and no decimals.
pd.options.display.float_format = '{:,.0f}'.format
# Move 'Name' from the row index into the columns; 'Format' remains the row index.
df1 = df1.unstack()
df1.columns = df1.columns.droplevel()
# NOTE(review): `in` applied to a DataFrame tests the column labels, not the
# row index, so this condition does not detect a row labelled 'entry'.
# A row-label test would be `'entry' in df1.index`.
# NOTE(review): the bodies of the if/else below lost their indentation in
# this listing.
if 'entry' in df1:
df2 = df1[1:4].sum(axis=0)
else:
df2 = df1[0:3].sum(axis=0)
# Name the Series so it shows up as a row labelled 'sum' once appended.
df2.name = 'sum'
# NOTE(review): DataFrame.append was removed in pandas 2.0 (replacement:
# pd.concat) -- confirm the pandas version this is run with.
df2 = df1.append(df2)
print(df2)
这是输出:
Name Apr 2013 Apr 2014 Apr 2015 Apr 2016 Apr 2017 Aug 2010 Aug 2013
Format
entry 0 0 0 1 4 1 0
pdf 13 12 4 23 7 1 9
sum 13 12 4 24 11 2 9
代码中的 `if 'entry' in df1` 是否只检查 'entry' 是否作为列存在?我想一定是这样。我们可以看到名为 'entry' 的行是存在的,但程序仍然进入了 else 分支(如果进入的是 if 分支,2016 年 4 月的 sum 应该是 23,而不是 24)。
如果我换一个没有 'entry' 行的文件来检查,它同样会进入 else 分支(这符合我的预期),所以我推测它总是进入 else 分支。
如何检查 pandas 中是否存在某一行?
我认为您需要比较索引值——比较的结果是一个由 `True` 和 `False` 组成的 numpy 数组。要得到单个标量结果,需要使用 `any`(检查是否至少有一个 `True`)或 `all`(检查是否所有值均为 `True`):
(df.index == 'entry').any()
(df.index == 'entry').all()
另一个解决方案来自下面的评论:`'entry' in df.index`
如果需要检查子字符串:
df.index.str.contains('en').any()
样本:
df = pd.DataFrame({'Apr 2013':[1,2,3]}, index=['entry','pdf','sum'])
print(df)
Apr 2013
entry 1
pdf 2
sum 3
print (df.index == 'entry')
[ True False False]
print ((df.index == 'entry').any())
True
print ((df.index == 'entry').all())
False
检查 DataFrame 中是否存在某一行的另一种方法是使用 `df.loc`:`subDataFrame = dataFrame.loc[dataFrame[columnName] == value]`。下面的代码逐个检查给定行中(以逗号分隔)的每个“值”;如果 DataFrame 中存在该行则返回 True,否则返回 False。
# ***** Code for 'Check if a line exists in dataframe' using Pandas *****
def isfloat(value):
    """Return True if ``float(value)`` succeeds, otherwise False.

    Strings such as ``'nan'`` and ``'inf'`` are accepted by ``float`` and
    therefore count as numbers here.
    """
    try:
        float(value)
    except (TypeError, ValueError, OverflowError):
        # Only the errors float() raises for unconvertible input mean
        # "not a number"; KeyboardInterrupt and SystemExit still propagate.
        return False
    return True
def getMergedListFromTwoLists(list1, list2):
    """Pair the items of two lists position by position.

    Every item of ``list2`` that ``float`` can parse is converted to a float,
    because the dataframe stores such values as numbers; any other item is
    kept unchanged. Pairing stops at the end of the shorter list.

    Example:
        list1 = ['D', 'C', 'B', 'A']
        list2 = ['OK', 'Good', '82', 'Great']
        result = [['D', 'OK'], ['C', 'Good'], ['B', 82.0], ['A', 'Great']]

    Returns:
        A list of ``[item_from_list1, item_from_list2]`` pairs.
    """
    def _to_number(raw):
        # Convert once instead of probing first and converting again.
        try:
            return float(raw)
        except (TypeError, ValueError, OverflowError):
            return raw

    return [[name, _to_number(raw)] for name, raw in zip(list1, list2)]
def getSubDataFrame(dataFrame, valuesAsArray):
    """Return the rows of ``dataFrame`` whose column equals a given value.

    ``valuesAsArray`` is a ``[column_name, value]`` pair; for example
    ``['Symbol', 'AAPL']`` selects the rows whose 'Symbol' equals 'AAPL'.
    """
    column_name, wanted = valuesAsArray[0], valuesAsArray[1]
    return dataFrame.loc[dataFrame[column_name] == wanted]
def createDataFrameAsExample():
    """Build the four-row stock-quote dataframe used by the example.

    Returns:
        A pandas DataFrame with the columns MarketCenter, Symbol, Date, Time,
        ShortType, Size and Price, in that order.
    """
    import pandas as pd

    data = {
        'MarketCenter': ['T', 'T', 'T', 'T'],
        'Symbol': ['AAPL', 'FB', 'AAPL', 'FB'],
        'Date': [20190101, 20190102, 20190201, 20190301],
        'Time': ['08:00:00', '08:00:00', '09:00:00', '09:00:00'],
        'ShortType': ['S', 'S', 'S', 'S'],
        'Size': [10, 10, 20, 30],
        'Price': [100, 100, 300, 200],
    }
    # Passing the column list explicitly fixes the column order.
    dfHeadLineAsArray = ['MarketCenter', 'Symbol', 'Date', 'Time', 'ShortType', 'Size', 'Price']
    return pd.DataFrame(data, columns=dfHeadLineAsArray)
def adapterCheckIfLineExistsInDataFrame(originalDataFrame, headlineAsArray, line):
    """Check whether a comma-separated line matches a row of the dataframe.

    ``line`` (for example 'T,AAPL,20190101,08:00:00,S,10,100') is split on
    commas and its fields are paired, in order, with the column names in
    ``headlineAsArray`` by getMergedListFromTwoLists. The resulting pairs are
    handed to checkIfLineExistsInDataFrame, whose result is returned.
    """
    lineAsArray = line.split(',')
    valuesAsArray = getMergedListFromTwoLists(headlineAsArray, lineAsArray)
    return checkIfLineExistsInDataFrame(originalDataFrame, valuesAsArray)
def checkIfLineExistsInDataFrame(originalDataFrame, valuesAsArray):
    """Return True if some row matches every ``[column, value]`` pair.

    The dataframe is narrowed one pair at a time to the rows whose column
    equals the value. The search stops as soon as no rows are left.

    Args:
        originalDataFrame: the pandas DataFrame to search.
        valuesAsArray: iterable of ``[column_name, value]`` pairs.

    Returns:
        True if at least one row satisfies all pairs, otherwise False. An
        empty dataframe always yields False.
    """
    remaining = originalDataFrame
    for pair in valuesAsArray:
        # Stop before touching further columns once nothing can match.
        if remaining.empty:
            return False
        column_name, wanted = pair[0], pair[1]
        remaining = remaining.loc[remaining[column_name] == wanted]
    return not remaining.empty
def testExample():
    """Run the line check on the example dataframe and print the outcome.

    Three made-up lines and one line that is present in the dataframe are
    checked; the expected and the actual results are printed side by side.
    """
    dataFrame = createDataFrameAsExample()
    dfHeadLineAsArray = ['MarketCenter', 'Symbol', 'Date', 'Time', 'ShortType', 'Size', 'Price']
    linesToCheck = [
        # Three made up lines (not in df)
        'T,FB,20190102,13:00:00,S,10,100',
        'T,FB,20190102,08:00:00,S,60,100',
        'T,FB,20190102,08:00:00,S,10,150',
        # This line exists in the dataframe
        'T,FB,20190102,08:00:00,S,10,100',
    ]
    results = [
        adapterCheckIfLineExistsInDataFrame(dataFrame, dfHeadLineAsArray, line)
        for line in linesToCheck
    ]
    expected = 'False False False True'
    print('Expected:', expected)
    print('Method:', *results)
# Run the example when this script is executed.
testExample()
有一个简短的示例,使用Stocks作为数据帧
# ***** Code for 'Check if a line exists in dataframe' using Pandas *****
def isfloat(value):
    """Return True if ``float(value)`` succeeds, otherwise False.

    Strings such as ``'nan'`` and ``'inf'`` are accepted by ``float`` and
    therefore count as numbers here.
    """
    try:
        float(value)
    except (TypeError, ValueError, OverflowError):
        # Only the errors float() raises for unconvertible input mean
        # "not a number"; KeyboardInterrupt and SystemExit still propagate.
        return False
    return True
def getMergedListFromTwoLists(list1, list2):
    """Pair the items of two lists position by position.

    Every item of ``list2`` that ``float`` can parse is converted to a float,
    because the dataframe stores such values as numbers; any other item is
    kept unchanged. Pairing stops at the end of the shorter list.

    Example:
        list1 = ['D', 'C', 'B', 'A']
        list2 = ['OK', 'Good', '82', 'Great']
        result = [['D', 'OK'], ['C', 'Good'], ['B', 82.0], ['A', 'Great']]

    Returns:
        A list of ``[item_from_list1, item_from_list2]`` pairs.
    """
    def _to_number(raw):
        # Convert once instead of probing first and converting again.
        try:
            return float(raw)
        except (TypeError, ValueError, OverflowError):
            return raw

    return [[name, _to_number(raw)] for name, raw in zip(list1, list2)]
def getSubDataFrame(dataFrame, valuesAsArray):
    """Return the rows of ``dataFrame`` whose column equals a given value.

    ``valuesAsArray`` is a ``[column_name, value]`` pair; for example
    ``['Symbol', 'AAPL']`` selects the rows whose 'Symbol' equals 'AAPL'.
    """
    column_name, wanted = valuesAsArray[0], valuesAsArray[1]
    return dataFrame.loc[dataFrame[column_name] == wanted]
def createDataFrameAsExample():
    """Build the four-row stock-quote dataframe used by the example.

    Returns:
        A pandas DataFrame with the columns MarketCenter, Symbol, Date, Time,
        ShortType, Size and Price, in that order.
    """
    import pandas as pd

    data = {
        'MarketCenter': ['T', 'T', 'T', 'T'],
        'Symbol': ['AAPL', 'FB', 'AAPL', 'FB'],
        'Date': [20190101, 20190102, 20190201, 20190301],
        'Time': ['08:00:00', '08:00:00', '09:00:00', '09:00:00'],
        'ShortType': ['S', 'S', 'S', 'S'],
        'Size': [10, 10, 20, 30],
        'Price': [100, 100, 300, 200],
    }
    # Passing the column list explicitly fixes the column order.
    dfHeadLineAsArray = ['MarketCenter', 'Symbol', 'Date', 'Time', 'ShortType', 'Size', 'Price']
    return pd.DataFrame(data, columns=dfHeadLineAsArray)
def adapterCheckIfLineExistsInDataFrame(originalDataFrame, headlineAsArray, line):
    """Check whether a comma-separated line matches a row of the dataframe.

    ``line`` (for example 'T,AAPL,20190101,08:00:00,S,10,100') is split on
    commas and its fields are paired, in order, with the column names in
    ``headlineAsArray`` by getMergedListFromTwoLists. The resulting pairs are
    handed to checkIfLineExistsInDataFrame, whose result is returned.
    """
    lineAsArray = line.split(',')
    valuesAsArray = getMergedListFromTwoLists(headlineAsArray, lineAsArray)
    return checkIfLineExistsInDataFrame(originalDataFrame, valuesAsArray)
def checkIfLineExistsInDataFrame(originalDataFrame, valuesAsArray):
    """Return True if some row matches every ``[column, value]`` pair.

    The dataframe is narrowed one pair at a time to the rows whose column
    equals the value. The search stops as soon as no rows are left.

    Args:
        originalDataFrame: the pandas DataFrame to search.
        valuesAsArray: iterable of ``[column_name, value]`` pairs.

    Returns:
        True if at least one row satisfies all pairs, otherwise False. An
        empty dataframe always yields False.
    """
    remaining = originalDataFrame
    for pair in valuesAsArray:
        # Stop before touching further columns once nothing can match.
        if remaining.empty:
            return False
        column_name, wanted = pair[0], pair[1]
        remaining = remaining.loc[remaining[column_name] == wanted]
    return not remaining.empty
def testExample():
    """Run the line check on the example dataframe and print the outcome.

    Three made-up lines and one line that is present in the dataframe are
    checked; the expected and the actual results are printed side by side.
    """
    dataFrame = createDataFrameAsExample()
    dfHeadLineAsArray = ['MarketCenter', 'Symbol', 'Date', 'Time', 'ShortType', 'Size', 'Price']
    linesToCheck = [
        # Three made up lines (not in df)
        'T,FB,20190102,13:00:00,S,10,100',
        'T,FB,20190102,08:00:00,S,60,100',
        'T,FB,20190102,08:00:00,S,10,150',
        # This line exists in the dataframe
        'T,FB,20190102,08:00:00,S,10,100',
    ]
    results = [
        adapterCheckIfLineExistsInDataFrame(dataFrame, dfHeadLineAsArray, line)
        for line in linesToCheck
    ]
    expected = 'False False False True'
    print('Expected:', expected)
    print('Method:', *results)
# Run the example when this script is executed.
testExample()
单击以查看数据帧
`'entry' in df.index`
或 `df.index.contains('entry')`(编者注:`Index.contains` 已在 pandas 1.0 中移除,请使用 `in`)
?谢谢,这是我需要做的。这是我认为你如何回答一个问题(附示例)+1@AntonvBR-谢谢。不过有一件小事,你“忽略”的另一个有趣的事实是熊猫是建立在numpy的基础上的,如果你可以这么说的话,这就是为什么我们可以进行这些比较,例如。**df.index==“entry”**这实际上是一个numpy.ndarray.@AntonvBR-但是如何才能最好地写出我的答案呢?如果有时间,你能用这些信息编辑我的答案吗?(我的英语不太好)。谢谢。让我们保持原样吧!回头见。考虑到你想用你的例子来说明一个比较孤立的方面,它并不是很短。
# ***** Code for 'Check if a line exists in dataframe' using Pandas *****
def isfloat(value):
    """Return True if ``float(value)`` succeeds, otherwise False.

    Strings such as ``'nan'`` and ``'inf'`` are accepted by ``float`` and
    therefore count as numbers here.
    """
    try:
        float(value)
    except (TypeError, ValueError, OverflowError):
        # Only the errors float() raises for unconvertible input mean
        # "not a number"; KeyboardInterrupt and SystemExit still propagate.
        return False
    return True
def getMergedListFromTwoLists(list1, list2):
    """Pair the items of two lists position by position.

    Every item of ``list2`` that ``float`` can parse is converted to a float,
    because the dataframe stores such values as numbers; any other item is
    kept unchanged. Pairing stops at the end of the shorter list.

    Example:
        list1 = ['D', 'C', 'B', 'A']
        list2 = ['OK', 'Good', '82', 'Great']
        result = [['D', 'OK'], ['C', 'Good'], ['B', 82.0], ['A', 'Great']]

    Returns:
        A list of ``[item_from_list1, item_from_list2]`` pairs.
    """
    def _to_number(raw):
        # Convert once instead of probing first and converting again.
        try:
            return float(raw)
        except (TypeError, ValueError, OverflowError):
            return raw

    return [[name, _to_number(raw)] for name, raw in zip(list1, list2)]
def getSubDataFrame(dataFrame, valuesAsArray):
    """Return the rows of ``dataFrame`` whose column equals a given value.

    ``valuesAsArray`` is a ``[column_name, value]`` pair; for example
    ``['Symbol', 'AAPL']`` selects the rows whose 'Symbol' equals 'AAPL'.
    """
    column_name, wanted = valuesAsArray[0], valuesAsArray[1]
    return dataFrame.loc[dataFrame[column_name] == wanted]
def createDataFrameAsExample():
    """Build the four-row stock-quote dataframe used by the example.

    Returns:
        A pandas DataFrame with the columns MarketCenter, Symbol, Date, Time,
        ShortType, Size and Price, in that order.
    """
    import pandas as pd

    data = {
        'MarketCenter': ['T', 'T', 'T', 'T'],
        'Symbol': ['AAPL', 'FB', 'AAPL', 'FB'],
        'Date': [20190101, 20190102, 20190201, 20190301],
        'Time': ['08:00:00', '08:00:00', '09:00:00', '09:00:00'],
        'ShortType': ['S', 'S', 'S', 'S'],
        'Size': [10, 10, 20, 30],
        'Price': [100, 100, 300, 200],
    }
    # Passing the column list explicitly fixes the column order.
    dfHeadLineAsArray = ['MarketCenter', 'Symbol', 'Date', 'Time', 'ShortType', 'Size', 'Price']
    return pd.DataFrame(data, columns=dfHeadLineAsArray)
def adapterCheckIfLineExistsInDataFrame(originalDataFrame, headlineAsArray, line):
    """Check whether a comma-separated line matches a row of the dataframe.

    ``line`` (for example 'T,AAPL,20190101,08:00:00,S,10,100') is split on
    commas and its fields are paired, in order, with the column names in
    ``headlineAsArray`` by getMergedListFromTwoLists. The resulting pairs are
    handed to checkIfLineExistsInDataFrame, whose result is returned.
    """
    lineAsArray = line.split(',')
    valuesAsArray = getMergedListFromTwoLists(headlineAsArray, lineAsArray)
    return checkIfLineExistsInDataFrame(originalDataFrame, valuesAsArray)
def checkIfLineExistsInDataFrame(originalDataFrame, valuesAsArray):
    """Return True if some row matches every ``[column, value]`` pair.

    The dataframe is narrowed one pair at a time to the rows whose column
    equals the value. The search stops as soon as no rows are left.

    Args:
        originalDataFrame: the pandas DataFrame to search.
        valuesAsArray: iterable of ``[column_name, value]`` pairs.

    Returns:
        True if at least one row satisfies all pairs, otherwise False. An
        empty dataframe always yields False.
    """
    remaining = originalDataFrame
    for pair in valuesAsArray:
        # Stop before touching further columns once nothing can match.
        if remaining.empty:
            return False
        column_name, wanted = pair[0], pair[1]
        remaining = remaining.loc[remaining[column_name] == wanted]
    return not remaining.empty
def testExample():
    """Run the line check on the example dataframe and print the outcome.

    Three made-up lines and one line that is present in the dataframe are
    checked; the expected and the actual results are printed side by side.
    """
    dataFrame = createDataFrameAsExample()
    dfHeadLineAsArray = ['MarketCenter', 'Symbol', 'Date', 'Time', 'ShortType', 'Size', 'Price']
    linesToCheck = [
        # Three made up lines (not in df)
        'T,FB,20190102,13:00:00,S,10,100',
        'T,FB,20190102,08:00:00,S,60,100',
        'T,FB,20190102,08:00:00,S,10,150',
        # This line exists in the dataframe
        'T,FB,20190102,08:00:00,S,10,100',
    ]
    results = [
        adapterCheckIfLineExistsInDataFrame(dataFrame, dfHeadLineAsArray, line)
        for line in linesToCheck
    ]
    expected = 'False False False True'
    print('Expected:', expected)
    print('Method:', *results)
# Run the example when this script is executed.
testExample()