Python 检查 pandas 中是否存在某一行_Python_Pandas

Python 检查 pandas 中是否存在某一行

python pandas

Python 检查熊猫中是否存在行,python,pandas,Python,Pandas,我想检查dataframe中是否存在一行，下面是我的代码： df = pd.read_csv('dbo.Access_Stat_all.csv',error_bad_lines=False, usecols=['Name','Format','Resource_ID','Number']) df1 = df[df['Resource_ID'] == 30957] df1 = df1[['Format','Name','Number']] df1 = df1.groupby(['Format','

我想检查dataframe中是否存在一行，下面是我的代码：

# Load only the four columns used below.
# NOTE(review): `error_bad_lines` was removed from read_csv in pandas 2.0
# (`on_bad_lines` replaces it) - confirm against the pandas version in use.
df = pd.read_csv('dbo.Access_Stat_all.csv',error_bad_lines=False, usecols=['Name','Format','Resource_ID','Number'])
# Keep the rows of a single resource, then only the columns to be pivoted.
df1 = df[df['Resource_ID'] == 30957]
df1 = df1[['Format','Name','Number']]
# Keep the last Number per (Format, Name) pair; both keys become index levels.
df1 = df1.groupby(['Format','Name'], as_index=True).last()
# Display floats with thousands separators and no decimals.
pd.options.display.float_format = '{:,.0f}'.format
# Move Name into the columns, leaving Format as the row index.
df1 = df1.unstack()
df1.columns = df1.columns.droplevel()
# `in` on a DataFrame tests the column labels, not the row index, so a row
# labelled 'entry' is never found here and the else branch is always taken.
if 'entry' in df1:
    df2 = df1[1:4].sum(axis=0)
else:
    df2 = df1[0:3].sum(axis=0)
df2.name = 'sum'
# NOTE(review): DataFrame.append was removed in pandas 2.0 (pd.concat
# replaces it) - confirm against the pandas version in use.
df2 = df1.append(df2)
print(df2)

这是输出：

Name    Apr 2013  Apr 2014  Apr 2015  Apr 2016  Apr 2017  Aug 2010  Aug 2013  
Format                                                                         

entry          0         0         0         1         4         1         0   
pdf           13        12         4        23         7         1         9   
sum           13        12         4        24        11         2         9

`if 'entry' in df1` 是否只检查 'entry' 是否作为列存在？我想一定是这样。我们可以看到“entry”这一行是存在的，但代码仍然进入了 else 分支（如果进入的是 if 分支，2016 年 4 月的合计应为 23）。

如果我检查了没有“entry”行的文件，它会再次进入else语句（正如我所期望的），所以我假设它总是进入else条件

如何检查pandas中是否存在一行？

我认为您需要比较索引值——输出是由 `True` 和 `False` 组成的 numpy 数组。若需要标量结果，可使用 `any`（检查是否至少有一个 `True`）或 `all`（检查所有值是否均为 `True`）：

(df.index == 'entry').any()

(df.index == 'entry').all()

另一个解决方案来自下面的评论：`'entry' in df.index`

如果需要检查子字符串：

df.index.str.contains('en').any()

样本：

df = pd.DataFrame({'Apr 2013':[1,2,3]}, index=['entry','pdf','sum'])
print(df)
       Apr 2013
entry         1
pdf           2
sum           3

print (df.index == 'entry')
[ True False False]

print ((df.index == 'entry').any())
True
print ((df.index == 'entry').all())
False

检查 DataFrame 中是否存在某一行的另一种方法是使用 df.loc：

subDataFrame=dataFrame.loc[dataFrame[columnName]==value]

这段代码会逐个检查给定行中的每个“值”（以逗号分隔），并根据 DataFrame 中是否存在该行返回 True/False。

# *****     Code for 'Check if a line exists in dataframe' using Pandas     *****

def isfloat(value):
    """Report whether ``value`` can be converted with ``float()``.

    Args:
        value: Any object; typically one comma-separated field as a string.

    Returns:
        True if ``float(value)`` succeeds, False if the conversion fails.
    """
    try:
        float(value)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "not a number"; a bare ``except``
        # would also swallow KeyboardInterrupt and SystemExit.
        return False
    return True


def getMergedListFromTwoLists(list1, list2):
    """Pair the items of two lists position by position.

    Pairing stops at the end of the shorter list. Any item of ``list2``
    that ``isfloat`` accepts is replaced by its ``float`` value, because the
    dataframe stores such values as numbers.

    Example:
        list1 = ['D', 'C', 'B', 'A']
        list2 = ['OK', 'Good', '82', 'Great']
        result = [['D', 'OK'], ['C', 'Good'], ['B', 82.0], ['A', 'Great']]

    Returns:
        A list of two-item lists ``[item_from_list1, item_from_list2]``.
    """
    return [
        [key, float(raw) if isfloat(raw) else raw]
        for key, raw in zip(list1, list2)
    ]


def getSubDataFrame(dataFrame, valuesAsArray):
    """Select the rows whose column equals a given value.

    Args:
        dataFrame: Dataframe to filter.
        valuesAsArray: Pair ``[column, value]``, e.g. ``['Symbol', 'AAPL']``
            keeps only the rows whose 'Symbol' is 'AAPL'.

    Returns:
        The rows of ``dataFrame`` where ``column == value``.
    """
    column = valuesAsArray[0]
    columnData = dataFrame[column]
    matches = columnData == valuesAsArray[1]
    return dataFrame.loc[matches]




def createDataFrameAsExample():
    """Build the small sample table used by the demonstration.

    Returns:
        A pandas DataFrame with four rows and the columns MarketCenter,
        Symbol, Date, Time, ShortType, Size and Price, in that order.
    """
    import pandas as pd

    columnOrder = ['MarketCenter', 'Symbol', 'Date', 'Time', 'ShortType', 'Size', 'Price']
    columnValues = [
        ['T', 'T', 'T', 'T'],
        ['AAPL', 'FB', 'AAPL', 'FB'],
        [20190101, 20190102, 20190201, 20190301],
        ['08:00:00', '08:00:00', '09:00:00', '09:00:00'],
        ['S', 'S', 'S', 'S'],
        [10, 10, 20, 30],
        [100, 100, 300, 200],
    ]
    # Column names and their values are listed in the same order.
    return pd.DataFrame(dict(zip(columnOrder, columnValues)), columns=columnOrder)



def adapterCheckIfLineExistsInDataFrame(originalDataFrame, headlineAsArray, line):
    """Check whether a comma-separated text line matches a dataframe row.

    Args:
        originalDataFrame: Dataframe to search.
        headlineAsArray: Column names, in the same order as the fields of
            ``line``.
        line: Field values joined by commas, e.g.
            'T,AAPL,20190101,08:00:00,S,10,100'.

    Returns:
        The result of ``checkIfLineExistsInDataFrame`` for the
        [column, value] pairs built from the headline and the line.
    """
    fields = line.split(',')
    columnValuePairs = getMergedListFromTwoLists(headlineAsArray, fields)
    return checkIfLineExistsInDataFrame(originalDataFrame, columnValuePairs)



def checkIfLineExistsInDataFrame(originalDataFrame,  valuesAsArray):
    """Report whether some row satisfies every [column, value] pair.

    The dataframe is narrowed one pair at a time via ``getSubDataFrame``;
    the row exists only if something is left after the last pair.

    Args:
        originalDataFrame: Dataframe to search.
        valuesAsArray: Sequence of ``[column, value]`` pairs that a row must
            match simultaneously.

    Returns:
        True if at least one row matches all pairs, otherwise False. An
        empty dataframe always yields False.
    """
    subDataFrame = originalDataFrame
    for value in valuesAsArray:
        if subDataFrame.empty:
            # Nothing left to match against; later pairs cannot change that.
            return False
        subDataFrame = getSubDataFrame(subDataFrame, value)

    # A match exists only if rows survived every filter.
    return not subDataFrame.empty


def testExample():
    """Run the demo: check four text lines against the sample dataframe.

    Prints the expected outcome followed by the results actually returned
    by ``adapterCheckIfLineExistsInDataFrame``.
    """
    dataFrame = createDataFrameAsExample()
    columnNames = ['MarketCenter', 'Symbol', 'Date', 'Time', 'ShortType', 'Size','Price']

    linesToCheck = [
        # Three made-up lines (not in the dataframe)
        'T,FB,20190102,13:00:00,S,10,100',
        'T,FB,20190102,08:00:00,S,60,100',
        'T,FB,20190102,08:00:00,S,10,150',
        # This line exists in the dataframe
        'T,FB,20190102,08:00:00,S,10,100',
    ]
    results = [
        adapterCheckIfLineExistsInDataFrame(dataFrame, columnNames, candidate)
        for candidate in linesToCheck
    ]

    print('Expected:', 'False False False True')
    print('Method:', *results)



# Run the demonstration as soon as this snippet is executed.
testExample()

有一个简短的示例，使用Stocks作为数据帧

# *****     Code for 'Check if a line exists in dataframe' using Pandas     *****

def isfloat(value):
    """Report whether ``value`` can be converted with ``float()``.

    Args:
        value: Any object; typically one comma-separated field as a string.

    Returns:
        True if ``float(value)`` succeeds, False if the conversion fails.
    """
    try:
        float(value)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "not a number"; a bare ``except``
        # would also swallow KeyboardInterrupt and SystemExit.
        return False
    return True


def getMergedListFromTwoLists(list1, list2):
    """Pair the items of two lists position by position.

    Pairing stops at the end of the shorter list. Any item of ``list2``
    that ``isfloat`` accepts is replaced by its ``float`` value, because the
    dataframe stores such values as numbers.

    Example:
        list1 = ['D', 'C', 'B', 'A']
        list2 = ['OK', 'Good', '82', 'Great']
        result = [['D', 'OK'], ['C', 'Good'], ['B', 82.0], ['A', 'Great']]

    Returns:
        A list of two-item lists ``[item_from_list1, item_from_list2]``.
    """
    return [
        [key, float(raw) if isfloat(raw) else raw]
        for key, raw in zip(list1, list2)
    ]


def getSubDataFrame(dataFrame, valuesAsArray):
    """Select the rows whose column equals a given value.

    Args:
        dataFrame: Dataframe to filter.
        valuesAsArray: Pair ``[column, value]``, e.g. ``['Symbol', 'AAPL']``
            keeps only the rows whose 'Symbol' is 'AAPL'.

    Returns:
        The rows of ``dataFrame`` where ``column == value``.
    """
    column = valuesAsArray[0]
    columnData = dataFrame[column]
    matches = columnData == valuesAsArray[1]
    return dataFrame.loc[matches]




def createDataFrameAsExample():
    """Build the small sample table used by the demonstration.

    Returns:
        A pandas DataFrame with four rows and the columns MarketCenter,
        Symbol, Date, Time, ShortType, Size and Price, in that order.
    """
    import pandas as pd

    columnOrder = ['MarketCenter', 'Symbol', 'Date', 'Time', 'ShortType', 'Size', 'Price']
    columnValues = [
        ['T', 'T', 'T', 'T'],
        ['AAPL', 'FB', 'AAPL', 'FB'],
        [20190101, 20190102, 20190201, 20190301],
        ['08:00:00', '08:00:00', '09:00:00', '09:00:00'],
        ['S', 'S', 'S', 'S'],
        [10, 10, 20, 30],
        [100, 100, 300, 200],
    ]
    # Column names and their values are listed in the same order.
    return pd.DataFrame(dict(zip(columnOrder, columnValues)), columns=columnOrder)



def adapterCheckIfLineExistsInDataFrame(originalDataFrame, headlineAsArray, line):
    """Check whether a comma-separated text line matches a dataframe row.

    Args:
        originalDataFrame: Dataframe to search.
        headlineAsArray: Column names, in the same order as the fields of
            ``line``.
        line: Field values joined by commas, e.g.
            'T,AAPL,20190101,08:00:00,S,10,100'.

    Returns:
        The result of ``checkIfLineExistsInDataFrame`` for the
        [column, value] pairs built from the headline and the line.
    """
    fields = line.split(',')
    columnValuePairs = getMergedListFromTwoLists(headlineAsArray, fields)
    return checkIfLineExistsInDataFrame(originalDataFrame, columnValuePairs)



def checkIfLineExistsInDataFrame(originalDataFrame,  valuesAsArray):
    """Report whether some row satisfies every [column, value] pair.

    The dataframe is narrowed one pair at a time via ``getSubDataFrame``;
    the row exists only if something is left after the last pair.

    Args:
        originalDataFrame: Dataframe to search.
        valuesAsArray: Sequence of ``[column, value]`` pairs that a row must
            match simultaneously.

    Returns:
        True if at least one row matches all pairs, otherwise False. An
        empty dataframe always yields False.
    """
    subDataFrame = originalDataFrame
    for value in valuesAsArray:
        if subDataFrame.empty:
            # Nothing left to match against; later pairs cannot change that.
            return False
        subDataFrame = getSubDataFrame(subDataFrame, value)

    # A match exists only if rows survived every filter.
    return not subDataFrame.empty


def testExample():
    """Run the demo: check four text lines against the sample dataframe.

    Prints the expected outcome followed by the results actually returned
    by ``adapterCheckIfLineExistsInDataFrame``.
    """
    dataFrame = createDataFrameAsExample()
    columnNames = ['MarketCenter', 'Symbol', 'Date', 'Time', 'ShortType', 'Size','Price']

    linesToCheck = [
        # Three made-up lines (not in the dataframe)
        'T,FB,20190102,13:00:00,S,10,100',
        'T,FB,20190102,08:00:00,S,60,100',
        'T,FB,20190102,08:00:00,S,10,150',
        # This line exists in the dataframe
        'T,FB,20190102,08:00:00,S,10,100',
    ]
    results = [
        adapterCheckIfLineExistsInDataFrame(dataFrame, columnNames, candidate)
        for candidate in linesToCheck
    ]

    print('Expected:', 'False False False True')
    print('Method:', *results)



# Run the demonstration as soon as this snippet is executed.
testExample()

单击以查看数据帧

`'entry' in df.index` 或 `df.index.contains('entry')`？

谢谢，这正是我需要的。我认为回答问题就应该这样（附带示例），+1。@AntonvBR - 谢谢。不过有一件小事：你“忽略”的另一个有趣事实是，pandas 是建立在 numpy 基础之上的，可以说这正是我们能够进行这类比较的原因，例如 **df.index == 'entry'** 实际上得到的是一个 numpy.ndarray。@AntonvBR - 但怎样才能最好地把它写进我的答案呢？如果有时间，你能用这些信息编辑我的答案吗？（我的英语不太好。）谢谢。让我们保持原样吧！回头见。考虑到你只是想用示例说明一个相当孤立的方面，这个示例并不算简短。

# *****     Code for 'Check if a line exists in dataframe' using Pandas     *****

def isfloat(value):
    """Report whether ``value`` can be converted with ``float()``.

    Args:
        value: Any object; typically one comma-separated field as a string.

    Returns:
        True if ``float(value)`` succeeds, False if the conversion fails.
    """
    try:
        float(value)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "not a number"; a bare ``except``
        # would also swallow KeyboardInterrupt and SystemExit.
        return False
    return True


def getMergedListFromTwoLists(list1, list2):
    """Pair the items of two lists position by position.

    Pairing stops at the end of the shorter list. Any item of ``list2``
    that ``isfloat`` accepts is replaced by its ``float`` value, because the
    dataframe stores such values as numbers.

    Example:
        list1 = ['D', 'C', 'B', 'A']
        list2 = ['OK', 'Good', '82', 'Great']
        result = [['D', 'OK'], ['C', 'Good'], ['B', 82.0], ['A', 'Great']]

    Returns:
        A list of two-item lists ``[item_from_list1, item_from_list2]``.
    """
    return [
        [key, float(raw) if isfloat(raw) else raw]
        for key, raw in zip(list1, list2)
    ]


def getSubDataFrame(dataFrame, valuesAsArray):
    """Select the rows whose column equals a given value.

    Args:
        dataFrame: Dataframe to filter.
        valuesAsArray: Pair ``[column, value]``, e.g. ``['Symbol', 'AAPL']``
            keeps only the rows whose 'Symbol' is 'AAPL'.

    Returns:
        The rows of ``dataFrame`` where ``column == value``.
    """
    column = valuesAsArray[0]
    columnData = dataFrame[column]
    matches = columnData == valuesAsArray[1]
    return dataFrame.loc[matches]




def createDataFrameAsExample():
    """Build the small sample table used by the demonstration.

    Returns:
        A pandas DataFrame with four rows and the columns MarketCenter,
        Symbol, Date, Time, ShortType, Size and Price, in that order.
    """
    import pandas as pd

    columnOrder = ['MarketCenter', 'Symbol', 'Date', 'Time', 'ShortType', 'Size', 'Price']
    columnValues = [
        ['T', 'T', 'T', 'T'],
        ['AAPL', 'FB', 'AAPL', 'FB'],
        [20190101, 20190102, 20190201, 20190301],
        ['08:00:00', '08:00:00', '09:00:00', '09:00:00'],
        ['S', 'S', 'S', 'S'],
        [10, 10, 20, 30],
        [100, 100, 300, 200],
    ]
    # Column names and their values are listed in the same order.
    return pd.DataFrame(dict(zip(columnOrder, columnValues)), columns=columnOrder)



def adapterCheckIfLineExistsInDataFrame(originalDataFrame, headlineAsArray, line):
    """Check whether a comma-separated text line matches a dataframe row.

    Args:
        originalDataFrame: Dataframe to search.
        headlineAsArray: Column names, in the same order as the fields of
            ``line``.
        line: Field values joined by commas, e.g.
            'T,AAPL,20190101,08:00:00,S,10,100'.

    Returns:
        The result of ``checkIfLineExistsInDataFrame`` for the
        [column, value] pairs built from the headline and the line.
    """
    fields = line.split(',')
    columnValuePairs = getMergedListFromTwoLists(headlineAsArray, fields)
    return checkIfLineExistsInDataFrame(originalDataFrame, columnValuePairs)



def checkIfLineExistsInDataFrame(originalDataFrame,  valuesAsArray):
    """Report whether some row satisfies every [column, value] pair.

    The dataframe is narrowed one pair at a time via ``getSubDataFrame``;
    the row exists only if something is left after the last pair.

    Args:
        originalDataFrame: Dataframe to search.
        valuesAsArray: Sequence of ``[column, value]`` pairs that a row must
            match simultaneously.

    Returns:
        True if at least one row matches all pairs, otherwise False. An
        empty dataframe always yields False.
    """
    subDataFrame = originalDataFrame
    for value in valuesAsArray:
        if subDataFrame.empty:
            # Nothing left to match against; later pairs cannot change that.
            return False
        subDataFrame = getSubDataFrame(subDataFrame, value)

    # A match exists only if rows survived every filter.
    return not subDataFrame.empty


def testExample():
    """Run the demo: check four text lines against the sample dataframe.

    Prints the expected outcome followed by the results actually returned
    by ``adapterCheckIfLineExistsInDataFrame``.
    """
    dataFrame = createDataFrameAsExample()
    columnNames = ['MarketCenter', 'Symbol', 'Date', 'Time', 'ShortType', 'Size','Price']

    linesToCheck = [
        # Three made-up lines (not in the dataframe)
        'T,FB,20190102,13:00:00,S,10,100',
        'T,FB,20190102,08:00:00,S,60,100',
        'T,FB,20190102,08:00:00,S,10,150',
        # This line exists in the dataframe
        'T,FB,20190102,08:00:00,S,10,100',
    ]
    results = [
        adapterCheckIfLineExistsInDataFrame(dataFrame, columnNames, candidate)
        for candidate in linesToCheck
    ]

    print('Expected:', 'False False False True')
    print('Method:', *results)



# Run the demonstration as soon as this snippet is executed.
testExample()