Python pandas DataFrame 查询

Python 熊猫数据帧查询,python,pandas,Python,Pandas,我喜欢根据列名及其最小值和最大值检索数据。我不知道如何得到那个结果。我能够根据列名获取数据,但不知道如何应用限制 列表和元组中给出的列名和相应的最小值和最大值 import pandas as pd import numpy as np def c_cutoff(data_frame, column_cutoff): selected_data = data_frame.loc[:, [X[0] for X in column_cutoff]] return selecte

我想根据列名及其最小值和最大值来检索数据,但不知道如何得到这个结果。我已经能够根据列名获取数据,但不知道如何应用取值范围的限制。

列名及其对应的最小值和最大值以元组列表的形式给出:

import pandas as pd
import numpy as np

def c_cutoff(data_frame, column_cutoff):
    """Return only the columns of ``data_frame`` named in ``column_cutoff``.

    Each entry of ``column_cutoff`` is read at position 0 to obtain a column
    label; any further items in the entry are not used, so no range
    filtering is applied here.
    """
    wanted_columns = []
    for entry in column_cutoff:
        wanted_columns.append(entry[0])

    return data_frame.loc[:, wanted_columns]


# Fix the RNG seed so the sample frame is the same on every run.
np.random.seed(5)
# 100 rows x 6 columns of random integers in [0, 100), columns A-F,
# row labels R0..R99.
df = pd.DataFrame(np.random.randint(100, size=(100, 6)),
                  columns=list('ABCDEF'),
                  index=['R{}'.format(i) for i in range(100)])

# One (column, minimum, maximum) tuple per column of interest.
column_cutoffdata = [('B',27,78),('E',44,73)]

newdata_cutoff = c_cutoff(df,column_cutoffdata)
# Show the first five rows of the full frame for comparison.
print(df.head())


# Show what c_cutoff returned.
print(newdata_cutoff)
结果

    B   E
R0   78  73
R1   27   7
R2   53  44
R3   65  84
R4    9   1
..
.
预期输出
我希望 B 列中所有小于 27 或大于 78 的值都被丢弃,E 列也同样按其对应的范围处理。

您可以非常明确地执行以下操作:

# (column, lower bound, upper bound) triples; both bounds are inclusive.
limiters = [('B',27,78),('E',44,73)]

# Keep only the rows whose value in every listed column lies inside its
# range. Each pass narrows `df` further, so a row survives only if it
# satisfies all of the limits.
for col, low, high in limiters:
    df = df[(df[col] >= low) & (df[col] <= high)]
不能只丢弃数组中的个别值;这将涉及重塑数组,而数据帧的各列必须具有相同的长度。

但是您可以遍历各列,并使用 `pd.Series.where` 将超出范围的值替换为 `NaN`。请注意,把数据帧传入函数的惯用方式是使用 `DataFrame.pipe`(管道):

import pandas as pd
import numpy as np

def c_cutoff(data_frame, column_cutoff):
    """Replace out-of-range values in the selected columns with NaN.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Frame to filter. It is not modified; a copy is returned.
    column_cutoff : iterable of (column, min_val, max_val)
        For each listed column, values outside the inclusive range
        ``[min_val, max_val]`` are replaced with NaN.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data_frame`` with the out-of-range values masked. Integer
        columns that receive NaN are upcast to float.

    Raises
    ------
    KeyError
        If a listed column is not present in ``data_frame``.
    """
    # Work on a copy so the caller's frame is left untouched.
    result = data_frame.copy()
    # Iterate over the argument, not a module-level global, so the function
    # honours whatever limits the caller passes in.
    for col, min_val, max_val in column_cutoff:
        in_range = result[col].between(min_val, max_val)
        result[col] = result[col].where(in_range)
    return result

# Fix the RNG seed so the sample frame is the same on every run.
np.random.seed(5)
# 100 rows x 6 columns of random integers in [0, 100), columns A-F,
# row labels R0..R99.
df = pd.DataFrame(np.random.randint(100, size=(100, 6)),
                  columns=list('ABCDEF'),
                  index=['R{}'.format(i) for i in range(100)])

# One (column, minimum, maximum) tuple per column to be limited.
column_cutoffdata = [('B',27,78),('E',44,73)]

# First five rows before any cutoff is applied.
print(df.head())

#      A   B   C   D   E   F
# R0  99  78  61  16  73   8
# R1  62  27  30  80   7  76
# R2  15  53  80  27  44  77
# R3  75  65  47  30  84  86
# R4  18   9  41  62   1  82

# df.pipe(f, arg) calls f(df, arg).
newdata_cutoff = df.pipe(c_cutoff, column_cutoffdata)

# First five rows after the cutoff; out-of-range cells show as NaN.
print(newdata_cutoff.head())

#      A     B   C   D     E   F
# R0  99  78.0  61  16  73.0   8
# R1  62  27.0  30  80   NaN  76
# R2  15  53.0  80  27  44.0  77
# R3  75  65.0  47  30   NaN  86
# R4  18   NaN  41  62   NaN  82
如果要删除含有任何 `NaN` 值的行,则可以使用 `dropna`:


只有两列,我现在可以解决它了。
import pandas as pd
import numpy as np

def c_cutoff(data_frame, column_cutoff):
    """Replace out-of-range values in the selected columns with NaN.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Frame to filter. It is not modified; a copy is returned.
    column_cutoff : iterable of (column, min_val, max_val)
        For each listed column, values outside the inclusive range
        ``[min_val, max_val]`` are replaced with NaN.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data_frame`` with the out-of-range values masked. Integer
        columns that receive NaN are upcast to float.

    Raises
    ------
    KeyError
        If a listed column is not present in ``data_frame``.
    """
    # Work on a copy so the caller's frame is left untouched.
    result = data_frame.copy()
    # Iterate over the argument, not a module-level global, so the function
    # honours whatever limits the caller passes in.
    for col, min_val, max_val in column_cutoff:
        in_range = result[col].between(min_val, max_val)
        result[col] = result[col].where(in_range)
    return result

# Fix the RNG seed so the sample frame is the same on every run.
np.random.seed(5)
# 100 rows x 6 columns of random integers in [0, 100), columns A-F,
# row labels R0..R99.
df = pd.DataFrame(np.random.randint(100, size=(100, 6)),
                  columns=list('ABCDEF'),
                  index=['R{}'.format(i) for i in range(100)])

# One (column, minimum, maximum) tuple per column to be limited.
column_cutoffdata = [('B',27,78),('E',44,73)]

# First five rows before any cutoff is applied.
print(df.head())

#      A   B   C   D   E   F
# R0  99  78  61  16  73   8
# R1  62  27  30  80   7  76
# R2  15  53  80  27  44  77
# R3  75  65  47  30  84  86
# R4  18   9  41  62   1  82

# df.pipe(f, arg) calls f(df, arg).
newdata_cutoff = df.pipe(c_cutoff, column_cutoffdata)

# First five rows after the cutoff; out-of-range cells show as NaN.
print(newdata_cutoff.head())

#      A     B   C   D     E   F
# R0  99  78.0  61  16  73.0   8
# R1  62  27.0  30  80   NaN  76
# R2  15  53.0  80  27  44.0  77
# R3  75  65.0  47  30   NaN  86
# R4  18   NaN  41  62   NaN  82
# Drop every row that still contains at least one NaN.
newdata_cutoff = newdata_cutoff.dropna()