Csv Python-从.dat文件中筛选列并从其他列返回给定值

Csv Python-从.dat文件中筛选列并从其他列返回给定值,csv,pandas,numpy,Csv,Pandas,Numpy,我是Python新手,一直在使用示例数据进行练习。我创建了150行数据,包含学生ID号、年级、年龄、班级代码、地区代码等。我想对数据做的不仅仅是按年级、年龄等特定列进行过滤,还想根据过滤结果,用匹配行的另一列(学生ID)创建一个列表。我已经知道如何取出用于查找特定值的那一列,但不知道如何创建需要返回的值的列表。下面是6行数据的示例: 1/A/15/13/43214 2/I/15/21/58322 3/C/17/89/68470 4/I/18/6/57362 5/I/14/4/00000 6/A/16/23/34567





import numpy

# Parse the '/'-delimited file, keeping every field as text.
data = numpy.genfromtxt(
    '/testdata.dat',
    delimiter='/',
    dtype='unicode',
)

# Column 1 of every row is the grade; show them all.
grades = data[:, 1]
print(grades)

['A' 'I' 'C' 'I' 'I' 'A']


list from A = [1, 6]
list from C = [3]
list from I = [2, 4, 5]



import numpy as np

# Load the '/'-delimited file with every field kept as text, so the
# student IDs collected below are strings such as '1', not integers.
grades = np.genfromtxt('data.txt', delimiter='/', skip_header=0, dtype='unicode')

# Map each distinct grade (column 1) to the list of student IDs (column 0)
# taken from the rows that carry that grade.
grade_column = grades[:, 1]  # sliced once instead of on every iteration
res = {}
for grade in sorted(set(grade_column.tolist())):  # sorted: deterministic order
    # The boolean mask selects the rows whose grade matches.
    res[grade] = grades[grade_column == grade][:, 0].tolist()

# Function-call form: the statement form `print res` is a SyntaxError on Python 3.
print(res)


import numpy as np

# Load the '/'-delimited file with every field kept as text, so the
# student IDs collected below are strings such as '1', not integers.
grades = np.genfromtxt('data.txt', delimiter='/', skip_header=0, dtype='unicode')

# Map each distinct grade (column 1) to the list of student IDs (column 0)
# taken from the rows that carry that grade.
grade_column = grades[:, 1]  # sliced once instead of on every iteration
res = {}
for grade in sorted(set(grade_column.tolist())):  # sorted: deterministic order
    # The boolean mask selects the rows whose grade matches.
    res[grade] = grades[grade_column == grade][:, 0].tolist()

# Function-call form: the statement form `print res` is a SyntaxError on Python 3.
print(res)

import pandas as pd

# The file has no header row, so columns are labelled 0, 1, 2, ...
df = pd.read_csv('data.txt', sep='/', header=None)

# Iterating a groupby yields (group key, sub-frame) pairs; collect them into
# a dict keyed by the value found in column 1.
dfs = dict(iter(df.groupby(1)))

In [59]: dfs.keys()
Out[59]: dict_keys(['I', 'C', 'A'])

In [60]: dfs['I']
   0  1   2   3      4
1  2  I  15  21  58322
3  4  I  18   6  57362
4  5  I  14   4      0

In [61]: dfs['C']
   0  1   2   3      4
2  3  C  17  89  68470

In [62]: dfs['A']
   0  1   2   3      4
0  1  A  15  13  43214
5  6  A  16  23  34567

In [67]: dfs['I'].iloc[:, 0].tolist()
Out[67]: [2, 4, 5]

In [68]: dfs['C'].iloc[:, 0].tolist()
Out[68]: [3]

In [69]: dfs['A'].iloc[:, 0].tolist()
Out[69]: [1, 6]

import pandas as pd

# The file has no header row, so columns are labelled 0, 1, 2, ...
df = pd.read_csv('data.txt', sep='/', header=None)

# Iterating a groupby yields (group key, sub-frame) pairs; collect them into
# a dict keyed by the value found in column 1.
dfs = dict(iter(df.groupby(1)))

In [59]: dfs.keys()
Out[59]: dict_keys(['I', 'C', 'A'])

In [60]: dfs['I']
   0  1   2   3      4
1  2  I  15  21  58322
3  4  I  18   6  57362
4  5  I  14   4      0

In [61]: dfs['C']
   0  1   2   3      4
2  3  C  17  89  68470

In [62]: dfs['A']
   0  1   2   3      4
0  1  A  15  13  43214
5  6  A  16  23  34567

In [67]: dfs['I'].iloc[:, 0].tolist()
Out[67]: [2, 4, 5]

In [68]: dfs['C'].iloc[:, 0].tolist()
Out[68]: [3]

In [69]: dfs['A'].iloc[:, 0].tolist()
Out[69]: [1, 6]


# Group the rows of the '/'-delimited file by grade.
# `students` maps each grade to the list of rows (lists of strings) that
# carry it, in file order.
students = {}
with open("testdata.dat", "r") as f:
    for line in f:  # stream the file one line at a time
        line = line.strip()
        if not line:
            # A blank (e.g. trailing) line has no grade column; skip it
            # instead of failing on row[1] below.
            continue
        row = line.split("/")  # split the line into its individual columns
        grade = row[1]  # the second column of the row is the grade
        # Append in place; rebuilding the list with `+ [row]` for every row
        # copies the whole list each time.
        students.setdefault(grade, []).append(row)

# `students` now holds all rows split by grade, e.g.:
a_students = students["A"]
# [['1', 'A', '15', '13', '43214'], ['6', 'A', '16', '23', '34567']]

# To collect only the IDs (first column) of the A-grade students:
student_ids = [entry[0] for entry in a_students]
# ['1', '6']

def filter_sublists(source, filters=None):
    """Return the elements of *source* that satisfy every filter.

    Args:
        source: Iterable of indexable rows (e.g. lists of strings).
        filters: Optional list of filters. Each filter has the form
            ``[position, [values]]`` and matches a row when
            ``row[position]`` is one of ``values``. Any number of filters
            may be given; a row must match all of them. With no filters,
            every row is returned.

    Returns:
        A new list with the matching rows, in their original order.

    Rows for which a lookup raises ``IndexError`` (for example, a filter
    position beyond the end of the row) are skipped, not treated as errors.
    """
    result = []
    filters = filters or []  # treat a missing filter list as "no filters"
    for element in source:
        try:
            # all() stops at the first filter that does not match
            if all(element[f[0]] in f[1] for f in filters):
                result.append(element)
        except IndexError:
            # invalid filter position or data position: ignore this element
            continue
    return result

# now we can use it to filter our data, first lets load our data:

# Read the whole file up front: one list of column strings per line.
with open("testdata.dat", "r") as f:
    students = list(map(lambda text: text.strip().split("/"), f))

# Every row is now in `students`; any column can be used to filter it.
# Rows whose grade (column 1) is "A":
a_students = filter_sublists(students, filters=[[1, ["A"]]])
# [['1', 'A', '15', '13', '43214'], ['6', 'A', '16', '23', '34567']]

# The same selection reduced to just the IDs (column 0):
a_student_ids = [
    row[0]
    for row in filter_sublists(students, filters=[[1, ["A"]]])
]
# ['1', '6']

# Any other column works the same way, e.g. age (column 2) equal to "15":
age_15_students = filter_sublists(students, filters=[[2, ["15"]]])
# [['1', 'A', '15', '13', '43214'], ['2', 'I', '15', '21', '58322']]

# Filters combine: I-grade students aged 14 or 15.
i_students = filter_sublists(
    students,
    filters=[[1, ["I"]], [2, ["14", "15"]]],
)
# [['2', 'I', '15', '21', '58322'], ['5', 'I', '14', '4', '00000']]


# Group the rows of the '/'-delimited file by grade.
# `students` maps each grade to the list of rows (lists of strings) that
# carry it, in file order.
students = {}
with open("testdata.dat", "r") as f:
    for line in f:  # stream the file one line at a time
        line = line.strip()
        if not line:
            # A blank (e.g. trailing) line has no grade column; skip it
            # instead of failing on row[1] below.
            continue
        row = line.split("/")  # split the line into its individual columns
        grade = row[1]  # the second column of the row is the grade
        # Append in place; rebuilding the list with `+ [row]` for every row
        # copies the whole list each time.
        students.setdefault(grade, []).append(row)

# `students` now holds all rows split by grade, e.g.:
a_students = students["A"]
# [['1', 'A', '15', '13', '43214'], ['6', 'A', '16', '23', '34567']]

# To collect only the IDs (first column) of the A-grade students:
student_ids = [entry[0] for entry in a_students]
# ['1', '6']

def filter_sublists(source, filters=None):
    """Return the elements of *source* that satisfy every filter.

    Args:
        source: Iterable of indexable rows (e.g. lists of strings).
        filters: Optional list of filters. Each filter has the form
            ``[position, [values]]`` and matches a row when
            ``row[position]`` is one of ``values``. Any number of filters
            may be given; a row must match all of them. With no filters,
            every row is returned.

    Returns:
        A new list with the matching rows, in their original order.

    Rows for which a lookup raises ``IndexError`` (for example, a filter
    position beyond the end of the row) are skipped, not treated as errors.
    """
    result = []
    filters = filters or []  # treat a missing filter list as "no filters"
    for element in source:
        try:
            # all() stops at the first filter that does not match
            if all(element[f[0]] in f[1] for f in filters):
                result.append(element)
        except IndexError:
            # invalid filter position or data position: ignore this element
            continue
    return result

# now we can use it to filter our data, first lets load our data:

# Read the whole file up front: one list of column strings per line.
with open("testdata.dat", "r") as f:
    students = list(map(lambda text: text.strip().split("/"), f))

# Every row is now in `students`; any column can be used to filter it.
# Rows whose grade (column 1) is "A":
a_students = filter_sublists(students, filters=[[1, ["A"]]])
# [['1', 'A', '15', '13', '43214'], ['6', 'A', '16', '23', '34567']]

# The same selection reduced to just the IDs (column 0):
a_student_ids = [
    row[0]
    for row in filter_sublists(students, filters=[[1, ["A"]]])
]
# ['1', '6']

# Any other column works the same way, e.g. age (column 2) equal to "15":
age_15_students = filter_sublists(students, filters=[[2, ["15"]]])
# [['1', 'A', '15', '13', '43214'], ['2', 'I', '15', '21', '58322']]

# Filters combine: I-grade students aged 14 or 15.
i_students = filter_sublists(
    students,
    filters=[[1, ["I"]], [2, ["14", "15"]]],
)
# [['2', 'I', '15', '21', '58322'], ['5', 'I', '14', '4', '00000']]
