Python 在csv文件中匹配密钥并生成所需输出_Python_Python 2.7_Csv_If Statement_Match

Python 在csv文件中匹配密钥并生成所需输出

python python-2.7 csv if-statement

Python 在csv文件中匹配密钥并生成所需输出,python,python-2.7,csv,if-statement,match,Python,Python 2.7,Csv,If Statement,Match,我一直在尝试操作一些代码来查看第3列中不同的uniqueclassindicator，如果第3列是2，则查看同一uniqueclassindicator中的前一行，以从该行检索数据。此外，只有当第3列和第6列都是1时，才能实现我的输出我一直在使用的代码：示例数据：期望输出：因此，为了澄清由TitleIndicator/Uniqueclassindicator定义的每个组，我希望能够从该组的前两行中提取数据，如果第6列和第3列都等于1 如果有人能告诉我如何修复此代码，将不胜感激。非常感谢

我一直在尝试操作一些代码来查看第3列中不同的 `uniqueclassindicator`：如果第3列是 `2`，则查看同一 `uniqueclassindicator` 中的前一行，以从该行检索数据。此外，只有当第3列和第6列都是 `1` 时，才能实现我的输出。

我一直在使用的代码：示例数据：期望输出：因此，为了澄清由TitleIndicator/Uniqueclassindicator定义的每个组，我希望能够从该组的前两行中提取数据，如果第6列和第3列都等于1

如果有人能告诉我如何修复此代码，将不胜感激。非常感谢！

下面的代码可以实现：

from collections import defaultdict
import csv

# Column names assigned to the CSV columns, in file order
# (you probably can think up better names).
fields = ('TitleA', 'TitleB', 'TitleIndicator', 'TitleRNum', 'TitleC', 'TitleD', 'TitlePNum', 'TitleBF', 'TitleCheck')

# Columns echoed for every matched pair, as "previous,current" per column.
itemsToPrint = ['TitleB', 'TitleRNum', 'TitlePNum', 'TitleBF', 'TitleCheck']

# Maps a TitleIndicator value to the pending candidate row of that class,
# i.e. the last row seen whose TitleRNum and TitlePNum were both 1.
# A plain dict is enough: membership is always checked before access.
entries = {}

with open("exampledata.csv", 'rb') as fd:
    reader = csv.DictReader(fd, fields)

    # Field names are supplied explicitly, so the header line would come back
    # as an ordinary row; consume it once here instead of testing a counter
    # on every iteration.  The default keeps an empty file from raising.
    next(reader, None)

    for row in reader:
        TitleRNum = int(row['TitleRNum'])

        # If this row has a TitlePNum, keep it, otherwise fall back to -1
        TitlePNum = int(row['TitlePNum']) if row['TitlePNum'] else -1

        # If we have already seen a row with the same class that has 1 at
        # both RNum and PNum, pair it with the current row and print them.
        # pop() also removes the candidate so it is used at most once.
        classKey = row['TitleIndicator']
        previousRow = entries.pop(classKey, None)
        if previousRow is not None:
            print(",".join(previousRow[item] + ',' + row[item]
                           for item in itemsToPrint))

        # If both RNum and PNum are 1, save this row as a candidate for a
        # later row of the same class.
        if TitleRNum == 1 and TitlePNum == 1:
            entries[classKey] = row

你在文章的标题中使用了“键”，所以我在这里提供了一个字典解决方案：）哇（！）能够做到这一点感觉很好

from csv import DictReader

# 1) Read the data and group the rows by class in a single pass.
#    Rows are keyed by the full 'TitleIndicator' value rather than by its last
#    character, so class identifiers do not have to be single digits or form
#    a contiguous 1..N range.
datagrouped = {}
with open("exampledata.csv", 'rb') as fd:
    for row in DictReader(fd):
        datagrouped.setdefault(row['TitleIndicator'], []).append(row)

# 2) Go through each class, compare every row with the row directly before it
#    in the same class, and print the pair when the current row has
#    TitleRNum == '2' and the previous row has TitleRNum == '1' and
#    TitlePNum == '1'.
for uniq_class in datagrouped.values():  # class loop
    for prev_row, row in zip(uniq_class, uniq_class[1:]):  # row loop
        cond1 = row['TitleRNum'] == '2'
        cond2 = prev_row['TitleRNum'] == '1' and prev_row['TitlePNum'] == '1'
        # Logical 'and' (not bitwise '&') is the right operator for booleans.
        if cond1 and cond2:
            print(["%s" % x for x in prev_row.itervalues()])
            print(["%s" % x for x in row.itervalues()])

当我在同一个目录中使用

exampledata.csv

运行此命令时，我得到以下输出：

['DataA', 'Mark', '125', '4.634033328', '1', '395.1', '1', 'F', 'uniqueclassindicator4']
['DataA', 'Jason', '125', '6.470141577', '2', '', '9', '', 'uniqueclassindicator4']
['DataA', 'Joe', '125', '3.659819202', '1', '984.2', '1', 'Yes', 'uniqueclassindicator1']
['DataA', 'Bob', '125', '4.212490883', '2', '994.2', '2', 'Yes', 'uniqueclassindicator1']

嗯，悬赏已经结束了，但我仍然想给出我的解决方案。以下是详细的、带注释的回答：

# Import "csv.DictReader" and put it in the name "dr".
from csv import DictReader as dr

# These are the columns we will be working with.
cols = "TitleB", "TitleRNum", "TitlePNum", "TitleBF", "TitleCheck"

# This is a variable to hold a previous row for future processing.
# It serves the same purpose as the "entries" dict in Sudipta Chatterjee's answer.
# "None" means there is no previous row waiting to be paired.
mark = None

# Open the CSV file in binary mode.
with open("exampledata.csv", "rb") as f:

    # This loops through what is returned by "DictReader".
    #
    # No field names are passed, so "DictReader" itself reads the first line
    # of the file and uses its values as the column names.  Letting the csv
    # module parse the header (instead of splitting the line on commas by
    # hand) also copes with quoted column names.
    for row in dr(f):

        # This checks if "mark" holds a row.
        # If it does, then "mark" contains a previous row to be processed.
        if mark is not None:

            # This line takes the row stored in "mark" as well as the current row
            # and puts them together, separating the values with commas using "str.join".
            print(",".join([",".join([mark[c], row[c]]) for c in cols]))

        # This is a compact statement equivalent to:
        #
        #    if row["TitlePNum"] == row["TitleRNum"] == "1":
        #        mark = row
        #    else:
        #        mark = None
        #
        # It sees if the "TitlePNum" and "TitleRNum" columns in the current row are both "1".
        # If so, it saves that row in "mark" for future processing.
        #
        # It is basically the same thing as the
        #
        #    if TitleRNum == 1 and TitlePNum == 1:
        #        entries[row['TitleIndicator']] = row
        #
        # part in Sudipta Chatterjee's answer.
        mark = row if row["TitlePNum"] == row["TitleRNum"] == "1" else None

下面是去掉注释后的普通版本：

from csv import DictReader as dr

# Columns printed for each matched pair, as "previous,current" per column.
cols = "TitleB", "TitleRNum", "TitlePNum", "TitleBF", "TitleCheck"

# Previous row when its TitlePNum and TitleRNum were both "1", otherwise None.
mark = None
with open("exampledata.csv", "rb") as f:
    # With no field names given, DictReader takes the column names from the
    # first line of the file itself.
    for row in dr(f):
        if mark is not None:
            print(",".join([",".join([mark[c], row[c]]) for c in cols]))
        mark = row if row["TitlePNum"] == row["TitleRNum"] == "1" else None

输出：

Joe,Bob,1,2,1,2,984.2,994.2,Yes,Yes
Mark,Jason,1,2,1,9,395.1,,F,

正如您所看到的，我的解决方案更小、效率更高。

等等-因此您可以在

column3

是

的地方使用上一行的数据，但同样，您希望

column3

和

column6

都是

？在同一

uniqueclassindicator

是，不在同一行。不-再解释一遍。您

uniqueClassIndicator

是第2列（如您正在计算第0-1-2列），还是第3列（在计算第1-2-3列时）？因为你们所要求的是相互矛盾的价值观uniqueClassIndicator@SudiptaChatterjee据我所知，他希望这样做，如果

TitleRNum

和

TitlePNum

都等于

，那么他希望能够从下一行提取数据，只要

TitleRNum

（他的例子使用了previous，我不知道CSV有一个

next

函数）。我想他希望能够扩展这个函数，这样如果他想做三个检查，第一行：（1，1），第二行：（2，2）和第三行：（3，3），那么就需要记住多行…和“previous”变量将在找到新的

uniqueclassindicator

时发生变化。我想我已经准备好了您的解决方案-但在我发布答案之前，请告诉我为什么Tim和Tom也不包括在答案中。他们是同一类，他们是1和2。我希望您不会让+100奖金这么容易过期？：）一点也不，如果它有效，我肯定会奖励它。我只是还没有测试。：）效率更高。嗯，我可以试试游泳。您介意添加一些注释来解释它是如何工作的吗？赏金可能已经没有了，但我仍然可以给它一个可接受的答案。@SMNALLY-很抱歉耽搁了，我必须键入所有内容。：）基本上，我的解决方案利用了紧凑的语句。它也只做必要的事情（没有怨言，但其他解决方案定义变量并不必要地使用函数）。我更加感谢您的努力，因为您这样做并不是为了赏金：）非常感谢，像这样的反向工程代码，了解代码效率是非常宝贵的。代码注释也是如此！再次感谢！

# Import "csv.DictReader" and put it in the name "dr".
from csv import DictReader as dr

# These are the columns we will be working with.
cols = "TitleB", "TitleRNum", "TitlePNum", "TitleBF", "TitleCheck"

# This is a variable to hold a previous row for future processing.
# It serves the same purpose as the "entries" dict in Sudipta Chatterjee's answer.
# "None" means there is no previous row waiting to be paired.
mark = None

# Open the CSV file in binary mode.
with open("exampledata.csv", "rb") as f:

    # This loops through what is returned by "DictReader".
    #
    # No field names are passed, so "DictReader" itself reads the first line
    # of the file and uses its values as the column names.  Letting the csv
    # module parse the header (instead of splitting the line on commas by
    # hand) also copes with quoted column names.
    for row in dr(f):

        # This checks if "mark" holds a row.
        # If it does, then "mark" contains a previous row to be processed.
        if mark is not None:

            # This line takes the row stored in "mark" as well as the current row
            # and puts them together, separating the values with commas using "str.join".
            print(",".join([",".join([mark[c], row[c]]) for c in cols]))

        # This is a compact statement equivalent to:
        #
        #    if row["TitlePNum"] == row["TitleRNum"] == "1":
        #        mark = row
        #    else:
        #        mark = None
        #
        # It sees if the "TitlePNum" and "TitleRNum" columns in the current row are both "1".
        # If so, it saves that row in "mark" for future processing.
        #
        # It is basically the same thing as the
        #
        #    if TitleRNum == 1 and TitlePNum == 1:
        #        entries[row['TitleIndicator']] = row
        #
        # part in Sudipta Chatterjee's answer.
        mark = row if row["TitlePNum"] == row["TitleRNum"] == "1" else None

from csv import DictReader as dr

# Columns printed for each matched pair, as "previous,current" per column.
cols = "TitleB", "TitleRNum", "TitlePNum", "TitleBF", "TitleCheck"

# Previous row when its TitlePNum and TitleRNum were both "1", otherwise None.
mark = None
with open("exampledata.csv", "rb") as f:
    # With no field names given, DictReader takes the column names from the
    # first line of the file itself.
    for row in dr(f):
        if mark is not None:
            print(",".join([",".join([mark[c], row[c]]) for c in cols]))
        mark = row if row["TitlePNum"] == row["TitleRNum"] == "1" else None

Joe,Bob,1,2,1,2,984.2,994.2,Yes,Yes
Mark,Jason,1,2,1,9,395.1,,F,