使用数据文件的Python嵌套循环

使用数据文件的Python嵌套循环,python,Python,我有一个包含数据的文件,我想搜索每小时的最大读数 def maximum(): source = open ( 'dataV.csv', 'r' ) result = open ( 'dataV-max.csv', 'w' ) line = source.readline() max_num = '0' while line != '' : for time in range(0, 24): line = source.readline() if ti

我有一个包含数据的文件,我想搜索每小时的最大读数

# Asker's attempt: scan dataV.csv and write readings to dataV-max.csv.
# NOTE(review): as shown, the statements below are not indented under the
# def, so this is not valid Python -- presumably the indentation was lost
# when the snippet was pasted; confirm against the original post.
def maximum():
source = open ( 'dataV.csv', 'r' )
result = open ( 'dataV-max.csv', 'w' )
# Reads the first line of the file (the header row in the sample data).
line = source.readline()

# max_num is a string, so the `<=` comparison below compares text, not numbers.
max_num = '0'

# Loop until readline() returns '' (end of file).
while line != '' :
    # Each pass of this inner loop reads one more line, so up to 24 lines are
    # consumed per pass of the outer loop.
    for time in range(0, 24):
        line = source.readline()
        # `time` is an int from range() while line[12:14] is a str, so the
        # equality test can never be true.
        # NOTE(review): `<=` keeps values that are not larger than max_num,
        # which looks inverted for a maximum search -- confirm intent.
        if time == line [ 12:14 ] and line [22:len(line)] <= max_num :
            max_num = line [ 22:len(line) ]
            print ( max_num )
            result.write ( str(max_num) )

source.close()
result.close() 
我认为嵌套循环有问题。我不知道怎样才能把整个文件都看一遍

以下是文件的某些部分:

'time PST', saturn03.820.A.AlgaeWatch [microg/l]
'2014-04-25 00:04:48',3.35
'2014-04-25 00:04:54',3.225
'2014-04-25 00:05:00',3.15
'2014-04-25 00:07:48',3.4
'2014-04-25 00:07:54',3.4
'2014-04-25 00:08:00',3.375
'2014-04-25 00:10:48',3.45
'2014-04-25 00:10:54',3.325
'2014-04-25 00:11:00',3.425
'2014-04-25 00:13:49',3.45
'2014-04-25 00:13:54',3.5
'2014-04-25 00:14:00',3.525
'2014-04-25 00:16:48',3.725
3.5
3.525
3.725
给你,代码如下:

import sys
def fileParser(sourcefileName, destinationfileName):
    """Append the maximum reading of every hour-of-day to a file.

    The source file is expected to start with one header line, followed by
    rows shaped like ``'2014-04-25 00:04:48',3.35``. The hour is the text
    between the first space and the first colon after it, and the reading
    is whatever follows the last comma.

    Readings are grouped by hour-of-day only, so rows from different dates
    that share an hour end up in the same group.

    Args:
        sourcefileName: Path of the CSV-like file to read.
        destinationfileName: Path of the file to append results to. One
            maximum per line, ordered by hour. The file is opened in append
            mode, so existing content is kept.

    Raises:
        IOError/OSError: If either file cannot be opened.
        IndexError, ValueError: If a non-blank data row is malformed.
    """
    hourly_max = {}
    # Context managers close the handles even when a row fails to parse.
    with open(sourcefileName) as source:
        next(source, None)  # Skip the header; tolerate an empty file.
        for line in source:
            if not line.strip():
                continue  # Blank lines (e.g. at end of file) carry no data.
            hour = line.split(" ")[1].split(":")[0]
            reading = float(line.split(",")[-1].rstrip())
            if hour in hourly_max and hourly_max[hour] > reading:
                continue
            hourly_max[hour] = reading

    with open(destinationfileName, "a") as destination:
        # Sort the hours so the output order does not depend on dict
        # ordering; plain iteration also works on Python 2 and 3 alike.
        for hour in sorted(hourly_max):
            destination.write(str(hourly_max[hour]) + "\n")

if __name__ == "__main__":
    # Command line: <source file> <destination file>
    source_path, destination_path = sys.argv[1], sys.argv[2]
    fileParser(source_path, destination_path)
执行:

techie@gateway2:~$ python fileReader.py sourceFile.txt destinationFile.txt
输入文件内容:

'time PST', saturn03.820.A.AlgaeWatch [microg/l]
'2014-04-25 00:04:48',3.35
'2014-04-25 00:04:54',3.225
'2014-04-25 00:04:48',3.35
'2014-04-25 00:04:54',3.225
'2014-04-25 00:05:00',3.15
'2014-04-25 00:07:48',3.4
'2014-04-25 00:07:54',3.4
'2014-04-25 00:08:00',3.375
'2014-04-25 00:10:48',3.45
'2014-04-25 00:10:54',3.325
'2014-04-25 00:11:00',3.425
'2014-04-25 00:13:49',3.45
'2014-04-25 00:13:54',3.5
'2014-04-25 01:14:00',3.525
'2014-04-25 01:16:48',3.725
输出文件内容:

3.5
3.725
您可以广泛使用split来实现相同的目标。 希望这会有所帮助。:-)

给定输入:

'time PST', saturn03.820.A.AlgaeWatch [microg/l]
'2014-04-25 00:04:48',3.35
'2014-04-25 00:04:54',3.225
'2014-04-25 00:05:00',3.15
'2014-04-25 00:07:48',3.4
'2014-04-25 00:07:54',3.4
'2014-04-25 00:08:00',3.375
'2014-04-25 00:10:48',3.45
'2014-04-25 00:10:54',3.325
'2014-04-25 00:11:00',3.425
'2014-04-25 00:13:49',3.45
'2014-04-25 00:13:54',3.5
'2014-04-25 01:14:00',3.525
'2014-04-25 02:16:48',3.725
该方案:

#! /usr/bin/env python
"""Usually a ready-made file parser such as the csv module, or even pandas
for a more complete service, is the way to go here, but one may want to
know how to iterate over and parse the data by oneself.
The same holds for the date/time parsing, which one typically also
delegates to the datetime module or the like."""
from __future__ import print_function
import sys


def hourly_maxima(in_file, out_file):
    """Extract the maximum reading of every calendar hour from in_file.

    The first line of in_file is treated as a header and skipped. Every
    following line must look like ``'2014-04-25 00:04:48',3.35`` and the
    lines are expected to be ordered ascending by time stamp. Processing
    stops at the first blank line.

    For each calendar hour (year, month, day, hour) one line holding the
    maximum reading is written to out_file, and the time stamp of the last
    sample of that hour is printed together with the maximum. Nothing is
    written when the file holds no data rows.

    Args:
        in_file: Path of the input file.
        out_file: Path of the output file (overwritten).

    Raises:
        IOError/OSError: If a file cannot be opened.
        ValueError: If a non-blank line is not ``timestamp,reading``.
    """
    field_sep = ','
    date_sep = '-'
    with open(in_file, 'rt') as f_i, open(
            out_file, 'wt') as f_o:  # May raise here
        f_i.readline()  # Ignore header, be optimistic

        current_hour = None  # (year, month, day, hour) being accumulated
        hourly_maximum = None
        last_ts = None  # Time stamp of the latest sample in current_hour

        # Iterate lazily instead of loading the whole file with readlines().
        for line in f_i:
            line = line.strip()
            if not line:
                # Raw lines still carry their newline, so test after strip().
                break  # stop on first empty line
            ts, reading = line.split(field_sep)  # May raise ...
            r_float = float(reading)  # May raise ...

            # Map timestamp ts to calendar hour
            ts_raw = ts.strip("'")
            year, month, day = ts_raw[:10].split(date_sep)
            cand_hour = (year, month, day, ts_raw[11:13])

            if cand_hour != current_hour:
                if current_hour is not None:
                    # Report the finished hour with one of its own time
                    # stamps, not the stamp of the line that opens the
                    # next hour.
                    print(last_ts, hourly_maximum)
                    f_o.write('%s\n' % (str(hourly_maximum)))
                current_hour = cand_hour
                hourly_maximum = r_float
            elif r_float > hourly_maximum:
                hourly_maximum = r_float
            last_ts = ts_raw

        # Flush the last hour, unless there was no data at all.
        if current_hour is not None:
            print(last_ts, hourly_maximum)
            f_o.write('%s\n' % (str(hourly_maximum)))


def main():
    """Run hourly_maxima on the paths taken from the command line.

    The first argument is the input path and the second the output path;
    'dataV.csv' and 'dataV-max.csv' are used when they are missing.
    """
    argv = sys.argv
    in_file = argv[1] if len(argv) >= 2 else 'dataV.csv'
    out_file = argv[2] if len(argv) >= 3 else 'dataV-max.csv'
    hourly_maxima(in_file, out_file)

if __name__ == '__main__':
    # Hand main()'s return value to the interpreter as the exit status.
    exit_status = main()
    sys.exit(exit_status)
标准输出和输出文件中的内容:

'time PST', saturn03.820.A.AlgaeWatch [microg/l]
'2014-04-25 00:04:48',3.35
'2014-04-25 00:04:54',3.225
'2014-04-25 00:05:00',3.15
'2014-04-25 00:07:48',3.4
'2014-04-25 00:07:54',3.4
'2014-04-25 00:08:00',3.375
'2014-04-25 00:10:48',3.45
'2014-04-25 00:10:54',3.325
'2014-04-25 00:11:00',3.425
'2014-04-25 00:13:49',3.45
'2014-04-25 00:13:54',3.5
'2014-04-25 00:14:00',3.525
'2014-04-25 00:16:48',3.725
3.5
3.525
3.725
这就是你想要的吗?我想是的。但仍有很多改进、强化和额外优雅的空间

继续学习python


PS:对不起,我暂时处于脱机状态。

请您澄清一下这个问题好吗?文件的内容是什么?如果您能提供示例文件内容就好了。请给出一段示例内容,以及您对该示例内容的期望结果。@sagar 好的,我已经补充了。谢谢您提供输入内容。您能告诉我期望的输出是什么吗?谢谢您提供输入样本,但是第二行真的缺少开头的单引号吗?如果是这样,您就需要在脚本中加入复杂的逻辑来处理这些错误……请更正数据,或者明确说明数据中的哪些错误应该被接受、忽略或纠正。好的,谢谢您的帮助。我想知道每小时内每一行逗号后面数字的最大值。