Python循环代码,几乎都有解决方案

Python循环代码,几乎都有解决方案,python,parsing,loops,text,notepad,Python,Parsing,Loops,Text,Notepad,我需要一些python代码的帮助。如果可能的话,请告诉我 下面是notepad中4050.log数据文件在执行代码之前的一个示例- port=4050 SOH= hostname=BlueVectorEMA uptime=N/A SHTSOT=N/A iCenter=N/A start=51808152 stop=518083AA cycles=0.0 tagCnt=23 C1Gen1=0.0.0.0 C1Gen2=0.0.0.0 STX= 00045512=119,59,55,60,50,C,

我需要一些python代码的帮助。如果可能的话,请告诉我

下面是notepad中4050.log数据文件在执行代码之前的一个示例-

port=4050
SOH=
hostname=BlueVectorEMA
uptime=N/A
SHTSOT=N/A
iCenter=N/A
start=51808152
stop=518083AA
cycles=0.0
tagCnt=23
C1Gen1=0.0.0.0
C1Gen2=0.0.0.0
STX=
00045512=119,59,55,60,50,C,00,N/A
00052450=120,60,44,60,43,C,00,N/A
00042260=113,54,51,59,43,C,00,N/A
00046999=114,56,46,58,42,C,00,N/A
00043166=117,58,46,59,42,C,00,N/A
00052651=113,55,48,58,42,C,00,N/A
00050594=118,59,43,59,43,C,00,N/A
00051774=120,60,44,60,42,C,00,N/A
00056767=119,59,43,60,42,C,00,N/A
00043181=109,51,56,58,45,C,00,N/A
00033333=115,56,55,59,43,C,00,N/A
00031475=115,57,43,58,45,C,00,N/A
00042817=118,59,48,59,50,C,00,N/A
00054245=118,58,45,60,41,C,00,N/A
00043589=118,58,46,60,43,C,00,N/A
00053037=116,57,54,59,48,C,00,N/A
00053636=114,55,45,59,42,C,00,N/A
00045642=116,57,49,59,42,C,00,N/A
00045591=118,59,46,59,45,C,00,N/A
00033114=116,58,46,58,44,C,00,N/A
00042075=114,56,54,58,48,C,00,N/A
00044491=113,55,42,58,43,C,00,N/A
00000587=18,0,999,18,46,T,403/002,0030H/3.0
ETX=
EOT=
timestamp=5180a211
logdir=2013-05

Wed May  1 00:03:13 2013
--------------------------------------------------------------------------------


port=4050
SOH=
hostname=BlueVectorEMA
uptime=N/A
SHTSOT=N/A
iCenter=N/A
start=518083AA
stop=51808602
cycles=0.0
tagCnt=23
C1Gen1=0.0.0.0
C1Gen2=0.0.0.0
STX=
00053636=119,59,44,60,41,C,00,N/A
00043181=107,48,56,59,45,C,00,N/A
00046999=116,57,46,59,42,C,00,N/A
00031475=117,58,43,59,45,C,00,N/A
00053037=115,55,54,60,48,C,00,N/A
00052651=117,58,47,59,42,C,00,N/A
00042075=117,57,54,60,48,C,00,N/A
00050594=118,59,43,59,43,C,00,N/A
00045512=115,55,55,60,50,C,00,N/A
00044491=113,55,42,58,43,C,00,N/A
00033114=115,57,46,58,44,C,00,N/A
00045591=117,58,46,59,46,C,00,N/A
00052450=117,58,44,59,43,C,00,N/A
00042260=114,56,51,58,42,C,00,N/A
00043166=116,58,47,58,42,C,00,N/A
00042817=117,59,48,58,50,C,00,N/A
00056767=119,59,43,60,42,C,00,N/A
00054245=119,59,45,60,41,C,00,N/A
00043589=117,57,46,60,43,C,00,N/A
00051774=118,59,44,59,42,C,00,N/A
00045642=113,55,50,58,42,C,00,N/A
00033333=111,53,55,58,44,C,00,N/A
00000587=7,0,999,7,47,T,402/202,0028H/2.5
ETX=
EOT=
timestamp=5180a469
logdir=2013-05

Wed May  1 00:13:13 2013
--------------------------------------------------------------------------------
接下来是它运行的python代码示例--

接下来是我目前已经得到的输出示例--

我唯一需要的是让日期与上面的对应行匹配,这样看起来应该是这样的

00052450|120|60|44|60|43|C|00|N/A|4050|BlueVectorEMA|05/01/2013 00:03:13
00042260|113|54|51|59|43|C|00|N/A|4050|BlueVectorEMA|05/01/2013 00:03:13
00046999|114|56|46|58|42|C|00|N/A|4050|BlueVectorEMA|05/01/2013 00:03:13
00043166|117|58|46|59|42|C|00|N/A|4050|BlueVectorEMA|05/01/2013 00:03:13
00052651|113|55|48|58|42|C|00|N/A|4050|BlueVectorEMA|05/01/2013 00:03:13
00050594|118|59|43|59|43|C|00|N/A|4050|BlueVectorEMA|05/01/2013 00:03:13
00051774|120|60|44|60|42|C|00|N/A|4050|BlueVectorEMA|05/01/2013 00:03:13
00056767|119|59|43|60|42|C|00|N/A|4050|BlueVectorEMA|05/01/2013 00:03:13

伙计们,有什么想法吗?

我认为这里有几件事你应该做:(a) 让代码更具可读性,(b) 让逻辑更合理。

所以你有你的日志文件,如果你仔细想想,它本质上是一个字典的集合,每个日期一个

因此,首先,将您的文件拆分为以下每个部分

大概是这样的:

from datetime import datetime

# Split the log into reports. A dashed rule closes each report; whatever
# follows the final rule is not a complete report and is ignored.
logFile, temp = [], []
with open("4050.log") as f:
    for line in f:
        if "------------------" in line:
            logFile.append(temp)
            temp = []
        else:
            temp.append(line)

# Header and framing keys; every other "key=value" line is a tag reading.
keysWeDontWant = set(
    "port SOH hostname SHTSOT iCenter start stop cycles tagCnt "
    "C1Gen1 C1Gen2 STX ETX EOT timestamp logdir uptime".split()
)

# Reports are processed one at a time, in file order, so the output rows keep
# the same order as the input (a plain dict would not guarantee that).
for log in logFile:
    lines = [text for text in (raw.strip() for raw in log) if text]
    if not lines:
        # Two rules in a row (or a leading rule): there is nothing to report.
        continue

    # The last non-blank line before the rule is the report's date stamp.
    # "%b" parses the abbreviated month used in the log ("May", "Jun", ...),
    # and "%m" emits the month number ("%M" would emit the minutes instead).
    dateTimeString = datetime.strptime(
        lines[-1], "%a %b %d %H:%M:%S %Y"
    ).strftime("%m/%d/%Y %H:%M:%S")

    # partition() splits on the first "=" only, so a value that itself
    # contains "=" is kept whole.
    fields = [
        (key, value.strip())
        for key, sep, value in (text.partition("=") for text in lines)
        if sep
    ]
    header = dict(fields)

    for key, value in fields:
        # Skip header keys and the "00000..." summary record.
        if key in keysWeDontWant or key.startswith("00000"):
            continue
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("%s|%s|4050|%s|%s" % (
            key, value.replace(",", "|"), header["hostname"], dateTimeString))

这段代码可以完成任务,而且更简洁一些。但它仍然不够好:要把它做好,需要了解日志文件的格式规范。另外,我要去睡觉了。

您可以做几件事:

  • 使用函数(它们可以让您将工作单元分解为易于理解的部分)
  • 使用正则表达式(它们将大大简化模式匹配)
  • 使用生成器(它们将帮助您一次性完成所有工作)
下面是一些稍加修改的代码和注释,以帮助您开始:

import collections
import re

# Matches a tag reading such as "00045512=119,59,55,60,50,C,00,N/A":
# group 1 is the eight-digit id, group 2 is everything after the "=".
# Raw strings keep the regex escapes (\d, \s, \w) away from Python's own
# string-escape processing.
DATA_LINE = re.compile(r"(\d{8})=(.*)")

# Matches the "May  1 00:03:13 2013" part of a date line.
DATE = re.compile(r"""
    (?P<month>\w+)    # Match one or more word characters
                      # (store match as "month")
    \s+               # followed by one or more whitespace characters
    (?P<day>\d{1,2})  # followed by one or two digits
                      # (store match as "day")
    \s+               # followed by one or more whitespace characters
    (?P<time>\d{2}:\d{2}:\d{2})
                      # followed by two digits, a colon, two digits,
                      # another colon, and two more digits
                      # (store match as "time")
    \s+               # followed by one or more whitespace characters
    (?P<year>\d{4})   # followed by four digits
                      # (store match as "year")
    """, re.VERBOSE)

def interesting_lines(logfile_path):
    """Yield the lines of interest from a log file.

    Each item is a ``(processed_line, is_date)`` tuple:

    * for a line matching ``DATA_LINE``, ``processed_line`` is a list of
      strings (the id followed by the comma-separated values) and
      ``is_date`` is ``False``;
    * for a line matching ``DATE``, ``processed_line`` is the date
      re-formatted as ``MM/DD/YYYY HH:MM:SS`` and ``is_date`` is ``True``.

    All other lines are skipped.

    Raises:
        ValueError: if a line matches ``DATE`` but is not a complete
            ``"Wed May  1 00:03:13 2013"`` style timestamp.
    """
    # Imported locally because this snippet's own imports do not provide it.
    from datetime import datetime

    # The context manager closes the file no matter what happens.
    with open(logfile_path) as file_object:
        for line in file_object:
            data = DATA_LINE.match(line)
            if data:
                tag_id, readings = data.groups()
                yield [tag_id] + readings.split(","), False

            elif DATE.search(line):
                # strptime rejects trailing characters, so the newline has to
                # be stripped first. "%b" parses the abbreviated month name,
                # and "%m" emits the month number ("%M" is minutes).
                parsed = datetime.strptime(line.strip(),
                                           "%a %b %d %H:%M:%S %Y")
                yield parsed.strftime("%m/%d/%Y %H:%M:%S"), True


def combine_with_date(lines):
    """Attach to every data row the date that follows it in the stream.

    Args:
        lines: iterable of ``(line, is_date)`` tuples. When ``is_date`` is
            false, ``line`` is a sequence of fields; when it is true,
            ``line`` is the date value for all rows seen since the
            previous date.

    Yields:
        A new list per data row: the row's fields followed by its date.
        Rows that are never followed by a date are not yielded.
    """
    pending = []
    for line, is_date in lines:
        if not is_date:
            pending.append(line)
            continue
        # Only data rows are buffered; the date itself is a plain value and
        # must not be treated as a row.
        for row in pending:
            yield list(row) + [line]
        pending = []

def write_to_file(iterable_of_lines, output_file=None):
    """Write every row as one pipe-delimited line.

    Args:
        iterable_of_lines: iterable of rows, each a sequence of fields.
        output_file: writable text file object that receives the rows;
            defaults to ``sys.stdout``.
    """
    # Imported locally because this snippet's own imports do not provide them.
    import csv
    import sys

    if output_file is None:
        output_file = sys.stdout
    # An explicit "\n" terminator avoids the csv default of "\r\n".
    writer = csv.writer(output_file, delimiter="|", lineterminator="\n")
    writer.writerows(iterable_of_lines)


if __name__ == "__main__":
    # raw_input only exists on Python 2; Python 3 renamed it to input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    file_path = read_line("Please provide a file path:")
    # Pipeline: pick out the relevant lines, attach each report's date to
    # its rows, then write the rows out.
    line_generator = interesting_lines(file_path)
    processed_lines = combine_with_date(line_generator)
    write_to_file(processed_lines)
导入集合
进口稀土
数据行=重新编译((\d{8})=(.*))
日期=重新编译(“”)
(?P\w+)#匹配一个或多个字母数字字符
#(存储匹配为“月”)
\s+#后跟一个或多个空格字符
(?P\d{1,2})#后跟一个或两个数字
#(将匹配存储为“日”)
\s+#后跟一个或多个空格字符
(?P\d{2}:\d{2}:\d{2})
#后跟两个数字,一个冒号,两个数字,
#另一个冒号,还有两个数字
#(将匹配存储为“时间”)
\s+#后跟一个或多个空格字符
(?P\d{4})#后跟for数字
#(存储匹配为“年”)
“”,re.VERBOSE)
def有趣的\u行(日志文件\u路径):
“以元组表示的感兴趣的产量线”
``(已处理的行,是日期)`.”
#首先,确保我们将关闭该文件
#不管发生什么事
打开(日志文件路径)作为文件对象:
#循环所有行(文件也是迭代器)
对于文件\u对象中的行:
数据=数据线。匹配(线)
如果数据:
#如果此行有数据,则获取匹配的数据
#(等号前的数字及其后的所有内容)
#将其作为字符串列表传递,其中带有标记
#这是一条数据线
yield“,”.join(data.groups()).split(“,”),False
elif日期搜索(行):
#否则,如果行是日期
#传递重新格式化的字符串和标记
#此行作为日期行
生成datetime.strTime(第行,“%a%B%d%H:%M:%S%Y”)\
.strftime(“%M/%d/%Y%H:%M:%S”),True
def组合_与_日期(行):
buffer=collections.deque()
对于第行,日期是否为第行:
buffer.append(行)
如果是(日期):
当前日期=行
对于缓冲区中的行:
行。追加(当前_日期)
生产线
buffer.clear()
def写入文件(可编辑的行数):
#TODO:使用管道分隔方言创建CSV编写器
#并写入文件
如果名称=“\uuuuu main\uuuuuuuu”:
文件路径=原始输入(“请提供文件路径:”)
行生成器=感兴趣的行(文件路径)
已处理的\u行=将\u与\u日期合并(行\u生成器)
将\u写入\u文件(已处理的\u行)

这是您在那里创建的一些非常可怕的代码。为什么?这看起来像是一个关于何时应该使用正则表达式的教科书式示例,却没有用正则表达式来解决。也就是说,去看一看正则表达式吧。 我对Python很陌生,它能把数据转换成我需要的格式,所以我只保留了有用的东西。 @JustinM - 我猜是的。我现在在回答中给你写了一些建议。 我不确定你的代码是否正确。当你仔细观察输出时,它看起来有点随机。你能不能仔细检查一下,然后再回复我? 你怎么让它的输出顺序和输入的顺序一样? 你可以使用有序字典(OrderedDict),而不仅仅是普通的字典。
from datetime import datetime

# Split the log into reports. A dashed rule closes each report; whatever
# follows the final rule is not a complete report and is ignored.
logFile, temp = [], []
with open("4050.log") as f:
    for line in f:
        if "------------------" in line:
            logFile.append(temp)
            temp = []
        else:
            temp.append(line)

# Header and framing keys; every other "key=value" line is a tag reading.
keysWeDontWant = set(
    "port SOH hostname SHTSOT iCenter start stop cycles tagCnt "
    "C1Gen1 C1Gen2 STX ETX EOT timestamp logdir uptime".split()
)

# Reports are processed one at a time, in file order, so the output rows keep
# the same order as the input (a plain dict would not guarantee that).
for log in logFile:
    lines = [text for text in (raw.strip() for raw in log) if text]
    if not lines:
        # Two rules in a row (or a leading rule): there is nothing to report.
        continue

    # The last non-blank line before the rule is the report's date stamp.
    # "%b" parses the abbreviated month used in the log ("May", "Jun", ...),
    # and "%m" emits the month number ("%M" would emit the minutes instead).
    dateTimeString = datetime.strptime(
        lines[-1], "%a %b %d %H:%M:%S %Y"
    ).strftime("%m/%d/%Y %H:%M:%S")

    # partition() splits on the first "=" only, so a value that itself
    # contains "=" is kept whole.
    fields = [
        (key, value.strip())
        for key, sep, value in (text.partition("=") for text in lines)
        if sep
    ]
    header = dict(fields)

    for key, value in fields:
        # Skip header keys and the "00000..." summary record.
        if key in keysWeDontWant or key.startswith("00000"):
            continue
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("%s|%s|4050|%s|%s" % (
            key, value.replace(",", "|"), header["hostname"], dateTimeString))
import collections
import re

# Matches a tag reading such as "00045512=119,59,55,60,50,C,00,N/A":
# group 1 is the eight-digit id, group 2 is everything after the "=".
# Raw strings keep the regex escapes (\d, \s, \w) away from Python's own
# string-escape processing.
DATA_LINE = re.compile(r"(\d{8})=(.*)")

# Matches the "May  1 00:03:13 2013" part of a date line.
DATE = re.compile(r"""
    (?P<month>\w+)    # Match one or more word characters
                      # (store match as "month")
    \s+               # followed by one or more whitespace characters
    (?P<day>\d{1,2})  # followed by one or two digits
                      # (store match as "day")
    \s+               # followed by one or more whitespace characters
    (?P<time>\d{2}:\d{2}:\d{2})
                      # followed by two digits, a colon, two digits,
                      # another colon, and two more digits
                      # (store match as "time")
    \s+               # followed by one or more whitespace characters
    (?P<year>\d{4})   # followed by four digits
                      # (store match as "year")
    """, re.VERBOSE)

def interesting_lines(logfile_path):
    """Yield the lines of interest from a log file.

    Each item is a ``(processed_line, is_date)`` tuple:

    * for a line matching ``DATA_LINE``, ``processed_line`` is a list of
      strings (the id followed by the comma-separated values) and
      ``is_date`` is ``False``;
    * for a line matching ``DATE``, ``processed_line`` is the date
      re-formatted as ``MM/DD/YYYY HH:MM:SS`` and ``is_date`` is ``True``.

    All other lines are skipped.

    Raises:
        ValueError: if a line matches ``DATE`` but is not a complete
            ``"Wed May  1 00:03:13 2013"`` style timestamp.
    """
    # Imported locally so the function does not depend on import order
    # elsewhere in the file.
    from datetime import datetime

    # The context manager closes the file no matter what happens.
    with open(logfile_path) as file_object:
        for line in file_object:
            data = DATA_LINE.match(line)
            if data:
                tag_id, readings = data.groups()
                yield [tag_id] + readings.split(","), False

            elif DATE.search(line):
                # strptime rejects trailing characters, so the newline has to
                # be stripped first. "%b" parses the abbreviated month name,
                # and "%m" emits the month number ("%M" is minutes).
                parsed = datetime.strptime(line.strip(),
                                           "%a %b %d %H:%M:%S %Y")
                yield parsed.strftime("%m/%d/%Y %H:%M:%S"), True


def combine_with_date(lines):
    """Attach to every data row the date that follows it in the stream.

    Args:
        lines: iterable of ``(line, is_date)`` tuples. When ``is_date`` is
            false, ``line`` is a sequence of fields; when it is true,
            ``line`` is the date value for all rows seen since the
            previous date.

    Yields:
        A new list per data row: the row's fields followed by its date.
        Rows that are never followed by a date are not yielded.
    """
    pending = []
    for line, is_date in lines:
        if not is_date:
            pending.append(line)
            continue
        # Only data rows are buffered; the date itself is a plain value and
        # must not be treated as a row.
        for row in pending:
            yield list(row) + [line]
        pending = []

def write_to_file(iterable_of_lines, output_file=None):
    """Write every row as one pipe-delimited line.

    Args:
        iterable_of_lines: iterable of rows, each a sequence of fields.
        output_file: writable text file object that receives the rows;
            defaults to ``sys.stdout``.
    """
    # Imported locally because the file's visible imports do not provide them.
    import csv
    import sys

    if output_file is None:
        output_file = sys.stdout
    # An explicit "\n" terminator avoids the csv default of "\r\n".
    writer = csv.writer(output_file, delimiter="|", lineterminator="\n")
    writer.writerows(iterable_of_lines)


if __name__ == "__main__":
    # raw_input only exists on Python 2; Python 3 renamed it to input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    file_path = read_line("Please provide a file path:")
    # Pipeline: pick out the relevant lines, attach each report's date to
    # its rows, then write the rows out.
    line_generator = interesting_lines(file_path)
    processed_lines = combine_with_date(line_generator)
    write_to_file(processed_lines)