Python - 仅当匹配数据存在时才进入循环
我有一个从 stdin 读取数据的脚本,例如:
#################################
# Retrieve NMON data from stdin #
#################################

# Slurp all nmon lines from standard input into a list
# (iterating a text file yields one line per element, newline included).
data = list(sys.stdin)
然后,部分代码使用正则表达式搜索进行转换并生成数据:
###################
# Dynamic Sections : data requires to be transposed to be exploitable within Splunk
###################

# Lookup table used to turn nmon month names into numbers.
# Hoisted here because it is loop-invariant: it used to be rebuilt
# for every matching timestamp line.
month_to_numbers = {'JAN': '01', 'FEB': '02', 'MAR': '03', 'APR': '04',
                    'MAY': '05', 'JUN': '06', 'JUL': '07', 'AUG': '08',
                    'SEP': '09', 'OCT': '10', 'NOV': '11', 'DEC': '12'}

dynamic_section = ["DISKBUSY", "DISKBSIZE", "DISKREAD", "DISKWRITE"]

for section in dynamic_section:

    # Set output file (will be opened for writing after data transposition)
    currsection_output = DATA_DIR + HOSTNAME + '_' + day + '_' + month + '_' + year + '_' + hour + minute + second + '_' + section + '.nmon.csv'

    # Open StringIO for temp in memory
    buffer = cStringIO.StringIO()

    # counter of lines written for this section
    count = 0

    for line in data:

        # BUGFIX: find_section was tested below but never assigned,
        # raising a NameError on the first iteration.  Keep only the
        # lines belonging to this section (SECTION, SECTION1, ...) or
        # the ZZZZ timestamp records — same filter the final version
        # of this script uses.
        myregex = r'^' + section + '[0-9]*' + '|ZZZZ.+'
        find_section = re.match(myregex, line)

        if find_section:

            # csv header
            # Replace some symbols that would confuse the csv consumer
            line = re.sub("%", '_PCT', line)
            line = re.sub(" ", '_', line)  # (was applied twice; once is enough)
            line = re.sub(r"\+", '', line)
            line = re.sub(r"\(", '_', line)
            line = re.sub(r"\)", '_', line)
            line = re.sub(r"\.", '_', line)

            # Extract header excluding data that always has Txxxx for timestamp reference
            myregex = '(' + section + r')\,([^T].+)'
            fullheader_match = re.search(myregex, line)

            if fullheader_match:
                fullheader = fullheader_match.group(2)
                header_match = re.match(r'([a-zA-Z\-\/\_0-9]+,)([a-zA-Z\-\/\_0-9\,]*)', fullheader)

                if header_match:
                    header = header_match.group(2)

                    # Write header
                    buffer.write('ZZZZ' + ',' + header + '\n')

            # Extract timestamp
            # Nmon V9 and prior do not have date in ZZZZ
            # If unavailable, we'll use the global date (AAA,date)
            ZZZZ_DATE = '-1'
            ZZZZ_TIME = '-1'

            # For Nmon V10 and more
            timestamp_match = re.match(r'^ZZZZ\,(.+)\,(.+)\,(.+)\n', line)
            if timestamp_match:
                ZZZZ_TIME = timestamp_match.group(2)
                ZZZZ_DATE = timestamp_match.group(3)
                # Convert month names to numbers
                for k, v in month_to_numbers.items():
                    ZZZZ_DATE = ZZZZ_DATE.replace(k, v)
                ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME

            # For Nmon V9 and less: no date in ZZZZ, use the global date
            if ZZZZ_DATE == '-1':
                ZZZZ_DATE = DATE
                timestamp_match = re.match(r'^ZZZZ\,(.+)\,(.+)\n', line)
                if timestamp_match:
                    ZZZZ_TIME = timestamp_match.group(2)
                    # Convert month names to numbers
                    for k, v in month_to_numbers.items():
                        ZZZZ_DATE = ZZZZ_DATE.replace(k, v)
                    ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME

            # Extract Data
            myregex = r'^' + section + r'\,(T\d+)\,(.+)\n'
            perfdata_match = re.match(myregex, line)
            if perfdata_match:
                perfdata = perfdata_match.group(2)

                # Write perf data
                buffer.write(ZZZZ_timestamp + ',' + perfdata + '\n')

    # Open final for writing
    with open(currsection_output, "w") as currsection:

        # Rewind temp
        buffer.seek(0)

        writer = csv.writer(currsection)
        writer.writerow(['type', 'serialnum', 'hostname', 'ZZZZ', 'device', 'value'])

        # the header row counts as one written line
        count += 1

        for d in csv.DictReader(buffer):
            ZZZZ = d.pop('ZZZZ')
            for device, value in sorted(d.items()):
                count += 1
                row = [section, SN, HOSTNAME, ZZZZ, device, value]
                writer.writerow(row)
        # End for

    # Show number of lines extracted
    result = section + " section: Wrote" + " " + str(count) + " lines"
    print (result)
    ref.write(result + "\n")

    # Discard memory buffer
    buffer.close()
# End for
如果从 stdin 检索(并存储在 data 中)的内容里没有该节的数据,如何防止进入主 for 循环?
谢谢你的帮助 我终于找到了一种处理方法,在主循环之前添加一个循环/计数器,例如:
# Pre-scan pass: count this section's data lines first, so the expensive
# transposition loop is only entered when matching data actually exists.
dynamic_section = ["IOADAPT","NETERROR","NET","NETPACKET","JFSFILE","JFSINODE"]
for section in dynamic_section:
    # counter
    count = 0
    for line in data:
        # Extract sections, and write to output
        myregex = r'^' + section + '\,(T\d+)\,(.+)\n'
        find_section = re.match( myregex, line)
        if find_section:
            # increment
            count += 1
    # Proceed only when the section has enough data lines; the body of
    # this `if` is elided in this excerpt ("the rest of the code").
    if count > 2:
以及代码的其余部分
最后,完整的代码:
# Lookup table used to turn nmon month names into numbers.
# Hoisted here because it is loop-invariant: it used to be rebuilt
# for every matching timestamp line.
month_to_numbers = {'JAN': '01', 'FEB': '02', 'MAR': '03', 'APR': '04',
                    'MAY': '05', 'JUN': '06', 'JUL': '07', 'AUG': '08',
                    'SEP': '09', 'OCT': '10', 'NOV': '11', 'DEC': '12'}

dynamic_section = ["IOADAPT","NETERROR","NET","NETPACKET","JFSFILE","JFSINODE"]

for section in dynamic_section:

    # Pre-scan: count this section's data lines so the expensive
    # transposition pass below only runs when the section has data.
    count = 0
    for line in data:
        # Extract sections, and write to output
        myregex = r'^' + section + r'\,(T\d+)\,(.+)\n'
        find_section = re.match(myregex, line)
        if find_section:
            count += 1

    # Fewer than three matching lines: nothing worth writing, skip section
    if count > 2:

        # Set output file (will be opened for writing after data transposition)
        currsection_output = DATA_DIR + HOSTNAME + '_' + day + '_' + month + '_' + year + '_' + hour + minute + second + '_' + section + '.nmon.csv'

        # Open StringIO for temp in memory
        buffer = cStringIO.StringIO()

        # counter of lines written for this section
        count = 0

        for line in data:

            # Keep only this section's lines (SECTION, SECTION1, ...)
            # and the ZZZZ timestamp records
            myregex = r'^' + section + '[0-9]*' + '|ZZZZ.+'
            find_section = re.match(myregex, line)

            if find_section:

                # csv header
                # Replace some symbols that would confuse the csv consumer
                line = re.sub("%", '_PCT', line)
                line = re.sub(" ", '_', line)  # (was applied twice; once is enough)
                line = re.sub(r"\+", '', line)
                line = re.sub(r"\(", '_', line)
                line = re.sub(r"\)", '_', line)

                # Extract header excluding data that always has Txxxx for timestamp reference
                myregex = '(' + section + r')\,([^T].+)'
                fullheader_match = re.search(myregex, line)

                if fullheader_match:
                    fullheader = fullheader_match.group(2)

                    # Replace "." by "_" only for header
                    fullheader = re.sub(r"\.", '_', fullheader)

                    header_match = re.match(r'([a-zA-Z\-\/\_0-9]+,)([a-zA-Z\-\/\_0-9\,]*)', fullheader)
                    if header_match:
                        header = header_match.group(2)

                        # Write header
                        buffer.write('ZZZZ' + ',' + header + '\n')

                # Extract timestamp
                # Nmon V9 and prior do not have date in ZZZZ
                # If unavailable, we'll use the global date (AAA,date)
                ZZZZ_DATE = '-1'
                ZZZZ_TIME = '-1'

                # For Nmon V10 and more
                timestamp_match = re.match(r'^ZZZZ\,(.+)\,(.+)\,(.+)\n', line)
                if timestamp_match:
                    ZZZZ_TIME = timestamp_match.group(2)
                    ZZZZ_DATE = timestamp_match.group(3)
                    # Convert month names to numbers
                    for k, v in month_to_numbers.items():
                        ZZZZ_DATE = ZZZZ_DATE.replace(k, v)
                    ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME

                # For Nmon V9 and less: no date in ZZZZ, use the global date
                if ZZZZ_DATE == '-1':
                    ZZZZ_DATE = DATE
                    timestamp_match = re.match(r'^ZZZZ\,(.+)\,(.+)\n', line)
                    if timestamp_match:
                        ZZZZ_TIME = timestamp_match.group(2)
                        # Convert month names to numbers
                        for k, v in month_to_numbers.items():
                            ZZZZ_DATE = ZZZZ_DATE.replace(k, v)
                        ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME

                # Extract Data
                myregex = r'^' + section + r'\,(T\d+)\,(.+)\n'
                perfdata_match = re.match(myregex, line)
                if perfdata_match:
                    perfdata = perfdata_match.group(2)

                    # Write perf data
                    buffer.write(ZZZZ_timestamp + ',' + perfdata + '\n')

        # Open final for writing
        with open(currsection_output, "w") as currsection:

            # Rewind temp
            buffer.seek(0)

            writer = csv.writer(currsection)
            writer.writerow(['type', 'serialnum', 'hostname', 'ZZZZ', 'device', 'value'])

            # the header row counts as one written line
            count += 1

            for d in csv.DictReader(buffer):
                ZZZZ = d.pop('ZZZZ')
                for device, value in sorted(d.items()):
                    count += 1
                    row = [section, SN, HOSTNAME, ZZZZ, device, value]
                    writer.writerow(row)
            # End for

        # Show number of lines extracted
        result = section + " section: Wrote" + " " + str(count) + " lines"
        print (result)
        ref.write(result + "\n")

        # Discard memory buffer
        buffer.close()
# End for
— 你不能用一个简单的 if 语句吗?即 `if data:` 则继续执行 for 循环,`else: pass`。还是我误解了你的问题?
— 我想可以,但我能不能不逐行搜索 data 中的模式?
— 你到底想做什么?通过谷歌搜索并查看你的代码,你似乎想从带有某种结构的数据源(即 nmon 数据)中提取数据。也许你应该检查一下能否以结构化格式(CSV/XML 等)获取这些数据,这样可以更容易、更快地提取你想要的内容。但如果你的目标只是从文本文件中提取任意字符串,那么是的,"逐行"是你的最佳选择。
— 谢谢你的回答。是的,它是结构化数据,但不能像普通的 csv 或其他格式那样直接读取,这就是为什么我需要用正则表达式来提取数据。我想要的是:如果 data 中不存在该节,就不要进入主 for 循环;我已经在循环中逐行提取数据了。例如,如果 data 中连一行匹配 "DISKBUSY,T.+"(正则)的内容都没有,就无需进入循环。但能否只对 data 整体而不是其中的行执行 re.search?如果试图搜索整个 data 而不是 data 中的行,Python 会报错。那么也可以在 shell 中执行 grep,然后基于返回码执行 if。