Python - 仅当匹配数据存在时才进入循环
我有一个从 stdin 读取数据的脚本,例如:
#################################
# Retrieve NMON data from stdin #
#################################

# Slurp all nmon lines from standard input into a list
# (iterating a text file yields one line per element, newline included).
data = list(sys.stdin)
然后,部分代码使用正则表达式搜索进行转换并生成数据:
###################
# Dynamic Sections : data requires to be transposed to be exploitable within Splunk
###################

# Lookup table used to turn nmon month names into numbers.
# Hoisted here because it is loop-invariant: it used to be rebuilt
# for every matching timestamp line.
month_to_numbers = {'JAN': '01', 'FEB': '02', 'MAR': '03', 'APR': '04',
                    'MAY': '05', 'JUN': '06', 'JUL': '07', 'AUG': '08',
                    'SEP': '09', 'OCT': '10', 'NOV': '11', 'DEC': '12'}

dynamic_section = ["DISKBUSY", "DISKBSIZE", "DISKREAD", "DISKWRITE"]

for section in dynamic_section:

    # Set output file (will be opened for writing after data transposition)
    currsection_output = DATA_DIR + HOSTNAME + '_' + day + '_' + month + '_' + year + '_' + hour + minute + second + '_' + section + '.nmon.csv'

    # Open StringIO for temp in memory
    buffer = cStringIO.StringIO()

    # counter of lines written for this section
    count = 0

    for line in data:

        # BUGFIX: find_section was tested below but never assigned,
        # raising a NameError on the first iteration.  Keep only the
        # lines belonging to this section (SECTION, SECTION1, ...) or
        # the ZZZZ timestamp records — same filter the final version
        # of this script uses.
        myregex = r'^' + section + '[0-9]*' + '|ZZZZ.+'
        find_section = re.match(myregex, line)

        if find_section:

            # csv header
            # Replace some symbols that would confuse the csv consumer
            line = re.sub("%", '_PCT', line)
            line = re.sub(" ", '_', line)  # (was applied twice; once is enough)
            line = re.sub(r"\+", '', line)
            line = re.sub(r"\(", '_', line)
            line = re.sub(r"\)", '_', line)
            line = re.sub(r"\.", '_', line)

            # Extract header excluding data that always has Txxxx for timestamp reference
            myregex = '(' + section + r')\,([^T].+)'
            fullheader_match = re.search(myregex, line)

            if fullheader_match:
                fullheader = fullheader_match.group(2)
                header_match = re.match(r'([a-zA-Z\-\/\_0-9]+,)([a-zA-Z\-\/\_0-9\,]*)', fullheader)

                if header_match:
                    header = header_match.group(2)

                    # Write header
                    buffer.write('ZZZZ' + ',' + header + '\n')

            # Extract timestamp
            # Nmon V9 and prior do not have date in ZZZZ
            # If unavailable, we'll use the global date (AAA,date)
            ZZZZ_DATE = '-1'
            ZZZZ_TIME = '-1'

            # For Nmon V10 and more
            timestamp_match = re.match(r'^ZZZZ\,(.+)\,(.+)\,(.+)\n', line)
            if timestamp_match:
                ZZZZ_TIME = timestamp_match.group(2)
                ZZZZ_DATE = timestamp_match.group(3)
                # Convert month names to numbers
                for k, v in month_to_numbers.items():
                    ZZZZ_DATE = ZZZZ_DATE.replace(k, v)
                ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME

            # For Nmon V9 and less: no date in ZZZZ, use the global date
            if ZZZZ_DATE == '-1':
                ZZZZ_DATE = DATE
                timestamp_match = re.match(r'^ZZZZ\,(.+)\,(.+)\n', line)
                if timestamp_match:
                    ZZZZ_TIME = timestamp_match.group(2)
                    # Convert month names to numbers
                    for k, v in month_to_numbers.items():
                        ZZZZ_DATE = ZZZZ_DATE.replace(k, v)
                    ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME

            # Extract Data
            myregex = r'^' + section + r'\,(T\d+)\,(.+)\n'
            perfdata_match = re.match(myregex, line)
            if perfdata_match:
                perfdata = perfdata_match.group(2)

                # Write perf data
                buffer.write(ZZZZ_timestamp + ',' + perfdata + '\n')

    # Open final for writing
    with open(currsection_output, "w") as currsection:

        # Rewind temp
        buffer.seek(0)

        writer = csv.writer(currsection)
        writer.writerow(['type', 'serialnum', 'hostname', 'ZZZZ', 'device', 'value'])

        # the header row counts as one written line
        count += 1

        for d in csv.DictReader(buffer):
            ZZZZ = d.pop('ZZZZ')
            for device, value in sorted(d.items()):
                count += 1
                row = [section, SN, HOSTNAME, ZZZZ, device, value]
                writer.writerow(row)
        # End for

    # Show number of lines extracted
    result = section + " section: Wrote" + " " + str(count) + " lines"
    print (result)
    ref.write(result + "\n")

    # Discard memory buffer
    buffer.close()
# End for
如果从 stdin 检索(并存储在 data 中)的内容里没有该节的数据,如何防止进入主 for 循环?
谢谢你的帮助 我终于找到了一种处理方法,在主循环之前添加一个循环/计数器,例如:
# Pre-scan pass: count this section's data lines first, so the expensive
# transposition loop is only entered when matching data actually exists.
dynamic_section = ["IOADAPT","NETERROR","NET","NETPACKET","JFSFILE","JFSINODE"]
for section in dynamic_section:
    # counter
    count = 0
    for line in data:
        # Extract sections, and write to output
        myregex = r'^' + section + '\,(T\d+)\,(.+)\n'
        find_section = re.match( myregex, line)
        if find_section:
            # increment
            count += 1
    # Proceed only when the section has enough data lines; the body of
    # this `if` is elided in this excerpt ("the rest of the code").
    if count > 2:
以及代码的其余部分
最后,完整的代码:
# Lookup table used to turn nmon month names into numbers.
# Hoisted here because it is loop-invariant: it used to be rebuilt
# for every matching timestamp line.
month_to_numbers = {'JAN': '01', 'FEB': '02', 'MAR': '03', 'APR': '04',
                    'MAY': '05', 'JUN': '06', 'JUL': '07', 'AUG': '08',
                    'SEP': '09', 'OCT': '10', 'NOV': '11', 'DEC': '12'}

dynamic_section = ["IOADAPT","NETERROR","NET","NETPACKET","JFSFILE","JFSINODE"]

for section in dynamic_section:

    # Pre-scan: count this section's data lines so the expensive
    # transposition pass below only runs when the section has data.
    count = 0
    for line in data:
        # Extract sections, and write to output
        myregex = r'^' + section + r'\,(T\d+)\,(.+)\n'
        find_section = re.match(myregex, line)
        if find_section:
            count += 1

    # Fewer than three matching lines: nothing worth writing, skip section
    if count > 2:

        # Set output file (will be opened for writing after data transposition)
        currsection_output = DATA_DIR + HOSTNAME + '_' + day + '_' + month + '_' + year + '_' + hour + minute + second + '_' + section + '.nmon.csv'

        # Open StringIO for temp in memory
        buffer = cStringIO.StringIO()

        # counter of lines written for this section
        count = 0

        for line in data:

            # Keep only this section's lines (SECTION, SECTION1, ...)
            # and the ZZZZ timestamp records
            myregex = r'^' + section + '[0-9]*' + '|ZZZZ.+'
            find_section = re.match(myregex, line)

            if find_section:

                # csv header
                # Replace some symbols that would confuse the csv consumer
                line = re.sub("%", '_PCT', line)
                line = re.sub(" ", '_', line)  # (was applied twice; once is enough)
                line = re.sub(r"\+", '', line)
                line = re.sub(r"\(", '_', line)
                line = re.sub(r"\)", '_', line)

                # Extract header excluding data that always has Txxxx for timestamp reference
                myregex = '(' + section + r')\,([^T].+)'
                fullheader_match = re.search(myregex, line)

                if fullheader_match:
                    fullheader = fullheader_match.group(2)

                    # Replace "." by "_" only for header
                    fullheader = re.sub(r"\.", '_', fullheader)

                    header_match = re.match(r'([a-zA-Z\-\/\_0-9]+,)([a-zA-Z\-\/\_0-9\,]*)', fullheader)
                    if header_match:
                        header = header_match.group(2)

                        # Write header
                        buffer.write('ZZZZ' + ',' + header + '\n')

                # Extract timestamp
                # Nmon V9 and prior do not have date in ZZZZ
                # If unavailable, we'll use the global date (AAA,date)
                ZZZZ_DATE = '-1'
                ZZZZ_TIME = '-1'

                # For Nmon V10 and more
                timestamp_match = re.match(r'^ZZZZ\,(.+)\,(.+)\,(.+)\n', line)
                if timestamp_match:
                    ZZZZ_TIME = timestamp_match.group(2)
                    ZZZZ_DATE = timestamp_match.group(3)
                    # Convert month names to numbers
                    for k, v in month_to_numbers.items():
                        ZZZZ_DATE = ZZZZ_DATE.replace(k, v)
                    ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME

                # For Nmon V9 and less: no date in ZZZZ, use the global date
                if ZZZZ_DATE == '-1':
                    ZZZZ_DATE = DATE
                    timestamp_match = re.match(r'^ZZZZ\,(.+)\,(.+)\n', line)
                    if timestamp_match:
                        ZZZZ_TIME = timestamp_match.group(2)
                        # Convert month names to numbers
                        for k, v in month_to_numbers.items():
                            ZZZZ_DATE = ZZZZ_DATE.replace(k, v)
                        ZZZZ_timestamp = ZZZZ_DATE + ' ' + ZZZZ_TIME

                # Extract Data
                myregex = r'^' + section + r'\,(T\d+)\,(.+)\n'
                perfdata_match = re.match(myregex, line)
                if perfdata_match:
                    perfdata = perfdata_match.group(2)

                    # Write perf data
                    buffer.write(ZZZZ_timestamp + ',' + perfdata + '\n')

        # Open final for writing
        with open(currsection_output, "w") as currsection:

            # Rewind temp
            buffer.seek(0)

            writer = csv.writer(currsection)
            writer.writerow(['type', 'serialnum', 'hostname', 'ZZZZ', 'device', 'value'])

            # the header row counts as one written line
            count += 1

            for d in csv.DictReader(buffer):
                ZZZZ = d.pop('ZZZZ')
                for device, value in sorted(d.items()):
                    count += 1
                    row = [section, SN, HOSTNAME, ZZZZ, device, value]
                    writer.writerow(row)
            # End for

        # Show number of lines extracted
        result = section + " section: Wrote" + " " + str(count) + " lines"
        print (result)
        ref.write(result + "\n")

        # Discard memory buffer
        buffer.close()
# End for
— 你不能用一个简单的 if 语句吗?即 `if data:` 则继续执行 for 循环,`else: pass`。还是我误解了你的问题?
— 我想可以,但我能不能不逐行搜索 data 中的模式?
— 你到底想做什么?通过谷歌搜索并查看你的代码,你似乎想从带有某种结构的数据源(即 nmon 数据)中提取数据。也许你应该检查一下能否以结构化格式(CSV/XML 等)获取这些数据,这样可以更容易、更快地提取你想要的内容。但如果你的目标只是从文本文件中提取任意字符串,那么是的,"逐行"是你的最佳选择。
— 谢谢你的回答。是的,它是结构化数据,但不能像普通的 csv 或其他格式那样直接读取,这就是为什么我需要用正则表达式来提取数据。我想要的是:如果 data 中不存在该节,就不要进入主 for 循环;我已经在循环中逐行提取数据了。例如,如果 data 中连一行匹配 "DISKBUSY,T.+"(正则)的内容都没有,就无需进入循环。但能否只对 data 整体而不是其中的行执行 re.search?如果试图搜索整个 data 而不是 data 中的行,Python 会报错。那么也可以在 shell 中执行 grep,然后基于返回码执行 if。