如何将变量值暂时保存在内存中并进行比较。。。用python
伙计们,我肯定我用错误的缩进打破了逻辑但是现在 我修不好。 你能帮帮我吗?如何将变量值暂时保存在内存中并进行比较。。。用python,python,regex,Python,Regex,伙计们,我肯定我用错误的缩进打破了逻辑但是现在 我修不好。 你能帮帮我吗? # #analyzeNano.py-分析XYZ文件的“健全性” # # import csv import sys import os import getopt def main(): ''' analyzeNano.py -d input-directory analyzeNano.py analyzes a list of XYZ files inside input-directory. It cou
# #analyzeNano.py-分析XYZ文件的“健全性” # #
import csv
import sys
import os
import getopt
def main():
    '''
    analyzeNano.py -d input-directory

    Analyze every XYZ file inside input-directory.  A file is made of runs of
    consecutive rows sharing the same DNA sample ID (first tab-separated
    column, starting with "dna" in any letter case).  The file is 'good' when
    every run holds at least 96 and fewer than 110 rows, otherwise 'bad'.

    input-directory    an input directory where XYZ files are located
    -d                 flag for input-directory

    At the end it creates 2 files: goodNano.csv and badNano.csv
    Note: files that are not in goodNano.csv and badNano.csv have no DNA ID
    and are therefore not listed
    '''
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'd:')
    except getopt.GetoptError as err:
        print(str(err))
        help(main)
        sys.exit(2)
    if len(opts) != 1:
        help(main)
        sys.exit(2)
    # Take the directory from the parsed option: sys.argv[2] is wrong (or
    # missing) when the value is glued to the flag, as in "-d/some/dir".
    inputDir = opts[0][1]
    if not os.path.isdir(inputDir):
        print("Error,  %s  is not a valid directory" % inputDir)
        help(main)
        sys.exit(2)
    prefix = 'dna'
    goodFiles = []
    badFiles = []
    baseDir = os.path.abspath(inputDir)
    for f in os.listdir(inputDir):
        absFile = os.path.join(baseDir, f)
        if not os.path.isfile(absFile):
            # sub-directories cannot be opened as data files
            continue
        verdict = _classifyFile(absFile, prefix)
        # every file is listed at most once; None means "no DNA rows"
        if verdict is True:
            goodFiles.append(absFile)
        elif verdict is False:
            badFiles.append(absFile)
    # Create output files
    createFile(goodFiles, 'goodNano')
    createFile(badFiles, 'badNano')


def _openTsv(path):
    '''Open path the way the csv module needs it on the running interpreter.'''
    if str is bytes:
        # Python 2: csv.reader expects a file opened in binary mode
        return open(path, 'rb')
    # Python 3: csv.reader expects text opened with newline=''
    return open(path, 'r', newline='')


def _classifyFile(path, prefix, low=96, high=110):
    '''Return True (good), False (bad) or None (no DNA row) for one file.

    Rows before the first DNA row are skipped; the first non-DNA row after it
    ends the scan.  Every run of identical IDs must satisfy low <= rows < high.
    '''
    match = None    # ID of the run currently being counted
    count = 0       # rows seen so far in that run
    with _openTsv(path) as tsvFile:
        for row in csv.reader(tsvFile, delimiter='\t'):
            # blank lines come back as [], so never index them blindly
            ident = row[0] if row else ''
            if not ident.lower().startswith(prefix):
                if match is None:
                    # ignore preceding lines in file
                    continue
                # the DNA section is over; the last run is checked below
                break
            if ident == match:
                count += 1
                continue
            # a new ID starts here: the run that just ended must be in range
            if match is not None and not low <= count < high:
                return False
            match = ident
            count = 1   # the row that opens the run counts too
    if match is None:
        return None
    return low <= count < high
def createFile(files, fName):
    '''Write the base name of every path in files to "<fName>.csv".

    files -- iterable of file paths; only os.path.basename() of each is kept
    fName -- output file name without the ".csv" extension

    One name is written per line; an existing file is overwritten.
    '''
    # the with-block closes (and therefore flushes) the handle, even on error
    with open(fName + ".csv", "w") as outFile:
        for f in files:
            outFile.write(os.path.basename(f) + "\n")
if __name__ == '__main__':
main()
导入csv
导入系统
导入操作系统
导入getopt
def main():
'''
anano.py-d输入目录
analyzeNano.py分析输入目录中的XYZ文件列表。它计算具有相同ID的必需DNA样本的数量,如果在96到110之间,则将其视为“好”,否则视为“坏”。
输入目录XYZ文件所在的输入目录
-输入目录的d标志
最后,它创建了两个文件:goodNano.csv和badNano.csv
注意:不在goodNano.csv和badNano.csv中的文件没有DNA ID,因此未列出
'''
尝试:
opts,args=getopt.getopt(sys.argv[1:,'d:')
除getopt.GetoptError外,错误:
打印str(错误)
帮助(主)
系统出口(2)
如果len(选择)!=1:
帮助(主)
系统出口(2)
如果不是os.path.isdir(sys.argv[2]):
打印“错误”,sys.argv[2],“不是有效目录”
帮助(主)
系统出口(2)
前缀='dna'
goodFiles=[]
坏文件=[]
fileList=os.listdir(sys.argv[2])
对于文件列表中的f:
absFile=os.path.join(os.path.abspath(sys.argv[2]),f)
打开(ABS文件,'rb')作为csvfile时:
#使用csv分隔字段,以便更轻松地处理
#无需硬编码其大小的第一个值
reader=csv.reader(csvfile,分隔符='\t')
匹配=无
计数=0
对于读取器中的行:
#匹配行
如果行[0].lower().startswith(前缀):
如果匹配为“无”:
#带前缀的第一行。。
匹配=行[0]
如果行[0]==匹配:
#找到了匹配项,所以
计数+=1
如果行[0]!=匹配:
#行前缀已更改
if 96 <= count < 110:
以下是我按照你的风格所做的修改:
import csv
import sys
import os
import getopt
def main():
    '''
    analyzeNano.py -d input-directory

    Analyze every XYZ file inside input-directory.  A file is made of runs of
    consecutive rows sharing the same DNA sample ID (first tab-separated
    column, starting with "dna" in any letter case).  The file is 'good' when
    every run holds at least 96 and fewer than 110 rows, otherwise 'bad'.

    input-directory    an input directory where XYZ files are located
    -d                 flag for input-directory

    At the end it creates 2 files: goodNano.csv and badNano.csv
    Note: files that are not in goodNano.csv and badNano.csv have no DNA ID
    and are therefore not listed
    '''
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'd:')
    except getopt.GetoptError as err:
        print(str(err))
        help(main)
        sys.exit(2)
    if len(opts) != 1:
        help(main)
        sys.exit(2)
    # Take the directory from the parsed option: sys.argv[2] is wrong (or
    # missing) when the value is glued to the flag, as in "-d/some/dir".
    inputDir = opts[0][1]
    if not os.path.isdir(inputDir):
        print("Error,  %s  is not a valid directory" % inputDir)
        help(main)
        sys.exit(2)
    prefix = 'dna'
    goodFiles = []
    badFiles = []
    baseDir = os.path.abspath(inputDir)
    for f in os.listdir(inputDir):
        absFile = os.path.join(baseDir, f)
        if not os.path.isfile(absFile):
            # sub-directories cannot be opened as data files
            continue
        verdict = _classifyFile(absFile, prefix)
        # every file is listed at most once; None means "no DNA rows"
        if verdict is True:
            goodFiles.append(absFile)
        elif verdict is False:
            badFiles.append(absFile)
    # Create output files
    createFile(goodFiles, 'goodNano')
    createFile(badFiles, 'badNano')


def _openTsv(path):
    '''Open path the way the csv module needs it on the running interpreter.'''
    if str is bytes:
        # Python 2: csv.reader expects a file opened in binary mode
        return open(path, 'rb')
    # Python 3: csv.reader expects text opened with newline=''
    return open(path, 'r', newline='')


def _classifyFile(path, prefix, low=96, high=110):
    '''Return True (good), False (bad) or None (no DNA row) for one file.

    Rows before the first DNA row are skipped; the first non-DNA row after it
    ends the scan.  Every run of identical IDs must satisfy low <= rows < high.
    '''
    match = None    # ID of the run currently being counted
    count = 0       # rows seen so far in that run
    with _openTsv(path) as tsvFile:
        for row in csv.reader(tsvFile, delimiter='\t'):
            # blank lines come back as [], so never index them blindly
            ident = row[0] if row else ''
            if not ident.lower().startswith(prefix):
                if match is None:
                    # ignore preceding lines in file
                    continue
                # the DNA section is over; the last run is checked below
                break
            if ident == match:
                count += 1
                continue
            # a new ID starts here: the run that just ended must be in range
            if match is not None and not low <= count < high:
                return False
            match = ident
            count = 1   # the row that opens the run counts too
    if match is None:
        return None
    return low <= count < high
def createFile(files, fName):
    '''Write the base name of every path in files to "<fName>.csv".

    files -- iterable of file paths; only os.path.basename() of each is kept
    fName -- output file name without the ".csv" extension

    One name is written per line; an existing file is overwritten.
    '''
    # the with-block closes (and therefore flushes) the handle, even on error
    with open(fName + ".csv", "w") as outFile:
        for f in files:
            outFile.write(os.path.basename(f) + "\n")
if __name__ == '__main__':
main()
# "U" in the mode asks for universal-newline handling; if that raises an
# error, try switching back to "b".
with open("z:/file.txt", "rU") as file:
    print(file.name)
    counter = 0
    # a file object yields its own lines, one per iteration
    for line in file:
        if not line.lower().startswith('dna'):
            # not one of the wanted lines
            continue
        # process the data
        counter += 1
所有变量都保存在内存中。您希望保留最近的匹配项并对其进行比较,在匹配时计数:
import csv
import sys
import os
import getopt
def main():
    '''
    analyzeNano.py -d input-directory

    Analyze every XYZ file inside input-directory.  A file is made of runs of
    consecutive rows sharing the same DNA sample ID (first tab-separated
    column, starting with "dna" in any letter case).  The file is 'good' when
    every run holds at least 96 and fewer than 110 rows, otherwise 'bad'.

    input-directory    an input directory where XYZ files are located
    -d                 flag for input-directory

    At the end it creates 2 files: goodNano.csv and badNano.csv
    Note: files that are not in goodNano.csv and badNano.csv have no DNA ID
    and are therefore not listed
    '''
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'd:')
    except getopt.GetoptError as err:
        print(str(err))
        help(main)
        sys.exit(2)
    if len(opts) != 1:
        help(main)
        sys.exit(2)
    # Take the directory from the parsed option: sys.argv[2] is wrong (or
    # missing) when the value is glued to the flag, as in "-d/some/dir".
    inputDir = opts[0][1]
    if not os.path.isdir(inputDir):
        print("Error,  %s  is not a valid directory" % inputDir)
        help(main)
        sys.exit(2)
    prefix = 'dna'
    goodFiles = []
    badFiles = []
    baseDir = os.path.abspath(inputDir)
    for f in os.listdir(inputDir):
        absFile = os.path.join(baseDir, f)
        if not os.path.isfile(absFile):
            # sub-directories cannot be opened as data files
            continue
        verdict = _classifyFile(absFile, prefix)
        # every file is listed at most once; None means "no DNA rows"
        if verdict is True:
            goodFiles.append(absFile)
        elif verdict is False:
            badFiles.append(absFile)
    # Create output files
    createFile(goodFiles, 'goodNano')
    createFile(badFiles, 'badNano')


def _openTsv(path):
    '''Open path the way the csv module needs it on the running interpreter.'''
    if str is bytes:
        # Python 2: csv.reader expects a file opened in binary mode
        return open(path, 'rb')
    # Python 3: csv.reader expects text opened with newline=''
    return open(path, 'r', newline='')


def _classifyFile(path, prefix, low=96, high=110):
    '''Return True (good), False (bad) or None (no DNA row) for one file.

    Rows before the first DNA row are skipped; the first non-DNA row after it
    ends the scan.  Every run of identical IDs must satisfy low <= rows < high.
    '''
    match = None    # ID of the run currently being counted
    count = 0       # rows seen so far in that run
    with _openTsv(path) as tsvFile:
        for row in csv.reader(tsvFile, delimiter='\t'):
            # blank lines come back as [], so never index them blindly
            ident = row[0] if row else ''
            if not ident.lower().startswith(prefix):
                if match is None:
                    # ignore preceding lines in file
                    continue
                # the DNA section is over; the last run is checked below
                break
            if ident == match:
                count += 1
                continue
            # a new ID starts here: the run that just ended must be in range
            if match is not None and not low <= count < high:
                return False
            match = ident
            count = 1   # the row that opens the run counts too
    if match is None:
        return None
    return low <= count < high
def createFile(files, fName):
    '''Write the base name of every path in files to "<fName>.csv".

    files -- iterable of file paths; only os.path.basename() of each is kept
    fName -- output file name without the ".csv" extension

    One name is written per line; an existing file is overwritten.
    '''
    # the with-block closes (and therefore flushes) the handle, even on error
    with open(fName + ".csv", "w") as outFile:
        for f in files:
            outFile.write(os.path.basename(f) + "\n")
if __name__ == '__main__':
main()
import csv
# Decide whether 'file.txt' is "good": every run of consecutive rows sharing
# the same first column (which must start with prefix) has to contain at
# least 96 and fewer than 100 rows.
prefix = 'DNA'
match = None      # ID of the run currently being counted
count = 0         # rows seen so far in that run
is_good = False
# csv wants a binary-mode file on Python 2 and a text-mode file opened with
# newline='' on Python 3; "str is bytes" is only true on Python 2.
if str is bytes:
    csvfile = open('file.txt', 'rb')
else:
    csvfile = open('file.txt', 'r', newline='')
with csvfile:
    # use csv to separate the fields, making it easier to deal with the
    # first value without hard-coding its size
    reader = csv.reader(csvfile, delimiter='\t')
    for row in reader:
        # blank lines are returned as [], so guard the index
        first = row[0] if row else ''
        if first.startswith(prefix):
            # matching rows
            if first == match:
                # found a match, so increment
                count += 1
            elif match is None or 96 <= count < 100:
                # first run, or the previous run counted enough: start the
                # next one; this row already belongs to it, hence count = 1
                match = first
                count = 1
            else:
                # didn't count enough, so stop working through this file
                break
        elif match is None:
            # ignore preceding lines in file
            continue
        else:
            # found non-matching line when expecting a match
            break
    else:
        # reached the end of the file without a break: check the last run
        if 96 <= count < 100:
            is_good = True
if is_good:
    print('File was good')
else:
    print('File was bad')
导入csv
前缀='DNA'
打开('file.txt','rb')作为csvfile:
#使用csv分隔字段,以便更轻松地处理
#无需硬编码其大小的第一个值
reader=csv.reader(csvfile,分隔符='\t')
匹配=无
计数=0
是好的还是错的
对于读取器中的行:
#匹配行
如果第[0]行开始,则使用(前缀):
如果匹配为“无”:
#带前缀的第一行。。
匹配=行[0]
如果行[0]==匹配:
#找到了匹配项,所以
计数+=1
如果行[0]!=匹配:
#行前缀已更改
if 96 <= count < 100:
根据您的描述,您感兴趣的行与以下正则表达式匹配:
import csv
import sys
import os
import getopt
def main():
    '''
    analyzeNano.py -d input-directory

    Analyze every XYZ file inside input-directory.  A file is made of runs of
    consecutive rows sharing the same DNA sample ID (first tab-separated
    column, starting with "dna" in any letter case).  The file is 'good' when
    every run holds at least 96 and fewer than 110 rows, otherwise 'bad'.

    input-directory    an input directory where XYZ files are located
    -d                 flag for input-directory

    At the end it creates 2 files: goodNano.csv and badNano.csv
    Note: files that are not in goodNano.csv and badNano.csv have no DNA ID
    and are therefore not listed
    '''
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'd:')
    except getopt.GetoptError as err:
        print(str(err))
        help(main)
        sys.exit(2)
    if len(opts) != 1:
        help(main)
        sys.exit(2)
    # Take the directory from the parsed option: sys.argv[2] is wrong (or
    # missing) when the value is glued to the flag, as in "-d/some/dir".
    inputDir = opts[0][1]
    if not os.path.isdir(inputDir):
        print("Error,  %s  is not a valid directory" % inputDir)
        help(main)
        sys.exit(2)
    prefix = 'dna'
    goodFiles = []
    badFiles = []
    baseDir = os.path.abspath(inputDir)
    for f in os.listdir(inputDir):
        absFile = os.path.join(baseDir, f)
        if not os.path.isfile(absFile):
            # sub-directories cannot be opened as data files
            continue
        verdict = _classifyFile(absFile, prefix)
        # every file is listed at most once; None means "no DNA rows"
        if verdict is True:
            goodFiles.append(absFile)
        elif verdict is False:
            badFiles.append(absFile)
    # Create output files
    createFile(goodFiles, 'goodNano')
    createFile(badFiles, 'badNano')


def _openTsv(path):
    '''Open path the way the csv module needs it on the running interpreter.'''
    if str is bytes:
        # Python 2: csv.reader expects a file opened in binary mode
        return open(path, 'rb')
    # Python 3: csv.reader expects text opened with newline=''
    return open(path, 'r', newline='')


def _classifyFile(path, prefix, low=96, high=110):
    '''Return True (good), False (bad) or None (no DNA row) for one file.

    Rows before the first DNA row are skipped; the first non-DNA row after it
    ends the scan.  Every run of identical IDs must satisfy low <= rows < high.
    '''
    match = None    # ID of the run currently being counted
    count = 0       # rows seen so far in that run
    with _openTsv(path) as tsvFile:
        for row in csv.reader(tsvFile, delimiter='\t'):
            # blank lines come back as [], so never index them blindly
            ident = row[0] if row else ''
            if not ident.lower().startswith(prefix):
                if match is None:
                    # ignore preceding lines in file
                    continue
                # the DNA section is over; the last run is checked below
                break
            if ident == match:
                count += 1
                continue
            # a new ID starts here: the run that just ended must be in range
            if match is not None and not low <= count < high:
                return False
            match = ident
            count = 1   # the row that opens the run counts too
    if match is None:
        return None
    return low <= count < high
def createFile(files, fName):
    '''Write the base name of every path in files to "<fName>.csv".

    files -- iterable of file paths; only os.path.basename() of each is kept
    fName -- output file name without the ".csv" extension

    One name is written per line; an existing file is overwritten.
    '''
    # the with-block closes (and therefore flushes) the handle, even on error
    with open(fName + ".csv", "w") as outFile:
        for f in files:
            outFile.write(os.path.basename(f) + "\n")
if __name__ == '__main__':
main()
^DNA[0-9]{10}
也就是说,我假设你的xyz实际上是十位数
import csv
import sys
import os
import getopt
def main():
    '''
    analyzeNano.py -d input-directory

    Analyze every XYZ file inside input-directory.  A file is made of runs of
    consecutive rows sharing the same DNA sample ID (first tab-separated
    column, starting with "dna" in any letter case).  The file is 'good' when
    every run holds at least 96 and fewer than 110 rows, otherwise 'bad'.

    input-directory    an input directory where XYZ files are located
    -d                 flag for input-directory

    At the end it creates 2 files: goodNano.csv and badNano.csv
    Note: files that are not in goodNano.csv and badNano.csv have no DNA ID
    and are therefore not listed
    '''
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'd:')
    except getopt.GetoptError as err:
        print(str(err))
        help(main)
        sys.exit(2)
    if len(opts) != 1:
        help(main)
        sys.exit(2)
    # Take the directory from the parsed option: sys.argv[2] is wrong (or
    # missing) when the value is glued to the flag, as in "-d/some/dir".
    inputDir = opts[0][1]
    if not os.path.isdir(inputDir):
        print("Error,  %s  is not a valid directory" % inputDir)
        help(main)
        sys.exit(2)
    prefix = 'dna'
    goodFiles = []
    badFiles = []
    baseDir = os.path.abspath(inputDir)
    for f in os.listdir(inputDir):
        absFile = os.path.join(baseDir, f)
        if not os.path.isfile(absFile):
            # sub-directories cannot be opened as data files
            continue
        verdict = _classifyFile(absFile, prefix)
        # every file is listed at most once; None means "no DNA rows"
        if verdict is True:
            goodFiles.append(absFile)
        elif verdict is False:
            badFiles.append(absFile)
    # Create output files
    createFile(goodFiles, 'goodNano')
    createFile(badFiles, 'badNano')


def _openTsv(path):
    '''Open path the way the csv module needs it on the running interpreter.'''
    if str is bytes:
        # Python 2: csv.reader expects a file opened in binary mode
        return open(path, 'rb')
    # Python 3: csv.reader expects text opened with newline=''
    return open(path, 'r', newline='')


def _classifyFile(path, prefix, low=96, high=110):
    '''Return True (good), False (bad) or None (no DNA row) for one file.

    Rows before the first DNA row are skipped; the first non-DNA row after it
    ends the scan.  Every run of identical IDs must satisfy low <= rows < high.
    '''
    match = None    # ID of the run currently being counted
    count = 0       # rows seen so far in that run
    with _openTsv(path) as tsvFile:
        for row in csv.reader(tsvFile, delimiter='\t'):
            # blank lines come back as [], so never index them blindly
            ident = row[0] if row else ''
            if not ident.lower().startswith(prefix):
                if match is None:
                    # ignore preceding lines in file
                    continue
                # the DNA section is over; the last run is checked below
                break
            if ident == match:
                count += 1
                continue
            # a new ID starts here: the run that just ended must be in range
            if match is not None and not low <= count < high:
                return False
            match = ident
            count = 1   # the row that opens the run counts too
    if match is None:
        return None
    return low <= count < high
def createFile(files, fName):
    '''Write the base name of every path in files to "<fName>.csv".

    files -- iterable of file paths; only os.path.basename() of each is kept
    fName -- output file name without the ".csv" extension

    One name is written per line; an existing file is overwritten.
    '''
    # the with-block closes (and therefore flushes) the handle, even on error
    with open(fName + ".csv", "w") as outFile:
        for f in files:
            outFile.write(os.path.basename(f) + "\n")
if __name__ == '__main__':
main()
这里的策略是匹配13个字符的字符串。如果当前行不匹配,而且此前也从未匹配过,我们就直接跳过,继续读取下一行。一旦匹配成功,我们
保存该字符串,并将计数器加一。只要后续各行一直匹配正则表达式且与保存的字符串相同,我们就继续递增计数。一旦遇到不同的正则匹配结果,或者根本不匹配,这段相同匹配的序列就结束了。如果该序列有效,我们将计数重置为
零,并把上一个匹配项清空。如果无效,我们退出
import csv
import sys
import os
import getopt
def main():
    '''
    analyzeNano.py -d input-directory

    Analyze every XYZ file inside input-directory.  A file is made of runs of
    consecutive rows sharing the same DNA sample ID (first tab-separated
    column, starting with "dna" in any letter case).  The file is 'good' when
    every run holds at least 96 and fewer than 110 rows, otherwise 'bad'.

    input-directory    an input directory where XYZ files are located
    -d                 flag for input-directory

    At the end it creates 2 files: goodNano.csv and badNano.csv
    Note: files that are not in goodNano.csv and badNano.csv have no DNA ID
    and are therefore not listed
    '''
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'd:')
    except getopt.GetoptError as err:
        print(str(err))
        help(main)
        sys.exit(2)
    if len(opts) != 1:
        help(main)
        sys.exit(2)
    # Take the directory from the parsed option: sys.argv[2] is wrong (or
    # missing) when the value is glued to the flag, as in "-d/some/dir".
    inputDir = opts[0][1]
    if not os.path.isdir(inputDir):
        print("Error,  %s  is not a valid directory" % inputDir)
        help(main)
        sys.exit(2)
    prefix = 'dna'
    goodFiles = []
    badFiles = []
    baseDir = os.path.abspath(inputDir)
    for f in os.listdir(inputDir):
        absFile = os.path.join(baseDir, f)
        if not os.path.isfile(absFile):
            # sub-directories cannot be opened as data files
            continue
        verdict = _classifyFile(absFile, prefix)
        # every file is listed at most once; None means "no DNA rows"
        if verdict is True:
            goodFiles.append(absFile)
        elif verdict is False:
            badFiles.append(absFile)
    # Create output files
    createFile(goodFiles, 'goodNano')
    createFile(badFiles, 'badNano')


def _openTsv(path):
    '''Open path the way the csv module needs it on the running interpreter.'''
    if str is bytes:
        # Python 2: csv.reader expects a file opened in binary mode
        return open(path, 'rb')
    # Python 3: csv.reader expects text opened with newline=''
    return open(path, 'r', newline='')


def _classifyFile(path, prefix, low=96, high=110):
    '''Return True (good), False (bad) or None (no DNA row) for one file.

    Rows before the first DNA row are skipped; the first non-DNA row after it
    ends the scan.  Every run of identical IDs must satisfy low <= rows < high.
    '''
    match = None    # ID of the run currently being counted
    count = 0       # rows seen so far in that run
    with _openTsv(path) as tsvFile:
        for row in csv.reader(tsvFile, delimiter='\t'):
            # blank lines come back as [], so never index them blindly
            ident = row[0] if row else ''
            if not ident.lower().startswith(prefix):
                if match is None:
                    # ignore preceding lines in file
                    continue
                # the DNA section is over; the last run is checked below
                break
            if ident == match:
                count += 1
                continue
            # a new ID starts here: the run that just ended must be in range
            if match is not None and not low <= count < high:
                return False
            match = ident
            count = 1   # the row that opens the run counts too
    if match is None:
        return None
    return low <= count < high
def createFile(files, fName):
    '''Write the base name of every path in files to "<fName>.csv".

    files -- iterable of file paths; only os.path.basename() of each is kept
    fName -- output file name without the ".csv" extension

    One name is written per line; an existing file is overwritten.
    '''
    # the with-block closes (and therefore flushes) the handle, even on error
    with open(fName + ".csv", "w") as outFile:
        for f in files:
            outFile.write(os.path.basename(f) + "\n")
if __name__ == '__main__':
main()
我急忙补充说,以下内容未经测试
import csv
import sys
import os
import getopt
def main():
    '''
    analyzeNano.py -d input-directory

    Analyze every XYZ file inside input-directory.  A file is made of runs of
    consecutive rows sharing the same DNA sample ID (first tab-separated
    column, starting with "dna" in any letter case).  The file is 'good' when
    every run holds at least 96 and fewer than 110 rows, otherwise 'bad'.

    input-directory    an input directory where XYZ files are located
    -d                 flag for input-directory

    At the end it creates 2 files: goodNano.csv and badNano.csv
    Note: files that are not in goodNano.csv and badNano.csv have no DNA ID
    and are therefore not listed
    '''
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'd:')
    except getopt.GetoptError as err:
        print(str(err))
        help(main)
        sys.exit(2)
    if len(opts) != 1:
        help(main)
        sys.exit(2)
    # Take the directory from the parsed option: sys.argv[2] is wrong (or
    # missing) when the value is glued to the flag, as in "-d/some/dir".
    inputDir = opts[0][1]
    if not os.path.isdir(inputDir):
        print("Error,  %s  is not a valid directory" % inputDir)
        help(main)
        sys.exit(2)
    prefix = 'dna'
    goodFiles = []
    badFiles = []
    baseDir = os.path.abspath(inputDir)
    for f in os.listdir(inputDir):
        absFile = os.path.join(baseDir, f)
        if not os.path.isfile(absFile):
            # sub-directories cannot be opened as data files
            continue
        verdict = _classifyFile(absFile, prefix)
        # every file is listed at most once; None means "no DNA rows"
        if verdict is True:
            goodFiles.append(absFile)
        elif verdict is False:
            badFiles.append(absFile)
    # Create output files
    createFile(goodFiles, 'goodNano')
    createFile(badFiles, 'badNano')


def _openTsv(path):
    '''Open path the way the csv module needs it on the running interpreter.'''
    if str is bytes:
        # Python 2: csv.reader expects a file opened in binary mode
        return open(path, 'rb')
    # Python 3: csv.reader expects text opened with newline=''
    return open(path, 'r', newline='')


def _classifyFile(path, prefix, low=96, high=110):
    '''Return True (good), False (bad) or None (no DNA row) for one file.

    Rows before the first DNA row are skipped; the first non-DNA row after it
    ends the scan.  Every run of identical IDs must satisfy low <= rows < high.
    '''
    match = None    # ID of the run currently being counted
    count = 0       # rows seen so far in that run
    with _openTsv(path) as tsvFile:
        for row in csv.reader(tsvFile, delimiter='\t'):
            # blank lines come back as [], so never index them blindly
            ident = row[0] if row else ''
            if not ident.lower().startswith(prefix):
                if match is None:
                    # ignore preceding lines in file
                    continue
                # the DNA section is over; the last run is checked below
                break
            if ident == match:
                count += 1
                continue
            # a new ID starts here: the run that just ended must be in range
            if match is not None and not low <= count < high:
                return False
            match = ident
            count = 1   # the row that opens the run counts too
    if match is None:
        return None
    return low <= count < high
def createFile(files, fName):
    '''Write the base name of every path in files to "<fName>.csv".

    files -- iterable of file paths; only os.path.basename() of each is kept
    fName -- output file name without the ".csv" extension

    One name is written per line; an existing file is overwritten.
    '''
    # the with-block closes (and therefore flushes) the handle, even on error
    with open(fName + ".csv", "w") as outFile:
        for f in files:
            outFile.write(os.path.basename(f) + "\n")
if __name__ == '__main__':
main()
# The snippet uses re (pattern matching) and sys (stderr / exit status) but
# never imported them, so it failed with NameError before doing any work.
import re
import sys

# Input file with DNA lines to match:
infile = "z:/file.txt"
# This is the regex for the lines of interest: the literal "DNA" followed by
# exactly ten digits, anchored at the start of the line.
regex = re.compile(r'^DNA[0-9]{10}')
# This will keep count of the number of matches in sequence:
n_seq = 0
# This is the previous match (if any); '' means "no run in progress":
lastmatch = ''
# Guard used after every finished run: abort the program when the run length
# is unacceptable.
def bail_on_bad_sequence(count, match):
    """Exit with status 1 unless count is at least 96 and below 100.

    count -- number of rows in the run that just ended
    match -- the ID shared by those rows (only used in the error message)
    """
    if count < 96 or count >= 100:
        sys.stderr.write("Bad count (%d) for '%s'\n" % (count,match))
        sys.exit(1)
# Walk the file once, tracking the current run of identical DNA IDs in
# lastmatch / n_seq and validating each run as soon as it ends.
with open(infile) as handle:
    for line in handle:
        # Try to match the line to the compiled regex (re.match(line) was a
        # call to the module function with its pattern argument missing):
        match = regex.match(line)
        current = match.group(0) if match else ''
        if current and current == lastmatch:
            # same ID as the previous line: the run continues
            n_seq += 1
            continue
        # The run in progress (if any) ends here, either because a different
        # ID showed up or because the line did not match at all.  Arguments
        # go in (count, match) order, as the subroutine expects.
        if n_seq != 0:
            bail_on_bad_sequence(n_seq, lastmatch)
        lastmatch = current
        # a matching line is already the first member of its own run
        n_seq = 1 if current else 0
# A run that lasts until the end of the file still has to be validated.
if n_seq != 0:
    bail_on_bad_sequence(n_seq, lastmatch)
#输入要匹配的DNA行的文件:
infle=“z:/file.txt”
#这是感兴趣的行的正则表达式:
regex=re.compile(“^DNA[0-9]{10}”)
#这将按顺序记录匹配数:
n_seq=0
#这是上一个匹配项(如果有):
lastmatch=“”
#检查给定序列计数和bail(如果错误)的子例程:
def bail_on_bad_序列(计数、匹配):
if 96 <= count < 100:
请忽略我上一次请大家检查代码的请求。我自己检查了一下,发现问题出在缩进格式上。
现在看来,它可以按预期工作并分析目录中的所有文件。再次感谢梅瑟。这种帮助是巨大的。我仍然有些担心计算的准确性,因为在一些情况下,它失败了,但它不应该。。。但我会调查的。
总的来说。。。非常感谢大家提供的巨大帮助。欢迎来到Stackoverflow,快速调查,您从哪一个教程中学习了如何打开这样的文件?如果您也向我们展示数据的一个子集,这将很有帮助。嗯。。。我不是从教程中学到的:)。。我刚刚打开了Python文档,找到了如何打开和读取文件。很抱歉,我无法提供数据的子集,因为它位于Firewall后面。谢谢您的建议,但我遇到了以下错误:回溯(最近一次调用):文件“\Scripts\script.py”,第7行,在AttributeError中:“str”对象没有来自以下代码的属性“startwith”:with open(“z:/file.txt”,“rU”)作为文件:print file.name counter=0用于文件中的行:if line.lower().startwith('dna'):new_l=list(l[17+计数器])print”“。加入(new_l[:13])计数器+=1@susja你拼错了startswith
Great!!谢谢你的建议。我的文件不是csv…但无论如何,我会尝试它并用结果更新你,虽然它是以制表符分隔的,所以应该仍然有效。好的,Matthew…问题1与“键”相关:回溯(最近一次调用):file“\Scripts\script.py“,第32行,在NameError:全局名称'key'未定义问题#2:在我注释掉'else'后,阻止它完成,没有错误,但结果我希望它应该打印出我处理的文件是'good'或'bad'。在我们的情况下,它不会通知处理结果。对不起,我的错,
import csv
import sys
import os
import getopt
def main():
    '''
    analyzeNano.py -d input-directory

    Analyze every XYZ file inside input-directory.  A file is made of runs of
    consecutive rows sharing the same DNA sample ID (first tab-separated
    column, starting with "dna" in any letter case).  The file is 'good' when
    every run holds at least 96 and fewer than 110 rows, otherwise 'bad'.

    input-directory    an input directory where XYZ files are located
    -d                 flag for input-directory

    At the end it creates 2 files: goodNano.csv and badNano.csv
    Note: files that are not in goodNano.csv and badNano.csv have no DNA ID
    and are therefore not listed
    '''
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'd:')
    except getopt.GetoptError as err:
        print(str(err))
        help(main)
        sys.exit(2)
    if len(opts) != 1:
        help(main)
        sys.exit(2)
    # Take the directory from the parsed option: sys.argv[2] is wrong (or
    # missing) when the value is glued to the flag, as in "-d/some/dir".
    inputDir = opts[0][1]
    if not os.path.isdir(inputDir):
        print("Error,  %s  is not a valid directory" % inputDir)
        help(main)
        sys.exit(2)
    prefix = 'dna'
    goodFiles = []
    badFiles = []
    baseDir = os.path.abspath(inputDir)
    for f in os.listdir(inputDir):
        absFile = os.path.join(baseDir, f)
        if not os.path.isfile(absFile):
            # sub-directories cannot be opened as data files
            continue
        verdict = _classifyFile(absFile, prefix)
        # every file is listed at most once; None means "no DNA rows"
        if verdict is True:
            goodFiles.append(absFile)
        elif verdict is False:
            badFiles.append(absFile)
    # Create output files
    createFile(goodFiles, 'goodNano')
    createFile(badFiles, 'badNano')


def _openTsv(path):
    '''Open path the way the csv module needs it on the running interpreter.'''
    if str is bytes:
        # Python 2: csv.reader expects a file opened in binary mode
        return open(path, 'rb')
    # Python 3: csv.reader expects text opened with newline=''
    return open(path, 'r', newline='')


def _classifyFile(path, prefix, low=96, high=110):
    '''Return True (good), False (bad) or None (no DNA row) for one file.

    Rows before the first DNA row are skipped; the first non-DNA row after it
    ends the scan.  Every run of identical IDs must satisfy low <= rows < high.
    '''
    match = None    # ID of the run currently being counted
    count = 0       # rows seen so far in that run
    with _openTsv(path) as tsvFile:
        for row in csv.reader(tsvFile, delimiter='\t'):
            # blank lines come back as [], so never index them blindly
            ident = row[0] if row else ''
            if not ident.lower().startswith(prefix):
                if match is None:
                    # ignore preceding lines in file
                    continue
                # the DNA section is over; the last run is checked below
                break
            if ident == match:
                count += 1
                continue
            # a new ID starts here: the run that just ended must be in range
            if match is not None and not low <= count < high:
                return False
            match = ident
            count = 1   # the row that opens the run counts too
    if match is None:
        return None
    return low <= count < high
def createFile(files, fName):
    '''Write the base name of every path in files to "<fName>.csv".

    files -- iterable of file paths; only os.path.basename() of each is kept
    fName -- output file name without the ".csv" extension

    One name is written per line; an existing file is overwritten.
    '''
    # the with-block closes (and therefore flushes) the handle, even on error
    with open(fName + ".csv", "w") as outFile:
        for f in files:
            outFile.write(os.path.basename(f) + "\n")
if __name__ == '__main__':
main()