如何将变量值暂时保存在内存中并进行比较。。。用python

如何将变量值暂时保存在内存中并进行比较。。。用python,python,regex,Python,Regex,伙计们,我肯定我用错误的缩进打破了逻辑但是现在 我修不好。 你能帮帮我吗? # #analyzeNano.py-分析XYZ文件的“健全性” # # import csv import sys import os import getopt def main(): ''' analyzeNano.py -d input-directory analyzeNano.py analyzes a list of XYZ files inside input-directory. It cou

伙计们,我肯定我用错误的缩进打破了逻辑但是现在 我修不好。 你能帮帮我吗?
# #analyzeNano.py-分析XYZ文件的“健全性” # #

import csv
import sys
import os
import getopt

def main():
    '''
analyzeNano.py -d input-directory

analyzeNano.py analyzes a list of XYZ files inside input-directory. It counts for the number of consequitive DNA samples with identical ID and if it between 96 and 110 it treats it as 'good', otherwise 'bad'.
    input-directory    an input directory where XYZ files are located
    -d    flag for input-directory
At the end it creates 2 files: goodNano.csv and badNano.csv
Note: files that are not in goodNano.csv and badNano.csv have no DNA ID and therefore not listed
'''
    try:
        opts, args = getopt.getopt(sys.argv[1:],'d:')
    except getopt.GetoptError, err:
        print str(err)
        help(main)
        sys.exit(2)

    if len(opts) != 1:
        help(main)
        sys.exit(2)

    if not os.path.isdir( sys.argv[2] ):
        print "Error, ", sys.argv[2], " is not a valid directory"
        help(main)
        sys.exit(2)


    prefix = 'dna'
    goodFiles = []
    badFiles = []

    fileList = os.listdir(sys.argv[2])
    for f in fileList:
        absFile = os.path.join(os.path.abspath(sys.argv[2]), f )
        with open(absFile, 'rb') as csvfile:
            # use csv to separate the fields, making it easier to deal with the
            # first value without hard-coding its size
            reader = csv.reader(csvfile, delimiter='\t')
            match = None
            count = 0

            for row in reader:
                # matching rows
                if row[0].lower().startswith(prefix):

                    if match is None:
                        # first line with prefix..
                        match = row[0]

                    if row[0] == match:
                        # found a match, so increment
                        count += 1

                    if row[0] != match:
                        # row prefix has changed
                        if 96 <= count < 110:
                            # counted enough, so start counting the next
                            match = row[0] # match on this now
                            count = 0 # reset the count
                            goodFiles.append(csvfile.name)
                        else:
                            # didn't count enough, so stop working through this file
                            badFiles.append(csvfile.name)
                            break

                # non-matching rows
                else:
                    if match is None:
                        # ignore preceding lines in file
                        continue
                    else:
                        # found non-matching line when expecting a match
                        break
    else:
        if not 96 <= count < 110:
                    #there was at least successful run of lines
            goodFiles.remove(csvfile.name)

    # Create output files
    createFile(goodFiles, 'goodNano')
    createFile(badFiles, 'badNano')

def createFile(files, fName):
    fileName = open( fName + ".csv", "w" )
    for f in files:
        fileName.write( os.path.basename(f) )
        fileName.write("\n")


if __name__ == '__main__':
    main()
导入csv
导入系统
导入操作系统
导入getopt
def main():
'''
anano.py-d输入目录
analyzeNano.py分析输入目录中的XYZ文件列表。它计算具有相同ID的必需DNA样本的数量,如果在96到110之间,则将其视为“好”,否则视为“坏”。
输入目录XYZ文件所在的输入目录
-输入目录的d标志
最后,它创建了两个文件:goodNano.csv和badNano.csv
注意:不在goodNano.csv和badNano.csv中的文件没有DNA ID,因此未列出
'''
尝试:
opts,args=getopt.getopt(sys.argv[1:,'d:')
除getopt.GetoptError外,错误:
打印str(错误)
帮助(主)
系统出口(2)
如果len(选择)!=1:
帮助(主)
系统出口(2)
如果不是os.path.isdir(sys.argv[2]):
打印“错误”,sys.argv[2],“不是有效目录”
帮助(主)
系统出口(2)
前缀='dna'
goodFiles=[]
坏文件=[]
fileList=os.listdir(sys.argv[2])
对于文件列表中的f:
absFile=os.path.join(os.path.abspath(sys.argv[2]),f)
打开(ABS文件,'rb')作为csvfile时:
#使用csv分隔字段,以便更轻松地处理
#无需硬编码其大小的第一个值
reader=csv.reader(csvfile,分隔符='\t')
匹配=无
计数=0
对于读取器中的行:
#匹配行
如果行[0].lower().startswith(前缀):
如果匹配为“无”:
#带前缀的第一行。。
匹配=行[0]
如果行[0]==匹配:
#找到了匹配项,所以
计数+=1
如果行[0]!=匹配:
#行前缀已更改

如果96以下是我如何修改你的风格:

import csv
import sys
import os
import getopt

def main():
    '''
analyzeNano.py -d input-directory

analyzeNano.py analyzes a list of XYZ files inside input-directory. It counts for the number of consequitive DNA samples with identical ID and if it between 96 and 110 it treats it as 'good', otherwise 'bad'.
    input-directory    an input directory where XYZ files are located
    -d    flag for input-directory
At the end it creates 2 files: goodNano.csv and badNano.csv
Note: files that are not in goodNano.csv and badNano.csv have no DNA ID and therefore not listed
'''
    try:
        opts, args = getopt.getopt(sys.argv[1:],'d:')
    except getopt.GetoptError, err:
        print str(err)
        help(main)
        sys.exit(2)

    if len(opts) != 1:
        help(main)
        sys.exit(2)

    if not os.path.isdir( sys.argv[2] ):
        print "Error, ", sys.argv[2], " is not a valid directory"
        help(main)
        sys.exit(2)


    prefix = 'dna'
    goodFiles = []
    badFiles = []

    fileList = os.listdir(sys.argv[2])
    for f in fileList:
        absFile = os.path.join(os.path.abspath(sys.argv[2]), f )
        with open(absFile, 'rb') as csvfile:
            # use csv to separate the fields, making it easier to deal with the
            # first value without hard-coding its size
            reader = csv.reader(csvfile, delimiter='\t')
            match = None
            count = 0

            for row in reader:
                # matching rows
                if row[0].lower().startswith(prefix):

                    if match is None:
                        # first line with prefix..
                        match = row[0]

                    if row[0] == match:
                        # found a match, so increment
                        count += 1

                    if row[0] != match:
                        # row prefix has changed
                        if 96 <= count < 110:
                            # counted enough, so start counting the next
                            match = row[0] # match on this now
                            count = 0 # reset the count
                            goodFiles.append(csvfile.name)
                        else:
                            # didn't count enough, so stop working through this file
                            badFiles.append(csvfile.name)
                            break

                # non-matching rows
                else:
                    if match is None:
                        # ignore preceding lines in file
                        continue
                    else:
                        # found non-matching line when expecting a match
                        break
    else:
        if not 96 <= count < 110:
                    #there was at least successful run of lines
            goodFiles.remove(csvfile.name)

    # Create output files
    createFile(goodFiles, 'goodNano')
    createFile(badFiles, 'badNano')

def createFile(files, fName):
    fileName = open( fName + ".csv", "w" )
    for f in files:
        fileName.write( os.path.basename(f) )
        fileName.write("\n")


if __name__ == '__main__':
    main()
with open("z:/file.txt", "rU") as file: # U flag means Universal Newline Mode, 
                                        # if error, try switching back to b
    print(file.name)        
    counter = 0
    for line in file: # iterate over a file object itself line by line
        if line.lower().startswith('dna'): # look for your desired condition
            # process the data
            counter += 1

所有变量都保存在内存中。您希望保留最近的匹配项并对其进行比较,在匹配时计数:

import csv
import sys
import os
import getopt

def main():
    '''
analyzeNano.py -d input-directory

analyzeNano.py analyzes a list of XYZ files inside input-directory. It counts for the number of consequitive DNA samples with identical ID and if it between 96 and 110 it treats it as 'good', otherwise 'bad'.
    input-directory    an input directory where XYZ files are located
    -d    flag for input-directory
At the end it creates 2 files: goodNano.csv and badNano.csv
Note: files that are not in goodNano.csv and badNano.csv have no DNA ID and therefore not listed
'''
    try:
        opts, args = getopt.getopt(sys.argv[1:],'d:')
    except getopt.GetoptError, err:
        print str(err)
        help(main)
        sys.exit(2)

    if len(opts) != 1:
        help(main)
        sys.exit(2)

    if not os.path.isdir( sys.argv[2] ):
        print "Error, ", sys.argv[2], " is not a valid directory"
        help(main)
        sys.exit(2)


    prefix = 'dna'
    goodFiles = []
    badFiles = []

    fileList = os.listdir(sys.argv[2])
    for f in fileList:
        absFile = os.path.join(os.path.abspath(sys.argv[2]), f )
        with open(absFile, 'rb') as csvfile:
            # use csv to separate the fields, making it easier to deal with the
            # first value without hard-coding its size
            reader = csv.reader(csvfile, delimiter='\t')
            match = None
            count = 0

            for row in reader:
                # matching rows
                if row[0].lower().startswith(prefix):

                    if match is None:
                        # first line with prefix..
                        match = row[0]

                    if row[0] == match:
                        # found a match, so increment
                        count += 1

                    if row[0] != match:
                        # row prefix has changed
                        if 96 <= count < 110:
                            # counted enough, so start counting the next
                            match = row[0] # match on this now
                            count = 0 # reset the count
                            goodFiles.append(csvfile.name)
                        else:
                            # didn't count enough, so stop working through this file
                            badFiles.append(csvfile.name)
                            break

                # non-matching rows
                else:
                    if match is None:
                        # ignore preceding lines in file
                        continue
                    else:
                        # found non-matching line when expecting a match
                        break
    else:
        if not 96 <= count < 110:
                    #there was at least successful run of lines
            goodFiles.remove(csvfile.name)

    # Create output files
    createFile(goodFiles, 'goodNano')
    createFile(badFiles, 'badNano')

def createFile(files, fName):
    fileName = open( fName + ".csv", "w" )
    for f in files:
        fileName.write( os.path.basename(f) )
        fileName.write("\n")


if __name__ == '__main__':
    main()
import csv

prefix = 'DNA'

with open('file.txt','rb') as csvfile:
    # use csv to separate the fields, making it easier to deal with the
    # first value without hard-coding its size
    reader = csv.reader(csvfile, delimiter='\t')
    match = None
    count = 0
    is_good = False
    for row in reader:
        # matching rows
        if row[0].startswith(prefix):

            if match is None:
                # first line with prefix..
                match = row[0]

            if row[0] == match:
                # found a match, so increment
                count += 1

            if row[0] != match:
                # row prefix has changed
                if 96 <= count < 100:
                    # counted enough, so start counting the next
                    match = row[0] # match on this now
                    count = 0 # reset the count
                else:
                    # didn't count enough, so stop working through this file
                    break

        # non-matching rows
        else:
            if match is None:
                # ignore preceding lines in file
                continue
            else:
                # found non-matching line when expecting a match
                break
    else:
        if 96 <= count < 100:
            # there was at least successful run of lines
            is_good = True

if is_good:
    print 'File was good'
else:
    print 'File was bad'
导入csv
前缀='DNA'
打开('file.txt','rb')作为csvfile:
#使用csv分隔字段,以便更轻松地处理
#无需硬编码其大小的第一个值
reader=csv.reader(csvfile,分隔符='\t')
匹配=无
计数=0
是好的还是错的
对于读取器中的行:
#匹配行
如果第[0]行开始,则使用(前缀):
如果匹配为“无”:
#带前缀的第一行。。
匹配=行[0]
如果行[0]==匹配:
#找到了匹配项,所以
计数+=1
如果行[0]!=匹配:
#行前缀已更改

如果96根据您的描述,您感兴趣的行与正则表达式匹配:

import csv
import sys
import os
import getopt

def main():
    '''
analyzeNano.py -d input-directory

analyzeNano.py analyzes a list of XYZ files inside input-directory. It counts for the number of consequitive DNA samples with identical ID and if it between 96 and 110 it treats it as 'good', otherwise 'bad'.
    input-directory    an input directory where XYZ files are located
    -d    flag for input-directory
At the end it creates 2 files: goodNano.csv and badNano.csv
Note: files that are not in goodNano.csv and badNano.csv have no DNA ID and therefore not listed
'''
    try:
        opts, args = getopt.getopt(sys.argv[1:],'d:')
    except getopt.GetoptError, err:
        print str(err)
        help(main)
        sys.exit(2)

    if len(opts) != 1:
        help(main)
        sys.exit(2)

    if not os.path.isdir( sys.argv[2] ):
        print "Error, ", sys.argv[2], " is not a valid directory"
        help(main)
        sys.exit(2)


    prefix = 'dna'
    goodFiles = []
    badFiles = []

    fileList = os.listdir(sys.argv[2])
    for f in fileList:
        absFile = os.path.join(os.path.abspath(sys.argv[2]), f )
        with open(absFile, 'rb') as csvfile:
            # use csv to separate the fields, making it easier to deal with the
            # first value without hard-coding its size
            reader = csv.reader(csvfile, delimiter='\t')
            match = None
            count = 0

            for row in reader:
                # matching rows
                if row[0].lower().startswith(prefix):

                    if match is None:
                        # first line with prefix..
                        match = row[0]

                    if row[0] == match:
                        # found a match, so increment
                        count += 1

                    if row[0] != match:
                        # row prefix has changed
                        if 96 <= count < 110:
                            # counted enough, so start counting the next
                            match = row[0] # match on this now
                            count = 0 # reset the count
                            goodFiles.append(csvfile.name)
                        else:
                            # didn't count enough, so stop working through this file
                            badFiles.append(csvfile.name)
                            break

                # non-matching rows
                else:
                    if match is None:
                        # ignore preceding lines in file
                        continue
                    else:
                        # found non-matching line when expecting a match
                        break
    else:
        if not 96 <= count < 110:
                    #there was at least successful run of lines
            goodFiles.remove(csvfile.name)

    # Create output files
    createFile(goodFiles, 'goodNano')
    createFile(badFiles, 'badNano')

def createFile(files, fName):
    fileName = open( fName + ".csv", "w" )
    for f in files:
        fileName.write( os.path.basename(f) )
        fileName.write("\n")


if __name__ == '__main__':
    main()
^DNA[0-9]{10}
也就是说,我假设你的xyz实际上是十位数

import csv
import sys
import os
import getopt

def main():
    '''
analyzeNano.py -d input-directory

analyzeNano.py analyzes a list of XYZ files inside input-directory. It counts for the number of consequitive DNA samples with identical ID and if it between 96 and 110 it treats it as 'good', otherwise 'bad'.
    input-directory    an input directory where XYZ files are located
    -d    flag for input-directory
At the end it creates 2 files: goodNano.csv and badNano.csv
Note: files that are not in goodNano.csv and badNano.csv have no DNA ID and therefore not listed
'''
    try:
        opts, args = getopt.getopt(sys.argv[1:],'d:')
    except getopt.GetoptError, err:
        print str(err)
        help(main)
        sys.exit(2)

    if len(opts) != 1:
        help(main)
        sys.exit(2)

    if not os.path.isdir( sys.argv[2] ):
        print "Error, ", sys.argv[2], " is not a valid directory"
        help(main)
        sys.exit(2)


    prefix = 'dna'
    goodFiles = []
    badFiles = []

    fileList = os.listdir(sys.argv[2])
    for f in fileList:
        absFile = os.path.join(os.path.abspath(sys.argv[2]), f )
        with open(absFile, 'rb') as csvfile:
            # use csv to separate the fields, making it easier to deal with the
            # first value without hard-coding its size
            reader = csv.reader(csvfile, delimiter='\t')
            match = None
            count = 0

            for row in reader:
                # matching rows
                if row[0].lower().startswith(prefix):

                    if match is None:
                        # first line with prefix..
                        match = row[0]

                    if row[0] == match:
                        # found a match, so increment
                        count += 1

                    if row[0] != match:
                        # row prefix has changed
                        if 96 <= count < 110:
                            # counted enough, so start counting the next
                            match = row[0] # match on this now
                            count = 0 # reset the count
                            goodFiles.append(csvfile.name)
                        else:
                            # didn't count enough, so stop working through this file
                            badFiles.append(csvfile.name)
                            break

                # non-matching rows
                else:
                    if match is None:
                        # ignore preceding lines in file
                        continue
                    else:
                        # found non-matching line when expecting a match
                        break
    else:
        if not 96 <= count < 110:
                    #there was at least successful run of lines
            goodFiles.remove(csvfile.name)

    # Create output files
    createFile(goodFiles, 'goodNano')
    createFile(badFiles, 'badNano')

def createFile(files, fName):
    fileName = open( fName + ".csv", "w" )
    for f in files:
        fileName.write( os.path.basename(f) )
        fileName.write("\n")


if __name__ == '__main__':
    main()
这里的策略是匹配13个字符的字符串。如果没有比赛,而我们以前也没有比赛过,我们会毫不犹豫地继续比赛。一旦我们匹配,我们 保存字符串,并递增一个计数器。只要我们一直匹配正则表达式和保存的字符串,我们就一直在递增。一旦我们找到一个不同的正则表达式匹配,或者根本没有匹配,相同匹配的序列就结束了。如果有效,我们将计数重置为 零,最后一个匹配项为空。如果无效,我们退出

import csv
import sys
import os
import getopt

def main():
    '''
analyzeNano.py -d input-directory

analyzeNano.py analyzes a list of XYZ files inside input-directory. It counts for the number of consequitive DNA samples with identical ID and if it between 96 and 110 it treats it as 'good', otherwise 'bad'.
    input-directory    an input directory where XYZ files are located
    -d    flag for input-directory
At the end it creates 2 files: goodNano.csv and badNano.csv
Note: files that are not in goodNano.csv and badNano.csv have no DNA ID and therefore not listed
'''
    try:
        opts, args = getopt.getopt(sys.argv[1:],'d:')
    except getopt.GetoptError, err:
        print str(err)
        help(main)
        sys.exit(2)

    if len(opts) != 1:
        help(main)
        sys.exit(2)

    if not os.path.isdir( sys.argv[2] ):
        print "Error, ", sys.argv[2], " is not a valid directory"
        help(main)
        sys.exit(2)


    prefix = 'dna'
    goodFiles = []
    badFiles = []

    fileList = os.listdir(sys.argv[2])
    for f in fileList:
        absFile = os.path.join(os.path.abspath(sys.argv[2]), f )
        with open(absFile, 'rb') as csvfile:
            # use csv to separate the fields, making it easier to deal with the
            # first value without hard-coding its size
            reader = csv.reader(csvfile, delimiter='\t')
            match = None
            count = 0

            for row in reader:
                # matching rows
                if row[0].lower().startswith(prefix):

                    if match is None:
                        # first line with prefix..
                        match = row[0]

                    if row[0] == match:
                        # found a match, so increment
                        count += 1

                    if row[0] != match:
                        # row prefix has changed
                        if 96 <= count < 110:
                            # counted enough, so start counting the next
                            match = row[0] # match on this now
                            count = 0 # reset the count
                            goodFiles.append(csvfile.name)
                        else:
                            # didn't count enough, so stop working through this file
                            badFiles.append(csvfile.name)
                            break

                # non-matching rows
                else:
                    if match is None:
                        # ignore preceding lines in file
                        continue
                    else:
                        # found non-matching line when expecting a match
                        break
    else:
        if not 96 <= count < 110:
                    #there was at least successful run of lines
            goodFiles.remove(csvfile.name)

    # Create output files
    createFile(goodFiles, 'goodNano')
    createFile(badFiles, 'badNano')

def createFile(files, fName):
    fileName = open( fName + ".csv", "w" )
    for f in files:
        fileName.write( os.path.basename(f) )
        fileName.write("\n")


if __name__ == '__main__':
    main()
我急忙补充说,以下内容未经测试

import csv
import sys
import os
import getopt

def main():
    '''
analyzeNano.py -d input-directory

analyzeNano.py analyzes a list of XYZ files inside input-directory. It counts for the number of consequitive DNA samples with identical ID and if it between 96 and 110 it treats it as 'good', otherwise 'bad'.
    input-directory    an input directory where XYZ files are located
    -d    flag for input-directory
At the end it creates 2 files: goodNano.csv and badNano.csv
Note: files that are not in goodNano.csv and badNano.csv have no DNA ID and therefore not listed
'''
    try:
        opts, args = getopt.getopt(sys.argv[1:],'d:')
    except getopt.GetoptError, err:
        print str(err)
        help(main)
        sys.exit(2)

    if len(opts) != 1:
        help(main)
        sys.exit(2)

    if not os.path.isdir( sys.argv[2] ):
        print "Error, ", sys.argv[2], " is not a valid directory"
        help(main)
        sys.exit(2)


    prefix = 'dna'
    goodFiles = []
    badFiles = []

    fileList = os.listdir(sys.argv[2])
    for f in fileList:
        absFile = os.path.join(os.path.abspath(sys.argv[2]), f )
        with open(absFile, 'rb') as csvfile:
            # use csv to separate the fields, making it easier to deal with the
            # first value without hard-coding its size
            reader = csv.reader(csvfile, delimiter='\t')
            match = None
            count = 0

            for row in reader:
                # matching rows
                if row[0].lower().startswith(prefix):

                    if match is None:
                        # first line with prefix..
                        match = row[0]

                    if row[0] == match:
                        # found a match, so increment
                        count += 1

                    if row[0] != match:
                        # row prefix has changed
                        if 96 <= count < 110:
                            # counted enough, so start counting the next
                            match = row[0] # match on this now
                            count = 0 # reset the count
                            goodFiles.append(csvfile.name)
                        else:
                            # didn't count enough, so stop working through this file
                            badFiles.append(csvfile.name)
                            break

                # non-matching rows
                else:
                    if match is None:
                        # ignore preceding lines in file
                        continue
                    else:
                        # found non-matching line when expecting a match
                        break
    else:
        if not 96 <= count < 110:
                    #there was at least successful run of lines
            goodFiles.remove(csvfile.name)

    # Create output files
    createFile(goodFiles, 'goodNano')
    createFile(badFiles, 'badNano')

def createFile(files, fName):
    fileName = open( fName + ".csv", "w" )
    for f in files:
        fileName.write( os.path.basename(f) )
        fileName.write("\n")


if __name__ == '__main__':
    main()
# Input file with DNA lines to match:
infile = "z:/file.txt"

# This is the regex for the lines of interest:
regex = re.compile('^DNA[0-9]{10}')

# This will keep count of the number of matches in sequence:
n_seq = 0

# This is the previous match (if any):
lastmatch = ''

# Subroutine to check given sequence count and bail if bad:
def bail_on_bad_sequence(count, match):
    if 96 <= count < 100:
        return
    sys.stderr.write("Bad count (%d) for '%s'\n" % (count,match))
    sys.exit(1)


with open(infile) as file:
    for line in file:
        # Try to match the line to the regex:
        match = re.match(line)

        if match:
            if match.group(0) == lastmatch:
                n_seq += 1
            else:
                bail_on_bad_sequence(lastmatch, n_seq)
                n_seq = 0
                lastmatch = match.group(0)
        else:
            if n_seq != 0:
                bail_on_bad_sequence(lastmatch, n_seq)
                n_seq = 0
                lastmatch = ''
#输入要匹配的DNA行的文件:
infle=“z:/file.txt”
#这是感兴趣的行的正则表达式:
regex=re.compile(“^DNA[0-9]{10}”)
#这将按顺序记录匹配数:
n_seq=0
#这是上一个匹配项(如果有):
lastmatch=“”
#检查给定序列计数和bail(如果错误)的子例程:
def bail_on_bad_序列(计数、匹配):

如果96请忽略我最后一次查看代码的请求。我自己检查了一下,发现问题出在格式上。 现在看来,它可以按预期工作并分析目录中的所有文件。再次感谢梅瑟。这种帮助是巨大的。我仍然有些担心计算的准确性,因为在一些情况下,它失败了,但它不应该。。。但我会调查的。
总的来说。。。非常感谢大家提供的巨大帮助。

欢迎来到Stackoverflow,快速调查,您从哪一个教程中学习了如何打开这样的文件?如果您也向我们展示数据的一个子集,这将很有帮助。嗯。。。我不是从教程中学到的:)。。我刚刚打开了Python文档,找到了如何打开和读取文件。很抱歉,我无法提供数据的子集,因为它位于Firewall后面。谢谢您的建议,但我遇到了以下错误:回溯(最近一次调用):文件“\Scripts\script.py”,第7行,在AttributeError中:“str”对象没有来自以下代码的属性“startwith”:with open(“z:/file.txt”,“rU”)作为文件:print file.name counter=0用于文件中的行:if line.lower().startwith('dna'):new_l=list(l[17+计数器])print”“。加入(new_l[:13])计数器+=1@susja你拼错了
startswith
Great!!谢谢你的建议。我的文件不是csv…但无论如何,我会尝试它并用结果更新你,虽然它是以制表符分隔的,所以应该仍然有效。好的,Matthew…问题1与“键”相关:回溯(最近一次调用):file“\Scripts\script.py“,第32行,在NameError:全局名称'key'未定义问题#2:在我注释掉'else'后,阻止它完成,没有错误,但结果我希望它应该打印出我处理的文件是'good'或'bad'。在我们的情况下,它不会通知处理结果。对不起,我的错,
import csv
import sys
import os
import getopt

def main():
    '''
analyzeNano.py -d input-directory

analyzeNano.py analyzes a list of XYZ files inside input-directory. It counts for the number of consequitive DNA samples with identical ID and if it between 96 and 110 it treats it as 'good', otherwise 'bad'.
    input-directory    an input directory where XYZ files are located
    -d    flag for input-directory
At the end it creates 2 files: goodNano.csv and badNano.csv
Note: files that are not in goodNano.csv and badNano.csv have no DNA ID and therefore not listed
'''
    try:
        opts, args = getopt.getopt(sys.argv[1:],'d:')
    except getopt.GetoptError, err:
        print str(err)
        help(main)
        sys.exit(2)

    if len(opts) != 1:
        help(main)
        sys.exit(2)

    if not os.path.isdir( sys.argv[2] ):
        print "Error, ", sys.argv[2], " is not a valid directory"
        help(main)
        sys.exit(2)


    prefix = 'dna'
    goodFiles = []
    badFiles = []

    fileList = os.listdir(sys.argv[2])
    for f in fileList:
        absFile = os.path.join(os.path.abspath(sys.argv[2]), f )
        with open(absFile, 'rb') as csvfile:
            # use csv to separate the fields, making it easier to deal with the
            # first value without hard-coding its size
            reader = csv.reader(csvfile, delimiter='\t')
            match = None
            count = 0

            for row in reader:
                # matching rows
                if row[0].lower().startswith(prefix):

                    if match is None:
                        # first line with prefix..
                        match = row[0]

                    if row[0] == match:
                        # found a match, so increment
                        count += 1

                    if row[0] != match:
                        # row prefix has changed
                        if 96 <= count < 110:
                            # counted enough, so start counting the next
                            match = row[0] # match on this now
                            count = 0 # reset the count
                            goodFiles.append(csvfile.name)
                        else:
                            # didn't count enough, so stop working through this file
                            badFiles.append(csvfile.name)
                            break

                # non-matching rows
                else:
                    if match is None:
                        # ignore preceding lines in file
                        continue
                    else:
                        # found non-matching line when expecting a match
                        break
    else:
        if not 96 <= count < 110:
                    #there was at least successful run of lines
            goodFiles.remove(csvfile.name)

    # Create output files
    createFile(goodFiles, 'goodNano')
    createFile(badFiles, 'badNano')

def createFile(files, fName):
    fileName = open( fName + ".csv", "w" )
    for f in files:
        fileName.write( os.path.basename(f) )
        fileName.write("\n")


if __name__ == '__main__':
    main()