Python 根据条件将tsv的每一行复制到另一个tsv
我有一个目录,比如说Python 根据条件将tsv的每一行复制到另一个tsv,python,csv,text,tsv,Python,Csv,Text,Tsv,我有一个目录,比如说/var/work/X/,其中有数百个tsv文件 这些文件名为call1.tsv、call2.tsv、call3.tsv等 一个tsv如下所示(称为call1.tsv): 我想遍历每一行,只选取第三列为reply或field3=reply的行,并将其保存在另一个目录中同名的文件中/var/work/processed/ 最后,我需要在/var/work/processed/call1.tsv field1 field2 field3 field4 field5 xyz tt
我有一个目录,比如说 /var/work/X/,其中有数百个 tsv 文件。这些文件名为 call1.tsv、call2.tsv、call3.tsv 等。
一个tsv如下所示(称为call1.tsv):
我想遍历每一行,只选取第三列为 reply(或者说 field3=reply)的行,并将其保存到另一个目录 /var/work/processed/ 下的同名文件中。
最后,我需要在 /var/work/processed/call1.tsv 中得到:
field1 field2 field3 field4 field5
xyz tts reply sms 'hi'
mno server reply sms 'done'
我需要像这样检查所有TSV。
请帮我看一下代码。
import os, sys,glob,codecs
import csv
# Question's Python 2 script: for every *.tsv in input_dir, write the rows
# whose third field contains "reply" to a same-named file in output_dir.
# NOTE(review): block indentation was lost in this copy, so the nesting of the
# statements below cannot be determined from the text itself.
# Raise the csv field-size limit to sys.maxint (a Python 2-only attribute).
csv.field_size_limit(sys.maxint)
input_dir = "/var/work/X"
print input_dir
output_dir= "/var/work/processed"
print output_dir
# Get names of all tsv files
tsvs= glob.glob(os.path.join(input_dir,'*.tsv'))
for tsvfile in tsvs:
# Keep only the last path component so the output file reuses the input name.
outtsvfile=str(tsvfile).split('/')[-1]
print outtsvfile
# The input handle is opened inline and is never explicitly closed.
data=csv.reader(open(tsvfile,'rb'),delimiter = "\t")
try:
# NOTE(review): open() is called without a mode (read-only by default), yet the
# handle is handed to csv.writer - presumably 'wb' was intended; confirm.
with open(os.path.join(output_dir, outtsvfile)) as outputfile:
csvwriter=csv.writer(outputfile,delimiter='\t')
for row in data:
# Substring test: any third field that contains "reply" matches.
if "reply" in row[2]:
csvwriter.writerow(row)
except csv.Error as e:
print "%s" %e
# NOTE(review): traceback is not among the imports shown for this script.
print "%s" %traceback.format_exc()
我得到的错误是:ValueError: I/O operation on closed file(对已关闭的文件执行 I/O 操作)。
使用模块对输入目录中的所有文件执行此操作。要列出并遍历输入目录中的所有 tsv 文件,您可以使用 os 模块的方法。我认为您的第一个问题是:
# Excerpt of the question's write loop, quoted to point at the problem line.
# NOTE(review): open() has no mode argument here, so the file is opened
# read-only even though it is passed to csv.writer.
with open(os.path.join(output_dir, outtsvfile)) as outputfile:
csvwriter=csv.writer(outputfile,delimiter='\t')
for row in data: # <= bad indentation!
if "reply" in row[2]:
csvwriter.writerow(row)
您的文件处理代码应该是
import csv
import sys
# Choose the open() arguments the csv module expects on each major version.
if sys.hexversion < 0x3000000:
    # Python 2.x: csv reads and writes byte strings, so use binary mode.
    READ_MODE = "rb"
    WRITE_MODE = "wb"
    _OPEN_KWARGS = {}
else:
    # Python 3.x: csv works on text. The "U" flag cannot be combined with "w"
    # and was removed from open() in Python 3.11, so use plain text modes and
    # pass newline="" so the csv module handles line endings itself.
    READ_MODE = "r"
    WRITE_MODE = "w"
    _OPEN_KWARGS = {"newline": ""}


def read_csv(fname, skip_header=False, **kwargs):
    """Lazily yield the rows of a delimited text file.

    Args:
        fname: Path of the file to read.
        skip_header: When true, discard the first row before yielding.
        **kwargs: Passed through to ``csv.reader`` (e.g. ``delimiter``).

    Yields:
        Each row as a list of strings.
    """
    # The file stays open only while the generator is being consumed.
    with open(fname, READ_MODE, **_OPEN_KWARGS) as inf:
        incsv = csv.reader(inf, **kwargs)
        if skip_header:
            # The default keeps an empty file from raising StopIteration.
            next(incsv, None)
        for row in incsv:  # or 'yield from incsv' if your Python supports it
            yield row


def write_csv(fname, rows, header=None, **kwargs):
    """Write rows, optionally preceded by a header row, to a delimited file.

    Args:
        fname: Path of the file to create or overwrite.
        rows: Iterable of rows; it is consumed while the file is open.
        header: Optional row written first; skipped when empty or None.
        **kwargs: Passed through to ``csv.writer`` (e.g. ``delimiter``).
    """
    with open(fname, WRITE_MODE, **_OPEN_KWARGS) as outf:
        outcsv = csv.writer(outf, **kwargs)
        if header:
            outcsv.writerow(header)
        outcsv.writerows(rows)
def process_file(in_file, out_file):
    """Copy the header and every row whose third field equals "reply".

    Args:
        in_file: Path of the tab-separated input file.
        out_file: Path of the tab-separated file to write.
    """
    data = read_csv(in_file, delimiter="\t")
    # The first row is treated as the header; an empty file yields [].
    header = next(data, [])
    # Blank lines parse as [] and short rows have no third field; skip them
    # rather than letting row[2] raise IndexError part-way through the file.
    filtered = (row for row in data if len(row) > 2 and row[2] == "reply")
    write_csv(out_file, filtered, header=header, delimiter="\t")
导入csv
导入系统
如果sys.hexversion<0x3000000:
READ_MODE=“rb”#Python 2.x
写入模式=“wb”
其他:
READ_MODE=“rU”#Python 3.x
写入_MODE=“wU”
def read_csv(fname,skip_header=False,**kwargs):
以open(fname,读取模式)作为inf:
incsv=csv.reader(inf,**kwargs)
如果跳过标题:
下一个(incsv,无)
对于incsv中的行:#或“从incsv中获得收益”,如果Python支持的话
产量行#
def write_csv(fname,rows,header=None,**kwargs):
以open(fname,WRITE_模式)作为输出:
outcsv=csv.writer(outp,**kwargs)
如果标题为:
outcsv.writerow(标题)
outcsv.writerows(行)
def进程文件(in_文件,out_文件):
数据=读取\u csv(在\u文件中,分隔符=“\t”)
页眉=下一个(数据,[])
过滤=(如果行[2]=“回复”,则数据中的行对应行)
写入csv(输出文件,过滤,头=头,分隔符=“\t”)
先自己尝试一下,并把代码贴出来,我们会帮助改进。这里的内容读起来就像一份需求说明或家庭作业。“请帮我写代码。”当然!发布你的代码,我们很乐意帮助你。你试过什么吗?我们会帮助您,但不会替您编写代码。请查看 csv 模块,了解如何正确读取 csv 文件。另外,我假设您的意思是 field3=reply?在这些示例中,您的 field4 是 sms。
# Excerpt of the question's write loop, quoted to point at the problem line.
# NOTE(review): open() has no mode argument here, so the file is opened
# read-only even though it is passed to csv.writer.
with open(os.path.join(output_dir, outtsvfile)) as outputfile:
csvwriter=csv.writer(outputfile,delimiter='\t')
for row in data: # <= bad indentation!
if "reply" in row[2]:
csvwriter.writerow(row)
# Directory that is scanned for input files.
IN_DIR = "/var/work/X/"
# Directory used for the output paths.
OUT_DIR = "/var/work/processed/"


def get_file_names(dir, ext=""):
    """Yield the names of regular files in ``dir`` that end with ``ext``.

    Args:
        dir: Directory to list.
        ext: Required file-name suffix; the default "" matches every file.

    Yields:
        Bare file names (no directory part), in ``os.listdir`` order.
    """
    for fname in os.listdir(dir):
        # os.listdir returns bare names, so the file check must be made on the
        # joined path; testing the bare name would look in the current working
        # directory instead of ``dir``.
        if fname.endswith(ext) and os.path.isfile(os.path.join(dir, fname)):
            yield fname
def process_file(in_file, out_file):
    """Placeholder step: print which input path maps to which output path."""
    # your file-processing code goes here
    mapping = "{} => {}".format(in_file, out_file)
    print(mapping)
def main():
    """Call process_file for each ".tsv" name that get_file_names yields."""
    for tsv_name in get_file_names(IN_DIR, ".tsv"):
        # Same bare file name under both directories.
        source_path = os.path.join(IN_DIR, tsv_name)
        target_path = os.path.join(OUT_DIR, tsv_name)
        process_file(source_path, target_path)


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
import csv
import sys
# Choose the open() arguments the csv module expects on each major version.
if sys.hexversion < 0x3000000:
    # Python 2.x: csv reads and writes byte strings, so use binary mode.
    READ_MODE = "rb"
    WRITE_MODE = "wb"
    _OPEN_KWARGS = {}
else:
    # Python 3.x: csv works on text. The "U" flag cannot be combined with "w"
    # and was removed from open() in Python 3.11, so use plain text modes and
    # pass newline="" so the csv module handles line endings itself.
    READ_MODE = "r"
    WRITE_MODE = "w"
    _OPEN_KWARGS = {"newline": ""}


def read_csv(fname, skip_header=False, **kwargs):
    """Lazily yield the rows of a delimited text file.

    Args:
        fname: Path of the file to read.
        skip_header: When true, discard the first row before yielding.
        **kwargs: Passed through to ``csv.reader`` (e.g. ``delimiter``).

    Yields:
        Each row as a list of strings.
    """
    # The file stays open only while the generator is being consumed.
    with open(fname, READ_MODE, **_OPEN_KWARGS) as inf:
        incsv = csv.reader(inf, **kwargs)
        if skip_header:
            # The default keeps an empty file from raising StopIteration.
            next(incsv, None)
        for row in incsv:  # or 'yield from incsv' if your Python supports it
            yield row


def write_csv(fname, rows, header=None, **kwargs):
    """Write rows, optionally preceded by a header row, to a delimited file.

    Args:
        fname: Path of the file to create or overwrite.
        rows: Iterable of rows; it is consumed while the file is open.
        header: Optional row written first; skipped when empty or None.
        **kwargs: Passed through to ``csv.writer`` (e.g. ``delimiter``).
    """
    with open(fname, WRITE_MODE, **_OPEN_KWARGS) as outf:
        outcsv = csv.writer(outf, **kwargs)
        if header:
            outcsv.writerow(header)
        outcsv.writerows(rows)
def process_file(in_file, out_file):
    """Copy the header and every row whose third field equals "reply".

    Args:
        in_file: Path of the tab-separated input file.
        out_file: Path of the tab-separated file to write.
    """
    data = read_csv(in_file, delimiter="\t")
    # The first row is treated as the header; an empty file yields [].
    header = next(data, [])
    # Blank lines parse as [] and short rows have no third field; skip them
    # rather than letting row[2] raise IndexError part-way through the file.
    filtered = (row for row in data if len(row) > 2 and row[2] == "reply")
    write_csv(out_file, filtered, header=header, delimiter="\t")