如何使用Python从一个文件到另一个文件搜索和替换文本?
我有一个文件 file1.txt 和另一个文件 file2.txt(标签:python、text-files、replace)。我想要的结果是:
I Show more flower you can see by link
All is Beautyfull.You Can View Here !
Link View :
http://lincoln.com/view/43645645/flower1.jpg
http://lincoln.com/view/84344454/flower3456.jpg
http://lincoln.com/view/43343433/flower56.jpg
http://lincoln.com/view/13424324/flower2.jpg
More Link VIew:
http://kashi.com/view/343434344/flower1.jpg
http://kashi.com/view/766454544/flower3456.jpg
http://kashi.com/view/32634545/flower56.jpg
http://kashi.com/view/84353453/flower2.jpg
++++++++++++++++++++++++++++++++++++++++
I Show more candy you can see by link
All is Beautyfull.You Can View Here !
http://photobucket.com
伪代码是:
if filename exists in file1 but not in file2:
remove filename
else if filename exists in file1 and in file2:
the version in file2 replaces the line in file1
else if filename exists in file2 but not in file1:
do nothing
add the links with the domaine name "http://kashi.com" from file2.txt
in a section "More link view"
add "++++++++++++++++++++++++++"
我尝试了这个算法:
def file_merge(file1name, file2name):
    """Replace lines of file 1 with the same-named lines from file 2.

    Every line of both files is split on '/'; the last segment is treated
    as the "filename".  Each line of file 1 whose filename also occurs in
    file 2 is replaced by the file-2 line (the last such line wins when
    file 2 repeats a filename).  All other lines of file 1 are kept as they
    are, and the result is written back to ``file1name``.

    Lines whose last segment is empty (blank lines, URLs ending in '/')
    carry no filename and are therefore never replaced.

    Args:
        file1name: Path of the file to update; it is overwritten in place.
        file2name: Path of the file that provides the replacement lines.
    """
    def read_segments(path):
        # Plain text mode instead of 'U': the 'U' flag was removed in
        # Python 3.11.  Stripping both '\r' and '\n' keeps CRLF input
        # working on interpreters without universal newlines.
        with open(path) as handle:
            return [line.rstrip('\r\n').split('/') for line in handle]

    file1contents = read_segments(file1name)
    file2contents = read_segments(file2name)

    # One lookup table instead of a nested scan over both files; later
    # file-2 lines overwrite earlier ones with the same filename.
    replacements = {}
    for segments in file2contents:
        if segments[-1]:
            replacements[segments[-1]] = segments

    # Build a new list rather than mutating the list being iterated.
    file3contents = [replacements.get(segments[-1], segments)
                     for segments in file1contents]

    with open(file1name, 'w') as file3:
        file3.writelines('/'.join(segments) + '\n'
                         for segments in file3contents)
# Run the merge on the two sample files; file1.txt is rewritten in place.
file_merge('file1.txt','file2.txt')
谢谢
简单来说:用 file2.txt 中的“filename”替换 file1.txt 中对应的“filename”,并在关键字“++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++”之前把链接和“文件名”追加到内容中。
试试这个:
import urlparse
import os
def file_merge(file1name, file2name, header_lines=4):
    """Rewrite file 1 as the lines of file 2, re-hosting unknown links.

    Every line of file 2 is copied to the output.  A line that is an
    http(s) link whose filename (last path segment) does not occur among
    the links of file 1 gets its host replaced by ``kashi.com``.  Links
    whose filename is known, links without a filename, and non-link text
    are copied unchanged.  The result overwrites ``file1name``.

    Args:
        file1name: Path of the reference file; it is overwritten in place.
        file2name: Path of the file whose lines make up the output.
        header_lines: Number of leading lines of file 1 to ignore when
            collecting known filenames (defaults to 4, the value that used
            to be hard-coded).
    """
    # Function-scope import with a fallback so the block runs on both
    # Python 2 (``urlparse``) and Python 3 (``urllib.parse``).
    try:
        from urllib.parse import urlsplit, urlunsplit
    except ImportError:
        from urlparse import urlsplit, urlunsplit

    def split_link(text):
        """Return the parsed URL for an http(s) link, otherwise None."""
        try:
            parts = urlsplit(text)
        except ValueError:
            # Malformed netloc (e.g. unbalanced IPv6 brackets): not a link.
            return None
        if parts.scheme in ('http', 'https') and parts.netloc:
            return parts
        return None

    # Plain text mode: the 'U' flag was removed in Python 3.11.
    with open(file1name) as file1:
        file1contents = file1.readlines()
    with open(file2name) as file2:
        file2contents = file2.readlines()

    # Exact filenames present in file 1.  A set gives O(1) lookups and
    # avoids the false positives of a suffix test ('bigflower2.jpg' must
    # not count as 'flower2.jpg').
    known_names = set()
    for line in file1contents[header_lines:]:
        parts = split_link(line.strip())
        if parts is not None:
            known_names.add(parts.path.rsplit('/', 1)[-1])

    file3contents = []
    for line in file2contents:
        # Parse without the line terminator, then put it back, so the URL
        # parser never sees the newline and the output keeps its endings.
        body = line.rstrip('\r\n')
        ending = line[len(body):]
        parts = split_link(body.strip())
        if parts is not None:
            filename = parts.path.rsplit('/', 1)[-1]
            if filename and filename not in known_names:
                line = urlunsplit((parts.scheme, 'kashi.com', parts.path,
                                   parts.query, parts.fragment)) + ending
        file3contents.append(line)

    with open(file1name, 'w') as file3:
        file3.writelines(file3contents)
# Run the merge on the sample files; /tmp/file1.txt is rewritten in place.
file_merge('/tmp/file1.txt','/tmp/file2.txt')
试试这个
import urlparse
import os
def file_merge(file1name, file2name, header_lines=4):
    """Rewrite file 1 as the lines of file 2, re-hosting unknown links.

    Every line of file 2 is copied to the output.  A line that is an
    http(s) link whose filename (last path segment) does not occur among
    the links of file 1 gets its host replaced by ``kashi.com``.  Links
    whose filename is known, links without a filename, and non-link text
    are copied unchanged.  The result overwrites ``file1name``.

    Args:
        file1name: Path of the reference file; it is overwritten in place.
        file2name: Path of the file whose lines make up the output.
        header_lines: Number of leading lines of file 1 to ignore when
            collecting known filenames (defaults to 4, the value that used
            to be hard-coded).
    """
    # Function-scope import with a fallback so the block runs on both
    # Python 2 (``urlparse``) and Python 3 (``urllib.parse``).
    try:
        from urllib.parse import urlsplit, urlunsplit
    except ImportError:
        from urlparse import urlsplit, urlunsplit

    def split_link(text):
        """Return the parsed URL for an http(s) link, otherwise None."""
        try:
            parts = urlsplit(text)
        except ValueError:
            # Malformed netloc (e.g. unbalanced IPv6 brackets): not a link.
            return None
        if parts.scheme in ('http', 'https') and parts.netloc:
            return parts
        return None

    # Plain text mode: the 'U' flag was removed in Python 3.11.
    with open(file1name) as file1:
        file1contents = file1.readlines()
    with open(file2name) as file2:
        file2contents = file2.readlines()

    # Exact filenames present in file 1.  A set gives O(1) lookups and
    # avoids the false positives of a suffix test ('bigflower2.jpg' must
    # not count as 'flower2.jpg').
    known_names = set()
    for line in file1contents[header_lines:]:
        parts = split_link(line.strip())
        if parts is not None:
            known_names.add(parts.path.rsplit('/', 1)[-1])

    file3contents = []
    for line in file2contents:
        # Parse without the line terminator, then put it back, so the URL
        # parser never sees the newline and the output keeps its endings.
        body = line.rstrip('\r\n')
        ending = line[len(body):]
        parts = split_link(body.strip())
        if parts is not None:
            filename = parts.path.rsplit('/', 1)[-1]
            if filename and filename not in known_names:
                line = urlunsplit((parts.scheme, 'kashi.com', parts.path,
                                   parts.query, parts.fragment)) + ending
        file3contents.append(line)

    with open(file1name, 'w') as file3:
        file3.writelines(file3contents)
# Run the merge on the sample files; /tmp/file1.txt is rewritten in place.
file_merge('/tmp/file1.txt','/tmp/file2.txt')
请试试这个:
# Merge file1.txt and file2.txt into file3.txt.
#
# Step 1: index file2.txt.  Each line is keyed by (first segment, last
# segment) of the text that remains after stripping trailing whitespace,
# removing 'http://' and splitting on '/'.
with open('file2.txt','r') as f2:
    dic2 = {}   # key -> the file2 line, guaranteed to end with '\n'
    li2 = []    # keys in file order (one entry per line of file2.txt)
    for line in f2:
        spli = line.rstrip().replace('http://','').split('/')
        dic2[(spli[0],spli[-1])] = line if line[-1]=='\n' else line+'\n'
        li2.append((spli[0],spli[-1]))
# Step 2: walk file1.txt and build file3.txt.
with open('file1.txt','r') as f1,open('file3.txt','w') as f3:
    # A single shared iterator: the second loop resumes where the first stops.
    itr1 = iter(f1)
    # Copy lines verbatim up to and including the first blank line.
    for line in itr1:
        f3.write(line)
        if line.strip()=='':
            break
    # For each remaining non-blank line, write file2's line when the key
    # is still in li2; lines with no such key (and blank lines) are not
    # written at all.
    for line in itr1:
        if line.strip():
            spli = line.rstrip().replace('http://','').split('/')
            x = (spli[0],spli[-1])
            if x in li2:
                f3.write(dic2[x])
                # Consume one occurrence so the key is not emitted again below.
                li2.remove((spli[0],spli[-1]))
    # `line` is whatever the last executed loop iteration left behind.
    klu = '\n' if line.rstrip()==line else ''
    # Adds a blank line if that last line was not '\n'-terminated.
    f3.write(klu + '\nMore Link VIew:\n\n')
    # Emit every file2 line whose key was not consumed above.
    for remain in li2:
        f3.write(dic2[remain])
    f3.write('++++++++++++++++++++++++++++++++++++++++ ')
请试试这个:
# Merge file1.txt and file2.txt into file3.txt.
#
# Step 1: index file2.txt.  Each line is keyed by (first segment, last
# segment) of the text that remains after stripping trailing whitespace,
# removing 'http://' and splitting on '/'.
with open('file2.txt','r') as f2:
    dic2 = {}   # key -> the file2 line, guaranteed to end with '\n'
    li2 = []    # keys in file order (one entry per line of file2.txt)
    for line in f2:
        spli = line.rstrip().replace('http://','').split('/')
        dic2[(spli[0],spli[-1])] = line if line[-1]=='\n' else line+'\n'
        li2.append((spli[0],spli[-1]))
# Step 2: walk file1.txt and build file3.txt.
with open('file1.txt','r') as f1,open('file3.txt','w') as f3:
    # A single shared iterator: the second loop resumes where the first stops.
    itr1 = iter(f1)
    # Copy lines verbatim up to and including the first blank line.
    for line in itr1:
        f3.write(line)
        if line.strip()=='':
            break
    # For each remaining non-blank line, write file2's line when the key
    # is still in li2; lines with no such key (and blank lines) are not
    # written at all.
    for line in itr1:
        if line.strip():
            spli = line.rstrip().replace('http://','').split('/')
            x = (spli[0],spli[-1])
            if x in li2:
                f3.write(dic2[x])
                # Consume one occurrence so the key is not emitted again below.
                li2.remove((spli[0],spli[-1]))
    # `line` is whatever the last executed loop iteration left behind.
    klu = '\n' if line.rstrip()==line else ''
    # Adds a blank line if that last line was not '\n'-terminated.
    f3.write(klu + '\nMore Link VIew:\n\n')
    # Emit every file2 line whose key was not consumed above.
    for remain in li2:
        f3.write(dic2[remain])
    f3.write('++++++++++++++++++++++++++++++++++++++++ ')
这是有效的;然而,在我看来,这确实是一个非常奇怪的问题…对不起
from urlparse import urlparse
import os.path
def read_links2(f):
    """Yield ``(key, url)`` for every http(s) link found in *f*.

    *f* is any iterable of text lines.  Each line is stripped and parsed
    with ``urlparse``; lines whose scheme is neither ``http`` nor ``https``
    are skipped.  ``key`` is the pair ``(host, last path component)`` and
    ``url`` is the parse result itself.
    """
    accepted_schemes = ('http', 'https')
    for raw_line in f:
        parsed = urlparse(raw_line.strip())
        if parsed.scheme not in accepted_schemes:
            continue
        basename = os.path.split(parsed.path)[1]
        yield ((parsed.netloc, basename), parsed)
# Index the links of f2.txt by (host, filename).  The pairs are also kept
# in file order so the "More Link VIew" section below is printed
# deterministically (plain dict order is arbitrary on Python 2).
# Plain text mode replaces 'U', which was removed in Python 3.11; every
# line is stripped before use, so LF and CRLF input behave the same.
with open('f2.txt') as f2:
    links2_in_order = list(read_links2(f2))
links2 = dict(links2_in_order)

# Walk f1.txt: a link is replaced by the f2 link with the same key and is
# dropped when f2 has no such link; any non-link line is echoed unchanged.
with open('f1.txt') as f1:
    for line in f1:
        line = line.rstrip()
        url = urlparse(line)
        if url.scheme in ('http', 'https'):
            key = (url.netloc, os.path.split(url.path)[1])
            if key in links2:
                # Single-argument print(...) is valid on Python 2 and 3.
                print(links2[key].geturl())
        else:
            print(line)

print('More Link VIew:')
for key, url in links2_in_order:
    # `links2[key] is url` skips entries that a later duplicate key
    # replaced, so exactly the values of links2 are printed.
    if url.netloc == 'kashi.com' and links2[key] is url:
        print(url.geturl())
print('+++++++++++++++++++')
这是有效的;然而,在我看来,这确实是一个非常奇怪的问题…对不起
from urlparse import urlparse
import os.path
def read_links2(f):
    """Yield ``(key, url)`` for every http(s) link found in *f*.

    *f* is any iterable of text lines.  Each line is stripped and parsed
    with ``urlparse``; lines whose scheme is neither ``http`` nor ``https``
    are skipped.  ``key`` is the pair ``(host, last path component)`` and
    ``url`` is the parse result itself.
    """
    accepted_schemes = ('http', 'https')
    for raw_line in f:
        parsed = urlparse(raw_line.strip())
        if parsed.scheme not in accepted_schemes:
            continue
        basename = os.path.split(parsed.path)[1]
        yield ((parsed.netloc, basename), parsed)
# Index the links of f2.txt by (host, filename).  The pairs are also kept
# in file order so the "More Link VIew" section below is printed
# deterministically (plain dict order is arbitrary on Python 2).
# Plain text mode replaces 'U', which was removed in Python 3.11; every
# line is stripped before use, so LF and CRLF input behave the same.
with open('f2.txt') as f2:
    links2_in_order = list(read_links2(f2))
links2 = dict(links2_in_order)

# Walk f1.txt: a link is replaced by the f2 link with the same key and is
# dropped when f2 has no such link; any non-link line is echoed unchanged.
with open('f1.txt') as f1:
    for line in f1:
        line = line.rstrip()
        url = urlparse(line)
        if url.scheme in ('http', 'https'):
            key = (url.netloc, os.path.split(url.path)[1])
            if key in links2:
                # Single-argument print(...) is valid on Python 2 and 3.
                print(links2[key].geturl())
        else:
            print(line)

print('More Link VIew:')
for key, url in links2_in_order:
    # `links2[key] is url` skips entries that a later duplicate key
    # replaced, so exactly the values of links2 are printed.
    if url.netloc == 'kashi.com' and links2[key] is url:
        print(url.geturl())
print('+++++++++++++++++++')
评论:
- 你只检查了“jpg”文件名是否匹配;但当它们不匹配时,你没有考虑你的两个条件:“(1)文件名存在于文件 1 中,但不存在于文件 2 中;(2)文件名存在于文件 2 中,但不存在于文件 1 中”,也就是什么都不做,并在这个讨论串中添加。
- @j3oy9x,你是在自说自话,没有考虑别人写给你的内容。你前面的两个问题得到了 9 张反对票。你的沟通方式很独特,你应该好好想一想。就我个人而言,只要你的问题和编辑保持现状,我就不会再对它们给予更多关注。你应该考虑我在这里写的话。祝你生活愉快。
- 谢谢!我测试了你的代码,但它没有得到预期的结果。我需要的是:如果文件名存在于文件 1 中但不存在于文件 2 中,则删除;如果文件名同时存在于文件 1 和文件 2 中,则用文件 2 中的版本替换文件 1 中的行;如果文件名存在于文件 2 中但不存在于文件 1 中,则什么都不做;并且在关键字“+++++++++++++++++++++++++++++++++++++++++++++++”之前添加“More Link View”部分及其后的文件名。
- 请具体说明这 3 种情况。按照我的理解:情况 1:文件名存在于文件 1 中,但不存在于文件 2 中——删除它;情况 2:文件名同时存在于文件 1 和文件 2 中——保留文件 2 的条目;情况 3:文件名不存在于文件 1 中,但存在于文件 2 中——把域名改为 kashi.com。请告诉我哪种情况失败了。