如何有效地删除python目录?
我有以下 Python 代码:
"""
Module to encapsulate body parsing.
"""
from urlparse import urlparse
from bs4 import BeautifulSoup,Comment
import os
import shutil
from hct import utils
# Working directory of the process, captured once at import time; the
# per-analysis folders are created beneath it.
BASE_DIR = os.getcwd()
# Command template used when no headers are requested; %s is replaced with
# the URL.
PAGE_SOURCE_CMD = 'phantomas %s --page-source'
# Command template used when headers are requested; %s is replaced with the
# URL. The trailing "> body.html" presumably relies on the command being run
# through a shell -- TODO confirm against utils.execute_command.
FEO_PAGE_SOURCE_CMD = 'phantomjs RequestURL.js %s > body.html'
class Extractor(object):
    """
    Fetch the generated HTML of a URL through an external command and parse
    it with BeautifulSoup.

    Each analysis works in the folder ``BASE_DIR/<analysis_id>/html``.
    """

    def __init__(self):
        pass

    def check_tags(self, tags, feed):
        """
        Method: Check which of the given ``rel`` values occur on a <link>
        element of the parsed page (used for the prefetch / DNS pre-resolve
        checks).
        Args: tags - iterable of rel values (strings) to look for.
        feed - parsed document exposing a CSS ``select`` method.
        Returns: ``{'link': tag}`` holding the last entry of ``tags`` that
        matched, or an empty dictionary when none matched.
        """
        result = {}
        for tag in tags:
            if feed.select('link[rel=' + tag + ']'):
                # A single key is used, so a later match replaces an
                # earlier one.
                result['link'] = tag
        return result

    def get_generated_html(self, url, has_headers):
        """
        Method: Run the external command that produces the generated HTML
        for the url. The command runs relative to the current working
        directory.
        Args: url - address to fetch; 'http://' is prepended when it has no
        scheme.
        has_headers - selects FEO_PAGE_SOURCE_CMD when true and
        PAGE_SOURCE_CMD otherwise.
        Returns: Nothing.
        """
        if not urlparse(url).scheme:
            url = 'http://' + url
        # Truthiness is used here, as in create_analysis_folder, so both
        # methods agree on values such as None.
        template = FEO_PAGE_SOURCE_CMD if has_headers else PAGE_SOURCE_CMD
        # NOTE(review): url is interpolated into the command string without
        # quoting. If utils.execute_command hands the string to a shell and
        # url can come from an untrusted source, this allows command
        # injection -- confirm and escape.
        utils.execute_command(template % url).communicate()

    def create_analysis_folder(self, analysis_id, has_headers):
        """
        Method: Make sure the folder ``BASE_DIR/<analysis_id>`` exists and
        work out the path of its ``html`` sub-folder.
        Args: analysis_id - identifier used as the folder name (converted
        with str()).
        has_headers - when true the ``html`` folder is created as well and
        RequestURL.js is copied into it from BASE_DIR.
        Returns: The path ``BASE_DIR/<analysis_id>/html``. The folder itself
        is only created when has_headers is true.
        """
        analysis_id = str(analysis_id)
        analysis_dir = os.path.join(BASE_DIR, analysis_id)
        path = os.path.join(analysis_dir, 'html')
        if not os.path.exists(analysis_dir):
            os.makedirs(analysis_dir)
        if has_headers:
            # The module is called repeatedly; an unconditional makedirs
            # raised OSError as soon as the folder existed from an earlier
            # run.
            if not os.path.isdir(path):
                os.makedirs(path)
            shutil.copy(os.path.join(BASE_DIR, "RequestURL.js"), path)
        return path

    def start_parser(self, analysis_id, url, hasHeaders=False):
        """
        Method: Fetch the generated HTML for the url and parse the first
        ``.html`` file found below the analysis folder.
        Args: analysis_id - identifier of the analysis (converted with
        str()).
        url - address to fetch.
        hasHeaders - forwarded to create_analysis_folder and
        get_generated_html.
        Returns: The BeautifulSoup document, or None when no ``.html`` file
        was found.
        Side effects: changes the process working directory to the ``html``
        folder; when hasHeaders is true and a file was parsed it moves one
        level up again. The parsed document is printed.
        """
        analysis_id = str(analysis_id)
        path = self.create_analysis_folder(analysis_id, hasHeaders)
        # NOTE(review): when hasHeaders is false the html folder is not
        # created by create_analysis_folder, so this chdir raises OSError
        # unless the folder already exists -- confirm the intended flow.
        os.chdir(path)
        self.get_generated_html(url, hasHeaders)

        feed = None
        for root, _dirs, files in os.walk(path):
            html_files = [name for name in files if name.endswith('.html')]
            if not html_files:
                continue
            # Join with the walked directory so a file found in a
            # sub-folder is opened where it actually lives, and close the
            # handle once read.
            with open(os.path.join(root, html_files[0])) as handle:
                feed = BeautifulSoup(handle.read())
            print(feed)
            if hasHeaders:
                os.chdir('..')
            # NOTE(review): the folder is not removed here (the rmtree call
            # was disabled in the original); callers that need the space
            # back must delete BASE_DIR/<analysis_id> themselves.
            break
        return feed
我根据一些条件创建文件夹,下载 HTML 源代码,然后在解析后删除该文件夹。此模块会被多次调用。问题是:第一次调用时一切正常,但第二次调用时,#shutil.rmtree(path) 这一行会抛出“没有这样的文件或目录”错误,因为代码试图在一个不存在的目录中创建目录——该目录在第一次调用该方法时已被删除。
我必须删除该目录,否则它会耗尽服务器内存。我该如何解决这个问题?

如果 shutil.rmtree(path) 引发“没有这样的文件或目录”,您可以在 shutil.rmtree() 之后立即使用 os.mkdir() 重新创建目录。
如果该错误是针对 path 目录本身引发的,那么这可能是 shutil.rmtree() 的一个 bug——既然已经有人替它完成了删除工作,它应该乐于接受并忽略此类错误。
您可以编写一个包装函数来绕过这个问题:
import errno
import shutil
def rmtree(path):
    """Remove the directory tree at ``path``, tolerating a missing root.

    Behaves like ``shutil.rmtree(path)`` except that an ``OSError`` with
    errno ``ENOENT`` whose ``filename`` equals ``path`` is swallowed, so the
    call succeeds whether or not ``path`` exists. Every other error is
    re-raised unchanged.

    Returns whatever ``shutil.rmtree`` returns, or None when the missing
    root was ignored.
    """
    try:
        outcome = shutil.rmtree(path)
    except OSError as err:
        root_already_gone = err.errno == errno.ENOENT and err.filename == path
        if not root_already_gone:
            raise
        # The tree is gone already -- nothing left to do.
        return None
    return outcome
无论路径是否存在,您都可以调用rmtree(path)
你也可以使用同样的效果