如何有效地删除python目录?

如何有效地删除python目录?,python,Python,我有以下Python代码 """ Module to encapsulate body parsing. """ from urlparse import urlparse from bs4 import BeautifulSoup,Comment import os import shutil from hct import utils BASE_DIR = os.getcwd() PAGE_SOURCE_CMD = 'phantomas %s --page-source' FEO_PA

我有以下Python代码

"""
Module to encapsulate body parsing.
"""

from urlparse import urlparse
from bs4 import BeautifulSoup,Comment
import os
import shutil
from hct import utils


# Working directory at the moment this module is imported; analysis folders
# are created beneath it and RequestURL.js is copied from it.
BASE_DIR = os.getcwd()
# Command templates; %s is replaced with the URL to fetch.
PAGE_SOURCE_CMD = 'phantomas %s --page-source'
# This variant redirects the command's output into body.html.
FEO_PAGE_SOURCE_CMD = 'phantomjs  RequestURL.js %s > body.html'


class Extractor(object):
    """
    Utility for fetching the generated HTML of a URL and parsing its body.
    """

    def __init__(self):
        pass

    def check_tags(self, tags, feed):
        """
        Method: Report which of the given ``rel`` values has a matching
                ``<link rel=...>`` element in the parsed document.

        Args: tags - iterable of rel values; each one is concatenated into
                     the CSS selector ``link[rel=<tag>]``.
              feed - parsed document exposing ``select(css_selector)``.

        Returns: ``{'link': tag}`` for the last entry of ``tags`` that
                 matched, or an empty dictionary when nothing matched.
        """
        result = {}
        for tag in tags:
            if feed.select('link[rel=' + tag + ']'):
                result['link'] = tag
        return result

    def get_generated_html(self, url, has_headers=False):
        """
        Method: Run the external page-source command for a URL and call
                ``communicate()`` on whatever ``utils.execute_command``
                returns (presumably a subprocess handle).

        Args: url - address to fetch; ``http://`` is prepended when the
                    URL has no scheme.
              has_headers - falsy selects PAGE_SOURCE_CMD, truthy selects
                            FEO_PAGE_SOURCE_CMD. Defaults to False.

        Returns: Nothing.
        """
        if not urlparse(url).scheme:
            url = 'http://' + url
        # Branch on truthiness (not ``== False``) so that values such as
        # None pick the same mode here as in create_analysis_folder.
        template = FEO_PAGE_SOURCE_CMD if has_headers else PAGE_SOURCE_CMD
        # NOTE(review): the URL is interpolated into a command string as-is.
        # If utils.execute_command runs it through a shell, an untrusted URL
        # must be quoted first -- confirm against that helper.
        utils.execute_command(template % url).communicate()

    def create_analysis_folder(self, analysis_id, has_headers):
        """
        Method: Prepare the working folder for one analysis under BASE_DIR.

        Args: analysis_id - folder name of the analysis (a string).
              has_headers - when truthy, the ``html`` sub-folder is created
                            and RequestURL.js is copied into it from
                            BASE_DIR.

        Returns: The path ``BASE_DIR/<analysis_id>/html``. When
                 ``has_headers`` is falsy this path is returned without
                 being created.
        """
        analysis_dir = os.path.join(BASE_DIR, analysis_id)
        if not os.path.exists(analysis_dir):
            os.makedirs(analysis_dir)
        path = os.path.join(analysis_dir, 'html')
        if has_headers:
            # The module is called repeatedly; tolerate an html folder left
            # over from an earlier run instead of failing in os.makedirs.
            if not os.path.exists(path):
                os.makedirs(path)
            shutil.copy(os.path.join(BASE_DIR, "RequestURL.js"), path)
        return path

    def start_parser(self, analysis_id, url, hasHeaders=False):
        """
        Method: Fetch the generated HTML for a URL into the analysis folder
                and parse it.

        Args: analysis_id - identifier of the analysis; converted with str().
              url - address to fetch.
              hasHeaders - forwarded to create_analysis_folder and
                           get_generated_html. Defaults to False.

        Returns: The BeautifulSoup document built from the last ``.html``
                 file found directly inside the analysis html folder, or
                 None when there is no such file.
        """
        feed = None
        analysis_id = str(analysis_id)
        path = self.create_analysis_folder(analysis_id, hasHeaders)

        # Remember where we were so the process is never left sitting inside
        # a folder that is about to be deleted. os.getcwd() itself fails when
        # the current directory has already been removed, hence the fallback.
        try:
            previous_cwd = os.getcwd()
        except OSError:
            previous_cwd = BASE_DIR

        # NOTE(review): when hasHeaders is falsy create_analysis_folder does
        # not create `path`, so this chdir raises OSError unless the folder
        # already exists -- confirm the intended flow for that mode.
        os.chdir(path)
        try:
            # The command templates use relative file names, so the command
            # has to run with `path` as the working directory.
            self.get_generated_html(url, hasHeaders)
            for _root, _dirs, filenames in os.walk(path):
                for filename in filenames:
                    if filename.endswith('.html'):
                        with open(os.path.join(path, filename)) as handle:
                            feed = BeautifulSoup(handle.read())
                        print(feed)
                # Only the top level of `path` is inspected.
                break
        finally:
            os.chdir(previous_cwd)
        # Cleanup of `path` (shutil.rmtree) is still disabled, as before. If
        # it is re-enabled, do it here, after the working directory has been
        # restored.
        return feed
我根据一些条件创建文件夹,下载HTML源代码,解析之后再删除该文件夹。这个模块会被多次调用。问题是:第一次调用时一切正常,但第二次调用时,这一行
#shutil.rmtree(path)
会抛出错误
“没有这样的文件或目录”
,因为第一次调用该方法时目录已被删除,而程序试图在这个已不存在的目录中再创建一个目录。


我该如何解决这个问题?我必须删除这些目录,否则它们会占满服务器的内存。

如果
shutil.rmtree(path)
针对
path
目录本身引发“没有这样的文件或目录”,那么这可能是
shutil.rmtree()
中的一个bug——既然已经有人替它完成了删除工作,它应该乐于接受并忽略这类错误。另外,您可以在
shutil.rmtree()
之后立即使用
os.mkdir()
重新创建目录。

您可以编写一个解决此问题的包装器:

import errno
import shutil

def rmtree(path):
    """Remove the directory tree at *path*, tolerating a missing *path*.

    An ``OSError`` is swallowed only when it reports ENOENT for *path*
    itself; any other error (including ENOENT for a different file name)
    is re-raised unchanged.
    """
    try:
        outcome = shutil.rmtree(path)
    except OSError as err:
        already_gone = err.errno == errno.ENOENT and err.filename == path
        if not already_gone:
            raise
        # The tree was removed by someone else -- nothing left to do.
        return None
    return outcome
无论路径是否存在,您都可以调用
rmtree(path)

你也可以使用同样的效果