Python 全局变量重置在Google应用程序引擎中不起作用_Python_Google App Engine_Global Variables_Webapp2

Python 全局变量重置在Google应用程序引擎中不起作用

python google-app-engine

Python 全局变量重置在Google应用程序引擎中不起作用,python,google-app-engine,global-variables,webapp2,Python,Google App Engine,Global Variables,Webapp2,我正在从GAE中的处理程序调用一个web爬行函数，它检索一些图像，然后显示它们。它在第一次调用时工作正常，但下一次它显示所有相同的图像时，爬虫程序将从上次调用的位置启动。我认为这是我的全局变量没有正确重置的问题每次我重新部署应用程序时，它都会在第一次正确运行，但问题开始了这是我的代码，请让我知道，如果你需要我澄清它，但我认为它应该是有意义的这里是刮刀功能 visited_pages = [] visit_queue = deque([]) collected_pages = [] coll

我正在从 GAE 的一个处理程序中调用一个网页爬取函数，它会抓取一些图像，然后把它们显示出来。第一次调用时一切正常，但下一次调用时它显示的全是相同的图像，并且爬虫会从上一次调用结束的位置继续。我认为问题在于我的全局变量没有被正确重置。

每次重新部署应用程序后，第一次调用都能正确运行，但之后问题就会出现。

下面是我的代码。如果需要我进一步说明，请告诉我，不过我认为它应该不难理解。

下面是爬虫（scraper）函数：

# Module-level crawl state shared by scrape_pages and scrape_bing_src.
# NOTE: these objects exist for as long as this module stays loaded, so every
# call made in the same process sees (and mutates) the same values.
count = pic_count = 0        # page counter / picture counter used as crawl limits
collected_pics = list()      # pictures gathered by scrape_pages
collected_pages = list()     # pages that matched the keywords
visit_queue = deque()        # links still waiting to be scraped
visited_pages = list()       # URLs that have already been scraped

def scrape_pages(url, root_url, keywords=None, recurse=True):
    """Scrape ``url``, then keep scraping links from the shared ``visit_queue``.

    The function works entirely through module-level state: it appends to
    ``visit_queue``, ``visited_pages``, ``collected_pages`` and
    ``collected_pics`` and increments ``count``. It always returns ``None``;
    callers read the results from those globals.

    Args:
        url: Page to scrape.
        root_url: Passed through to ``categorize_links`` and to the
            recursive calls.
        keywords: Optional list of keywords. When non-empty, ``find_pages``
            is consulted and its result is recorded in ``collected_pages``.
        recurse: When true, links found on the page are queued for a visit.
    """
    global count
    global pic_count
    global collected_pics
    global collected_pages

    # Crawl limits: stop after this many pages / once this many pictures.
    max_count = 16
    pic_num = 100

    # A mutable default ([]) would be one list shared by every call, so the
    # default is None and a fresh list is created here instead.
    if keywords is None:
        keywords = []

    # Single-argument parenthesised prints behave the same on Python 2 and 3.
    print('the keywords and url are')
    print(keywords)
    print(url)

    soup = soupify_url(url)

    # Only add new pages onto the queue if the recursion argument is true.
    if recurse:
        # Collect the href of every anchor on the page. An AttributeError
        # (e.g. soup is not a parsed document) abandons this page.
        try:
            the_links = [tag.get('href') for tag in soup.findAll('a')]
        except AttributeError:
            return

        try:
            external_links, internal_links, root_links, primary_links = \
                categorize_links(the_links, url, root_url)
        except TypeError:
            return

        # TODO (from the original author): make this depend on the input.
        links_to_visit = external_links + internal_links + root_links

        # Queue every link that is neither visited nor already queued.
        for link in links_to_visit:
            if link not in visited_pages and link not in visit_queue:
                visit_queue.append(link)

    visited_pages.append(url)
    count = count + 1

    # Record the page itself when keywords were given and find_pages
    # reports something that has not been collected yet.
    if keywords:
        page_to_add = find_pages(url, soup, keywords)
        if page_to_add and page_to_add not in collected_pages:
            collected_pages.append(page_to_add)

    pics_to_add = add_pics(url, soup)
    if pics_to_add:
        collected_pics.extend(pics_to_add)

    # The actual recursion: drain the queue until a limit is reached.
    # NOTE(review): pic_count is never modified in this function; presumably
    # a helper such as add_pics maintains it - verify.
    while visit_queue:
        if count >= max_count:
            return

        if pic_count > pic_num:
            return

        link = visit_queue.popleft()
        scrape_pages(link, root_url, keywords)

def _reset_scrape_state():
    """Put every module-level crawl variable back to its initial empty value."""
    global collected_pics, collected_pages
    global pic_count, count
    global visited_pages, visit_queue

    pic_count = 0
    count = 0
    visited_pages = []
    visit_queue = deque([])
    collected_pages = []
    collected_pics = []


def scrape_bing_src(keywords):
    """Fetch links from Bing for ``keywords``, crawl them, return the pictures.

    The crawl state lives in module-level variables that stay alive between
    requests handled by the same process. It is therefore reset *before* the
    crawl (so leftovers from an earlier, possibly failed, call cannot leak in)
    and again afterwards in a ``finally`` block (so an exception during the
    crawl does not leave stale state behind).

    Args:
        keywords: List of keywords passed to ``scrape_bing.get_links`` and
            ``scrape_pages``.

    Returns:
        The list of pictures collected during this call; empty when Bing
        returned no links.
    """
    # Declared first: a ``global`` statement that follows an assignment to
    # the same name is a SyntaxWarning on Python 2 and an error on Python 3.
    global visit_queue

    _reset_scrape_state()
    try:
        visit_queue, the_url = scrape_bing.get_links(keywords, a_list = False)
        # Nothing to crawl: avoid popleft() raising on an empty queue.
        if visit_queue:
            scrape_pages(visit_queue.popleft(), the_url, keywords, recurse=True)
        # Keep a reference to this call's results before the reset below
        # rebinds the global to a fresh list.
        pics_to_return = collected_pics
    finally:
        _reset_scrape_state()
    return pics_to_return

#this just simply displays the images
class Try(BlogHandler):
    """Handler that runs the scraper for a keyword and writes the images out."""

    @staticmethod
    def _escape_attr(value):
        """Escape ``value`` for safe use inside a quoted HTML attribute."""
        # '&' must be replaced first so the entities added below survive.
        return (value.replace('&', '&amp;')
                     .replace('<', '&lt;')
                     .replace('>', '&gt;')
                     .replace('"', '&quot;')
                     .replace("'", '&#39;'))

    def get(self, keyword):
        """Scrape images for the whitespace-separated ``keyword`` string.

        Writes one ``<img>`` tag per scraped image to the response, followed
        by a closing message.
        """
        keyword = str(keyword)
        keyword_list = keyword.split()
        img_list = scraper.scrape_bing_src(keyword_list)

        for img in img_list:
            # The URLs come from scraped pages, i.e. untrusted input; escape
            # them so a crafted URL cannot break out of the src attribute.
            self.response.write("""<br><img src='""" + self._escape_attr(img) + """'>""")

        self.response.write('we are done here')

下面是调用scraper函数的处理程序

# Module-level crawl state shared by scrape_pages and scrape_bing_src.
# NOTE: these objects exist for as long as this module stays loaded, so every
# call made in the same process sees (and mutates) the same values.
count = pic_count = 0        # page counter / picture counter used as crawl limits
collected_pics = list()      # pictures gathered by scrape_pages
collected_pages = list()     # pages that matched the keywords
visit_queue = deque()        # links still waiting to be scraped
visited_pages = list()       # URLs that have already been scraped

def scrape_pages(url, root_url, keywords=None, recurse=True):
    """Scrape ``url``, then keep scraping links from the shared ``visit_queue``.

    The function works entirely through module-level state: it appends to
    ``visit_queue``, ``visited_pages``, ``collected_pages`` and
    ``collected_pics`` and increments ``count``. It always returns ``None``;
    callers read the results from those globals.

    Args:
        url: Page to scrape.
        root_url: Passed through to ``categorize_links`` and to the
            recursive calls.
        keywords: Optional list of keywords. When non-empty, ``find_pages``
            is consulted and its result is recorded in ``collected_pages``.
        recurse: When true, links found on the page are queued for a visit.
    """
    global count
    global pic_count
    global collected_pics
    global collected_pages

    # Crawl limits: stop after this many pages / once this many pictures.
    max_count = 16
    pic_num = 100

    # A mutable default ([]) would be one list shared by every call, so the
    # default is None and a fresh list is created here instead.
    if keywords is None:
        keywords = []

    # Single-argument parenthesised prints behave the same on Python 2 and 3.
    print('the keywords and url are')
    print(keywords)
    print(url)

    soup = soupify_url(url)

    # Only add new pages onto the queue if the recursion argument is true.
    if recurse:
        # Collect the href of every anchor on the page. An AttributeError
        # (e.g. soup is not a parsed document) abandons this page.
        try:
            the_links = [tag.get('href') for tag in soup.findAll('a')]
        except AttributeError:
            return

        try:
            external_links, internal_links, root_links, primary_links = \
                categorize_links(the_links, url, root_url)
        except TypeError:
            return

        # TODO (from the original author): make this depend on the input.
        links_to_visit = external_links + internal_links + root_links

        # Queue every link that is neither visited nor already queued.
        for link in links_to_visit:
            if link not in visited_pages and link not in visit_queue:
                visit_queue.append(link)

    visited_pages.append(url)
    count = count + 1

    # Record the page itself when keywords were given and find_pages
    # reports something that has not been collected yet.
    if keywords:
        page_to_add = find_pages(url, soup, keywords)
        if page_to_add and page_to_add not in collected_pages:
            collected_pages.append(page_to_add)

    pics_to_add = add_pics(url, soup)
    if pics_to_add:
        collected_pics.extend(pics_to_add)

    # The actual recursion: drain the queue until a limit is reached.
    # NOTE(review): pic_count is never modified in this function; presumably
    # a helper such as add_pics maintains it - verify.
    while visit_queue:
        if count >= max_count:
            return

        if pic_count > pic_num:
            return

        link = visit_queue.popleft()
        scrape_pages(link, root_url, keywords)

def _reset_scrape_state():
    """Put every module-level crawl variable back to its initial empty value."""
    global collected_pics, collected_pages
    global pic_count, count
    global visited_pages, visit_queue

    pic_count = 0
    count = 0
    visited_pages = []
    visit_queue = deque([])
    collected_pages = []
    collected_pics = []


def scrape_bing_src(keywords):
    """Fetch links from Bing for ``keywords``, crawl them, return the pictures.

    The crawl state lives in module-level variables that stay alive between
    requests handled by the same process. It is therefore reset *before* the
    crawl (so leftovers from an earlier, possibly failed, call cannot leak in)
    and again afterwards in a ``finally`` block (so an exception during the
    crawl does not leave stale state behind).

    Args:
        keywords: List of keywords passed to ``scrape_bing.get_links`` and
            ``scrape_pages``.

    Returns:
        The list of pictures collected during this call; empty when Bing
        returned no links.
    """
    # Declared first: a ``global`` statement that follows an assignment to
    # the same name is a SyntaxWarning on Python 2 and an error on Python 3.
    global visit_queue

    _reset_scrape_state()
    try:
        visit_queue, the_url = scrape_bing.get_links(keywords, a_list = False)
        # Nothing to crawl: avoid popleft() raising on an empty queue.
        if visit_queue:
            scrape_pages(visit_queue.popleft(), the_url, keywords, recurse=True)
        # Keep a reference to this call's results before the reset below
        # rebinds the global to a fresh list.
        pics_to_return = collected_pics
    finally:
        _reset_scrape_state()
    return pics_to_return

#this just simply displays the images
class Try(BlogHandler):
    """Handler that runs the scraper for a keyword and writes the images out."""

    @staticmethod
    def _escape_attr(value):
        """Escape ``value`` for safe use inside a quoted HTML attribute."""
        # '&' must be replaced first so the entities added below survive.
        return (value.replace('&', '&amp;')
                     .replace('<', '&lt;')
                     .replace('>', '&gt;')
                     .replace('"', '&quot;')
                     .replace("'", '&#39;'))

    def get(self, keyword):
        """Scrape images for the whitespace-separated ``keyword`` string.

        Writes one ``<img>`` tag per scraped image to the response, followed
        by a closing message.
        """
        keyword = str(keyword)
        keyword_list = keyword.split()
        img_list = scraper.scrape_bing_src(keyword_list)

        for img in img_list:
            # The URLs come from scraped pages, i.e. untrusted input; escape
            # them so a crafted URL cannot break out of the src attribute.
            self.response.write("""<br><img src='""" + self._escape_attr(img) + """'>""")

        self.response.write('we are done here')

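#this just simply displays the images
class Try(BlogHandler):
    def get(self, keyword):
        keyword = str(keyword)
        keyword_list = keyword.split()
        img_list = scraper.scrape_bing_src(keyword_list)

        for img in img_list:
            self.response.write("""<br><img src='""" + img + """'>""")

        self.response.write('we are done here')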

您的代码并不是只在一台“服务器”上的单个实例中运行——您可能已经注意到管理控制台中的“实例”选项卡。因此，即使在两次调用之间，您也有可能被切换到另一台服务器，或者进程被“重新启动”（更多信息可参阅相关文档）。在预热过程中，应用程序会从磁盘读入内存，然后才开始处理请求。因此，每次您都可能得到一个新的、预先缓存的 Python 实例，它带有自己的一份全局变量值。

在您的情况下，最好使用。

可能重复。很棒的资源，但信息似乎相当多，其中一些还相互冲突。您认为是把这些变量设置为 None 更好，还是把调用它的函数放到原来的类之外并让它返回结果更好？两者都不必：在函数定义中使用

keywords=None

。不，我不希望全局变量在实例之间保留下来。我想我只需要修改代码，不再使用全局变量。全局变量并不会在实例之间“保留”。然而，一旦您的应用程序被加载到某个实例上，程序就会一直运行到该实例关闭为止——这意味着在处理程序处理完一个请求之后，全局变量仍然存在。听起来您在这里确实不应该使用全局数据。