python程序在运行几天时会冻结_Python_Freeze

python程序在运行几天时会冻结

python

python程序在运行几天时会冻结,python,freeze,Python,Freeze,问题是：我正在编写一个python程序，其目的是不断地从RSS提要收集新闻。我想让程序收集一周的数据。问题是，该计划永远不会持续到周末。有时在运行几天、有时几个小时甚至几分钟后会结冰。它总是冻结，没有错误。当我说冻结时，我的意思是解释器似乎还在运行，因为我不能给它任何额外的命令。我怎样才能解决这个问题我将在下面发布代码。谢谢大家 from goose import Goose from requests import get import urllib2 import feedparser

问题是：

我正在编写一个python程序，其目的是不断地从RSS提要收集新闻。我想让程序收集一周的数据。问题是，该计划永远不会持续到周末。有时在运行几天、有时几个小时甚至几分钟后会结冰。它总是冻结，没有错误。当我说冻结时，我的意思是解释器似乎还在运行，因为我不能给它任何额外的命令。我怎样才能解决这个问题

我将在下面发布代码。谢谢大家

from goose import Goose
from requests import get
import urllib2
import feedparser
from urllib2 import urlopen
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import re
import datetime as dt
import time
import os
Symbols=['AAPL','T','BA','XOM','GOOG','JPM','PG','WMT']
url='http://finance.yahoo.com/rss/headline?s='

for t in xrange(7):
    AAPL=pd.DataFrame()
    AAPL['Published']=""
    AAPL['Title']=""
    AAPL['link']=""
    AAPL['ID']=""
    AAPL['News']=""

    T=pd.DataFrame()
    T['Published']=""
    T['Title']=""
    T['link']=""
    T['ID']=""
    T['News']=""

    BA=pd.DataFrame()
    BA['Published']=""
    BA['Title']=""
    BA['link']=""
    BA['ID']=""
    BA['News']=""

    XOM=pd.DataFrame()
    XOM['Published']=""
    XOM['Title']=""
    XOM['link']=""
    XOM['ID']=""
    XOM['News']=""

    GOOG=pd.DataFrame()
    GOOG['Published']=""
    GOOG['Title']=""
    GOOG['link']=""
    GOOG['ID']=""
    GOOG['News']=""

    JPM=pd.DataFrame()
    JPM['Published']=""
    JPM['Title']=""
    JPM['link']=""
    JPM['ID']=""
    JPM['News']=""

    PG=pd.DataFrame()
    PG['Published']=""
    PG['Title']=""
    PG['link']=""
    PG['ID']=""
    PG['News']=""

    WMT=pd.DataFrame()
    WMT['Published']=""
    WMT['Title']=""
    WMT['link']=""
    WMT['ID']=""
    WMT['News']=""




    DaysIDsAAPL=[]
    DaysIDsT=[]
    DaysIDsBA=[]
    DaysIDsXOM=[]
    DaysIDsGOOG=[]
    DaysIDsJPM=[]
    DaysIDsPG=[]
    DaysIDsWMT=[]



    count=0

    AAPLCount=0
    TCount=0
    BACount=0
    XOMCount=0
    GOOGCount=0
    JPMCount=0
    PGCount=0
    WMTCount=0

    date=dt.date.today()

    newpathAAPL = r'D:\News Data\AAPL\\'+str(t)
    newpathT = r'D:\News Data\T\\'+str(t)
    newpathBA = r'D:\News Data\BA\\'+str(t)
    newpathXOM = r'D:\News Data\XOM\\'+str(t)
    newpathGOOG = r'D:\News Data\GOOG\\'+str(t)
    newpathJPM = r'D:\News Data\JPM\\'+str(t)
    newpathPG = r'D:\News Data\PG\\'+str(t)
    newpathWMT = r'D:\News Data\WMT\\'+str(t)
    os.makedirs(newpathAAPL)
    os.makedirs(newpathT)
    os.makedirs(newpathBA)
    os.makedirs(newpathXOM)
    os.makedirs(newpathGOOG)
    os.makedirs(newpathJPM)
    os.makedirs(newpathPG)
    os.makedirs(newpathWMT)
    while dt.date.today()==date:
        print "Loop"
        try:
        #AAPL inner most loop
            d1=feedparser.parse(url+Symbols[0])  
            for x in xrange(len(d1['entries'])):
                if int(d1.entries[x]['id'][14:]) not in DaysIDsAAPL:
                    DaysIDsAAPL.append(int(d1.entries[x]['id'][14:]))
                    y = len(AAPL.index.tolist())
                    m=re.search(r'\*(.*)',d1.entries[x]['link'])
                    z=re.search(r'\?ru=yahoo\?mod=yahoo_itp',m.group(1))
                    if type(z) is not None:
                        m=re.sub(r'\?ru=yahoo\?mod=yahoo_itp', '', m.group(1))
                    AAPL.loc[y,'Title'] =d1.entries[x]['title'].encode('utf8')
                    AAPL.loc[y,'link'] =m.encode('utf8')
                    AAPL.loc[y,'Published'] =d1.entries[x]['published'].encode('utf8')
                    AAPL.loc[y,'ID'] =int(d1.entries[x]['id'][14:])
                    hdr = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
                    page = get(m,headers=hdr)
                    extractor = Goose()
                    article = extractor.extract(raw_html=page.text)
                    text = article.cleaned_text.encode('utf8')
                    if text == '':
                        try:
                            url2 = m
                            req = urllib2.Request(url2, None, hdr)
                            html2 = urlopen(req).read().decode('utf8')
                            raw = BeautifulSoup(html2,"lxml").get_text().encode('utf8')
                            Text_file = open(newpathAAPL+r"\\"+str(AAPLCount)+".txt", "w")
                            Text_file.write(raw)
                            Text_file.close()
                            AAPL.loc[y,'News'] = AAPLCount
                            AAPLCount+=1
                            AAPL=AAPL.fillna("")
                            AAPL.to_csv(newpathAAPL+r'\Key.csv')
                        except:
                            print m
                            print "AAPL"
                    else:
                        Text_file = open(newpathAAPL+r"\\"+str(AAPLCount)+".txt", "w")
                        Text_file.write(text)
                        Text_file.close()
                        AAPL.loc[y,'News'] =AAPLCount
                        AAPLCount+=1
                        AAPL=AAPL.fillna("")
                        AAPL.to_csv(newpathAAPL+r'\Key.csv')
                    print "AAPL"

            #T inner most loop
            d2=feedparser.parse(url+Symbols[1])

            for x in xrange(len(d2['entries'])):
                if int(d2.entries[x]['id'][14:]) not in DaysIDsT:
                    DaysIDsT.append(int(d2.entries[x]['id'][14:]))
                    y = len(T.index.tolist())
                    m=re.search(r'\*(.*)',d2.entries[x]['link'])
                    z=re.search(r'\?ru=yahoo\?mod=yahoo_itp',m.group(1))
                    if type(z) is not None:
                        m=re.sub(r'\?ru=yahoo\?mod=yahoo_itp', '', m.group(1))
                    T.loc[y,'Title'] =d2.entries[x]['title'].encode('utf8')
                    T.loc[y,'link'] =m.encode('utf8')
                    T.loc[y,'Published'] =d2.entries[x]['published'].encode('utf8')
                    T.loc[y,'ID'] =int(d2.entries[x]['id'][14:])
                    hdr = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
                    page = get(m,headers=hdr)
                    extractor = Goose()
                    article = extractor.extract(raw_html=page.text)
                    text = article.cleaned_text.encode('utf8')
                    if text == '':
                        try:
                            url2 = m
                            req = urllib2.Request(url2, None, hdr)
                            html2 = urlopen(req).read().decode('utf8')
                            raw = BeautifulSoup(html2,"lxml").get_text().encode('utf8')
                            Text_file = open(newpathT+r"\\"+str(TCount)+".txt", "w")
                            Text_file.write(raw)
                            Text_file.close()
                            T.loc[y,'News'] = TCount
                            TCount+=1
                            T=T.fillna("")
                            T.to_csv(newpathT+r'\Key.csv')
                        except:
                            print m
                            print "T"
                    else:
                        Text_file = open(newpathT+r"\\"+str(TCount)+".txt", "w")
                        Text_file.write(text)
                        Text_file.close()
                        T.loc[y,'News'] =TCount
                        TCount+=1
                        T=T.fillna("")
                        T.to_csv(newpathT+r'\Key.csv')
                    print "T"

            #BA inner most loop
            d3=feedparser.parse(url+Symbols[2])

            for x in xrange(len(d3['entries'])):
                if int(d3.entries[x]['id'][14:]) not in DaysIDsBA:
                    DaysIDsBA.append(int(d3.entries[x]['id'][14:]))
                    y = len(BA.index.tolist())
                    m=re.search(r'\*(.*)',d3.entries[x]['link'])
                    z=re.search(r'\?ru=yahoo\?mod=yahoo_itp',m.group(1))
                    if type(z) is not None:
                        m=re.sub(r'\?ru=yahoo\?mod=yahoo_itp', '', m.group(1))
                    BA.loc[y,'Title'] =d3.entries[x]['title'].encode('utf8')
                    BA.loc[y,'link'] =m.encode('utf8')
                    BA.loc[y,'Published'] =d3.entries[x]['published'].encode('utf8')
                    BA.loc[y,'ID'] =int(d3.entries[x]['id'][14:])
                    hdr = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
                    page = get(m,headers=hdr)
                    extractor = Goose()
                    article = extractor.extract(raw_html=page.text)
                    text = article.cleaned_text.encode('utf8')
                    if text == '':
                        try:
                            url2 = m
                            req = urllib2.Request(url2, None, hdr)
                            html2 = urlopen(req).read().decode('utf8')
                            raw = BeautifulSoup(html2,"lxml").get_text().encode('utf8')
                            Text_file = open(newpathBA+r"\\"+str(BACount)+".txt", "w")
                            Text_file.write(raw)
                            Text_file.close()
                            BA.loc[y,'News'] = BACount
                            BACount+=1
                            BA=BA.fillna("")
                            BA.to_csv(newpathBA+r'\Key.csv')
                        except:
                            print m
                            print "BA"
                    else:
                        Text_file = open(newpathBA+r"\\"+str(BACount)+".txt", "w")
                        Text_file.write(text)
                        Text_file.close()
                        BA.loc[y,'News'] =BACount
                        BACount+=1
                        BA=BA.fillna("")
                        BA.to_csv(newpathBA+r'\Key.csv')
                    print "BA"

            #XOM inner most loop
            d4=feedparser.parse(url+Symbols[3])

            for x in xrange(len(d4['entries'])):
                if int(d4.entries[x]['id'][14:]) not in DaysIDsXOM:
                    DaysIDsXOM.append(int(d4.entries[x]['id'][14:]))
                    y = len(XOM.index.tolist())
                    m=re.search(r'\*(.*)',d4.entries[x]['link'])
                    z=re.search(r'\?ru=yahoo\?mod=yahoo_itp',m.group(1))
                    if type(z) is not None:
                        m=re.sub(r'\?ru=yahoo\?mod=yahoo_itp', '', m.group(1))
                    XOM.loc[y,'Title'] =d4.entries[x]['title'].encode('utf8')
                    XOM.loc[y,'link'] =m.encode('utf8')
                    XOM.loc[y,'Published'] =d4.entries[x]['published'].encode('utf8')
                    XOM.loc[y,'ID'] =int(d4.entries[x]['id'][14:])
                    hdr = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
                    page = get(m,headers=hdr)
                    extractor = Goose()
                    article = extractor.extract(raw_html=page.text)
                    text = article.cleaned_text.encode('utf8')
                    if text == '':
                        try:
                            url2 = m
                            req = urllib2.Request(url2, None, hdr)
                            html2 = urlopen(req).read().decode('utf8')
                            raw = BeautifulSoup(html2,"lxml").get_text().encode('utf8')
                            Text_file = open(newpathXOM+r"\\"+str(XOMCount)+".txt", "w")
                            Text_file.write(raw)
                            Text_file.close()
                            XOM.loc[y,'News'] = XOMCount
                            XOMCount+=1
                            XOM=XOM.fillna("")
                            XOM.to_csv(newpathXOM+r'\Key.csv')
                        except:
                            print m
                            print "XOM"
                    else:
                        Text_file = open(newpathXOM+r"\\"+str(XOMCount)+".txt", "w")
                        Text_file.write(text)
                        Text_file.close()
                        XOM.loc[y,'News'] =XOMCount
                        XOMCount+=1
                        XOM=XOM.fillna("")
                        XOM.to_csv(newpathXOM+r'\Key.csv')

            #GOOG inner most loop
            d5=feedparser.parse(url+Symbols[4])

            for x in xrange(len(d5['entries'])):
                if int(d5.entries[x]['id'][14:]) not in DaysIDsGOOG:
                    DaysIDsGOOG.append(int(d5.entries[x]['id'][14:]))
                    y = len(GOOG.index.tolist())
                    m=re.search(r'\*(.*)',d5.entries[x]['link'])
                    z=re.search(r'\?ru=yahoo\?mod=yahoo_itp',m.group(1))
                    if type(z) is not None:
                        m=re.sub(r'\?ru=yahoo\?mod=yahoo_itp', '', m.group(1))
                    GOOG.loc[y,'Title'] =d5.entries[x]['title'].encode('utf8')
                    GOOG.loc[y,'link'] =m.encode('utf8')
                    GOOG.loc[y,'Published'] =d5.entries[x]['published'].encode('utf8')
                    GOOG.loc[y,'ID'] =int(d5.entries[x]['id'][14:])
                    hdr = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
                    page = get(m,headers=hdr)
                    extractor = Goose()
                    article = extractor.extract(raw_html=page.text)
                    text = article.cleaned_text.encode('utf8')
                    if text == '':
                        try:
                            url2 = m
                            req = urllib2.Request(url2, None, hdr)
                            html2 = urlopen(req).read().decode('utf8')
                            raw = BeautifulSoup(html2,"lxml").get_text().encode('utf8')
                            Text_file = open(newpathGOOG+r"\\"+str(GOOGCount)+".txt", "w")
                            Text_file.write(raw)
                            Text_file.close()
                            GOOG.loc[y,'News'] = GOOGCount
                            GOOGCount+=1
                            GOOG=GOOG.fillna("")
                            GOOG.to_csv(newpathGOOG+r'\Key.csv')
                        except:
                            print m
                            print "GOOG"
                    else:
                        Text_file = open(newpathGOOG+r"\\"+str(GOOGCount)+".txt", "w")
                        Text_file.write(text)
                        Text_file.close()
                        GOOG.loc[y,'News'] =GOOGCount
                        GOOGCount+=1
                        GOOG=GOOG.fillna("")
                        GOOG.to_csv(newpathGOOG+r'\Key.csv')
                    print "GOOG"

            #JPM inner most loop
            d6=feedparser.parse(url+Symbols[5])

            for x in xrange(len(d6['entries'])):
                if int(d6.entries[x]['id'][14:]) not in DaysIDsJPM:
                    DaysIDsJPM.append(int(d6.entries[x]['id'][14:]))
                    y = len(JPM.index.tolist())
                    m=re.search(r'\*(.*)',d6.entries[x]['link'])
                    z=re.search(r'\?ru=yahoo\?mod=yahoo_itp',m.group(1))
                    if type(z) is not None:
                        m=re.sub(r'\?ru=yahoo\?mod=yahoo_itp', '', m.group(1))
                    JPM.loc[y,'Title'] =d6.entries[x]['title'].encode('utf8')
                    JPM.loc[y,'link'] =m.encode('utf8')
                    JPM.loc[y,'Published'] =d6.entries[x]['published'].encode('utf8')
                    JPM.loc[y,'ID'] =int(d6.entries[x]['id'][14:])
                    hdr = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
                    page = get(m,headers=hdr)
                    extractor = Goose()
                    article = extractor.extract(raw_html=page.text)
                    text = article.cleaned_text.encode('utf8')
                    if text == '':
                        try:
                            url2 = m
                            req = urllib2.Request(url2, None, hdr)
                            html2 = urlopen(req).read().decode('utf8')
                            raw = BeautifulSoup(html2,"lxml").get_text().encode('utf8')
                            Text_file = open(newpathJPM+r"\\"+str(JPMCount)+".txt", "w")
                            Text_file.write(raw)
                            Text_file.close()
                            JPM.loc[y,'News'] = JPMCount
                            JPMCount+=1
                            JPM=JPM.fillna("")
                            JPM.to_csv(newpathJPM+r'\Key.csv')
                        except:
                            print m
                            print "JPM"
                    else:
                        Text_file = open(newpathJPM+r"\\"+str(JPMCount)+".txt", "w")
                        Text_file.write(text)
                        Text_file.close()
                        JPM.loc[y,'News'] =JPMCount
                        JPMCount+=1
                        JPM=JPM.fillna("")
                        JPM.to_csv(newpathJPM+r'\Key.csv')
                    print "JPM"


            #PG inner most loop
            d7=feedparser.parse(url+Symbols[6])

            for x in xrange(len(d7['entries'])):
                if int(d7.entries[x]['id'][14:]) not in DaysIDsPG:
                    DaysIDsPG.append(int(d7.entries[x]['id'][14:]))
                    y = len(PG.index.tolist())
                    m=re.search(r'\*(.*)',d7.entries[x]['link'])
                    z=re.search(r'\?ru=yahoo\?mod=yahoo_itp',m.group(1))
                    if type(z) is not None:
                        m=re.sub(r'\?ru=yahoo\?mod=yahoo_itp', '', m.group(1))
                    PG.loc[y,'Title'] =d7.entries[x]['title'].encode('utf8')
                    PG.loc[y,'link'] =m.encode('utf8')
                    PG.loc[y,'Published'] =d7.entries[x]['published'].encode('utf8')
                    PG.loc[y,'ID'] =int(d7.entries[x]['id'][14:])
                    hdr = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
                    page = get(m,headers=hdr)
                    extractor = Goose()
                    article = extractor.extract(raw_html=page.text)
                    text = article.cleaned_text.encode('utf8')
                    if text == "":
                        try:
                            url2 = m
                            req = urllib2.Request(url2, None, hdr)
                            html2 = urlopen(req).read().decode('utf8')
                            raw = BeautifulSoup(html2,"lxml").get_text().encode('utf8')
                            Text_file = open(newpathPG+r"\\"+str(PGCount)+".txt", "w")
                            Text_file.write(raw)
                            Text_file.close()
                            PG.loc[y,'News'] = PGCount
                            PGCount+=1
                            PG=PG.fillna("")
                            PG.to_csv(newpathPG+r'\Key.csv')
                        except:
                            print m
                            print "PG"
                    else:
                        Text_file = open(newpathPG+r"\\"+str(PGCount)+".txt", "w")
                        Text_file.write(text)
                        Text_file.close()
                        PG.loc[y,'News'] =PGCount
                        PGCount+=1
                        PG=PG.fillna("")
                        PG.to_csv(newpathPG+r'\Key.csv')
                    print "PG"


            #WMT inner most loop
            d8=feedparser.parse(url+Symbols[7])

            for x in xrange(len(d8['entries'])):
                if int(d8.entries[x]['id'][14:]) not in DaysIDsWMT:
                    DaysIDsWMT.append(int(d8.entries[x]['id'][14:]))
                    y = len(WMT.index.tolist())
                    m=re.search(r'\*(.*)',d8.entries[x]['link'])
                    z=re.search(r'\?ru=yahoo\?mod=yahoo_itp',m.group(1))
                    if type(z) is not None:
                        m=re.sub(r'\?ru=yahoo\?mod=yahoo_itp', '', m.group(1))
                    WMT.loc[y,'Title'] =d8.entries[x]['title'].encode('utf8')
                    WMT.loc[y,'link'] =m.encode('utf8')
                    WMT.loc[y,'Published'] =d8.entries[x]['published'].encode('utf8')
                    WMT.loc[y,'ID'] =int(d8.entries[x]['id'][14:])
                    hdr = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
                    page = get(m,headers=hdr)
                    extractor = Goose()
                    article = extractor.extract(raw_html=page.text)
                    text = article.cleaned_text.encode('utf8')
                    if text == "":
                        try:
                            url2 = m
                            req = urllib2.Request(url2, None, hdr)
                            html2 = urlopen(req).read().decode('utf8')
                            raw = BeautifulSoup(html2,"lxml").get_text().encode('utf8')
                            Text_file = open(newpathWMT+r"\\"+str(WMTCount)+".txt", "w")
                            Text_file.write(raw)
                            Text_file.close()
                            WMT.loc[y,'News'] = WMTCount
                            WMTCount+=1
                            WMT=WMT.fillna("")
                            WMT.to_csv(newpathWMT+r'\Key.csv')
                        except:
                            print m
                            print "WMT"
                    else:
                        Text_file = open(newpathWMT+r"\\"+str(WMTCount)+".txt", "w")
                        Text_file.write(text)
                        Text_file.close()
                        WMT.loc[y,'News'] =WMTCount
                        WMTCount+=1
                        WMT=WMT.fillna("")
                        WMT.to_csv(newpathWMT+r'\Key.csv')
                    print "WMT"       
            count+=1
            print count
            time.sleep(1)
        except:
            print "Error"
    AAPL=AAPL.fillna("")
    AAPL.to_csv(newpathAAPL+r'\Key.csv')
    T=T.fillna("")
    T.to_csv(newpathT+r'\Key.csv')
    BA=BA.fillna("")
    BA.to_csv(newpathBA+r'\Key.csv')
    XOM=XOM.fillna("")
    XOM.to_csv(newpathXOM+r'\Key.csv')
    GOOG=GOOG.fillna("")
    GOOG.to_csv(newpathGOOG+r'\Key.csv')
    JPM=JPM.fillna("")
    JPM.to_csv(newpathJPM+r'\Key.csv')
    PG=PG.fillna("")
    PG.to_csv(newpathPG+r'\Key.csv')
    WMT=WMT.fillna("")
    WMT.to_csv(newpathWMT+r'\Key.csv')

在这种情况下，如果程序收集的提要太多，或者系统上有其他活动进程（这就是冻结时间不同的原因），则会消耗太多RAM，请参阅

程序运行的进程将用于计算的数组和变量存储在进程内存（ram）中

您可以通过强制程序使用硬盘内存来修复此问题

有关解决方法（

搁置

，定期将收集的提要保存到文本文件（将信息从ram移动到rom并释放ram），…）请参阅以下链接

您有很多重复的代码；您可以将粗糙的代码提取到助手方法（小函数）中，然后传入参数。如果您的代码没有bug，请尝试禁用gc并手动收集垃圾（如果您使用的是2.6）。可能尝试分解代码以隔离问题。在不同的函数中分隔所有这些部分，并添加一个负责跟踪的装饰器，例如

时间戳-func name start-parameters

和

时间戳-func name end-duration

。您所呼叫的服务可能存在一个问题，即您处理得不优雅，但在500行程序中很难判断！嘿，伙计们，谢谢你们的建议，我会尝试一下，然后报告发生了什么。对不起，代码太混乱了，我知道里面有很多冗余，我更多的是为了时间而不是清洁。事后看来，这显然很糟糕，哈哈