Python ValueError:dict包含字段名中不包含的字段,即使使用if语句
我正试图利用《纽约时报》的API,从2016年《纽约时报》的所有文章中提取“经济”一词。我在代码末尾收到以下错误消息: ValueError:dict包含字段名中不包含的字段:“抽象” 这是我的代码:Python ValueError:dict包含字段名中不包含的字段,即使使用if语句,python,web,screen-scraping,Python,Web,Screen Scraping,我正试图利用《纽约时报》的API,从2016年《纽约时报》的所有文章中提取“经济”一词。我在代码末尾收到以下错误消息: ValueError:dict包含字段名中不包含的字段:“抽象” 这是我的代码: from nytimesarticle import articleAPI api = articleAPI('0282db2f333f4f4095edd19f0660c978') articles = api.search( q = 'economy', fq = {'headline':
from nytimesarticle import articleAPI
api = articleAPI('0282db2f333f4f4095edd19f0660c978')
articles = api.search( q = 'economy',
fq = {'headline':'economy', 'source':['Reuters','AP', 'The New
YorkTimes']},
begin_date = 20151231)
def parse_articles(articles):
news = []
for i in articles['response']['docs']:
dic = {}
dic['id'] = i['_id']
if i['abstract'] is not None:
dic['abstract'] = i['abstract'].encode("utf8")
dic['headline'] = i['headline']['main'].encode("utf8")
dic['desk'] = i['news_desk']
dic['date'] = i['pub_date'][0:10] # cutting time of day.
dic['section'] = i['section_name']
if i['snippet'] is not None:
dic['snippet'] = i['snippet'].encode("utf8")
dic['source'] = i['source']
dic['type'] = i['type_of_material']
dic['url'] = i['web_url']
dic['word_count'] = i['word_count']
locations = []
for x in range(0,len(i['keywords'])):
if 'glocations' in i['keywords'][x]['name']:
locations.append(i['keywords'][x]['value'])
dic['locations'] = locations
subjects = []
for x in range(0,len(i['keywords'])):
if 'subject' in i['keywords'][x]['name']:
subjects.append(i['keywords'][x]['value'])
dic['subjects'] = subjects
news.append(dic)
return(news)
def get_articles(date,query):
all_articles = []
for i in range(0,100):
articles = api.search(q = query,
fq = {'source':['Reuters','AP', 'The New York Times']},
begin_date = 20151231,
end_date = 20160715,
sort='oldest',
page = str(i))
articles = parse_articles(articles)
all_articles = all_articles + articles
return(all_articles)
econ_all = []
for i in range(2015,2016):
print 'Processing' + str(i) + '...'
econ_year = get_articles(str(i),'economy')
econ_all = econ_all + econ_year
import csv
keys = econ_all[0].keys()
with open('econ-mentions.csv', 'wb') as output_file:
dict_writer = csv.DictWriter(output_file, keys)
dict_writer.writeheader()
dict_writer.writerows(econ_all)
看来我的if语句应该可以防止这个错误。此外,如果我使用“writerow”,正如我在这里看到的,我会在不创建csv的情况下获得完整的详细信息列表。任何帮助都将不胜感激 我不确定您的问题出在哪里,但这段代码创建了一个包含内容的文件econ-indications.csv
from nytimesarticle import articleAPI
def parse_articles(articles):
news = []
for i in articles['response']['docs']:
dic = {}
dic['id'] = i['_id']
if i['abstract'] is not None:
dic['abstract'] = i['abstract'].encode("utf8")
dic['headline'] = i['headline']['main'].encode("utf8")
dic['desk'] = i['news_desk']
dic['date'] = i['pub_date'][0:10] # cutting time of day.
dic['section'] = i['section_name']
if i['snippet'] is not None:
dic['snippet'] = i['snippet'].encode("utf8")
dic['source'] = i['source']
dic['type'] = i['type_of_material']
dic['url'] = i['web_url']
dic['word_count'] = i['word_count']
locations = []
for x in range(0,len(i['keywords'])):
if 'glocations' in i['keywords'][x]['name']:
locations.append(i['keywords'][x]['value'])
dic['locations'] = locations
subjects = []
for x in range(0,len(i['keywords'])):
if 'subject' in i['keywords'][x]['name']:
subjects.append(i['keywords'][x]['value'])
dic['subjects'] = subjects
news.append(dic)
return(news)
def get_articles(date,query):
all_articles = []
for i in range(0,100):
articles = api.search(q = query,
fq = {'source':['Reuters','AP', 'The New York Times']},
begin_date = 20151231,
end_date = 20160715,
sort='oldest',
page = str(i))
articles = parse_articles(articles)
all_articles = all_articles + articles
return(all_articles)
if __name__ == "__main__":
api = articleAPI('0282db2f333f4f4095edd19f0660c978')
articles = api.search( q = 'economy',
fq = {'headline':'economy', 'source':['Reuters','AP', 'The New YorkTimes']},
begin_date = 20151231)
econ_all = []
for i in range(2015,2016):
print 'Processing' + str(i) + '...'
econ_year = get_articles(str(i),'economy')
econ_all = econ_all + econ_year
import csv
keys = econ_all[0].keys()
with open('econ-mentions.csv', 'wb') as output_file:
dict_writer = csv.DictWriter(output_file, keys)
dict_writer.writeheader()
dict_writer.writerows(econ_all)
我不确定您的问题出在哪里,但这段代码创建了一个包含内容的文件econ-indications.csv
from nytimesarticle import articleAPI
def parse_articles(articles):
news = []
for i in articles['response']['docs']:
dic = {}
dic['id'] = i['_id']
if i['abstract'] is not None:
dic['abstract'] = i['abstract'].encode("utf8")
dic['headline'] = i['headline']['main'].encode("utf8")
dic['desk'] = i['news_desk']
dic['date'] = i['pub_date'][0:10] # cutting time of day.
dic['section'] = i['section_name']
if i['snippet'] is not None:
dic['snippet'] = i['snippet'].encode("utf8")
dic['source'] = i['source']
dic['type'] = i['type_of_material']
dic['url'] = i['web_url']
dic['word_count'] = i['word_count']
locations = []
for x in range(0,len(i['keywords'])):
if 'glocations' in i['keywords'][x]['name']:
locations.append(i['keywords'][x]['value'])
dic['locations'] = locations
subjects = []
for x in range(0,len(i['keywords'])):
if 'subject' in i['keywords'][x]['name']:
subjects.append(i['keywords'][x]['value'])
dic['subjects'] = subjects
news.append(dic)
return(news)
def get_articles(date,query):
all_articles = []
for i in range(0,100):
articles = api.search(q = query,
fq = {'source':['Reuters','AP', 'The New York Times']},
begin_date = 20151231,
end_date = 20160715,
sort='oldest',
page = str(i))
articles = parse_articles(articles)
all_articles = all_articles + articles
return(all_articles)
if __name__ == "__main__":
api = articleAPI('0282db2f333f4f4095edd19f0660c978')
articles = api.search( q = 'economy',
fq = {'headline':'economy', 'source':['Reuters','AP', 'The New YorkTimes']},
begin_date = 20151231)
econ_all = []
for i in range(2015,2016):
print 'Processing' + str(i) + '...'
econ_year = get_articles(str(i),'economy')
econ_all = econ_all + econ_year
import csv
keys = econ_all[0].keys()
with open('econ-mentions.csv', 'wb') as output_file:
dict_writer = csv.DictWriter(output_file, keys)
dict_writer.writeheader()
dict_writer.writerows(econ_all)
谢谢你的帮助!有趣的是,它确实创建了内容,但我不确定它是否捕获了所有包含“经济”一词的文章。但这有帮助…我会继续工作。谢谢你的帮助!有趣的是,它确实创建了内容,但我不确定它是否捕获了所有包含“经济”一词的文章。但这有帮助…我会继续工作。