Python 导出到Excel时会覆盖数据
我正在浏览一个网站,根据一些关键词收集最近的十篇文章。一旦我得到我的数据(使用的关键字、文章名称、URL/超链接和发布日期),我想将其全部写入xls文件。到目前为止,它只写最后一个关键字的结果,而不是所有四个关键字,它只是覆盖了电子表格的同一部分。如何显示整个列表,而不仅仅是最近的部分Python 导出到Excel时会覆盖数据,python,xlwt,Python,Xlwt,我正在浏览一个网站,根据一些关键词收集最近的十篇文章。一旦我得到我的数据(使用的关键字、文章名称、URL/超链接和发布日期),我想将其全部写入xls文件。到目前为止,它只写最后一个关键字的结果,而不是所有四个关键字,它只是覆盖了电子表格的同一部分。如何显示整个列表,而不仅仅是最近的部分 import requests from bs4 import BeautifulSoup import datetime import xlwt from xlwt import Formula today
import requests
from bs4 import BeautifulSoup
import datetime
import xlwt
from xlwt import Formula
today = datetime.date.today().strftime("%Y%m%d")
keywords = ('PNC', 'Huntington', 'KeyCorp', 'Fifth Third')
for keyword in keywords:
keyword.replace("+", " ")
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'}
def article_fetch(keyword):
url = 'https://www.americanbanker.com/search?query={}'.format(keyword)
r = requests.get(url, headers = headers)
soup = BeautifulSoup(r.text, 'html.parser')
mylist = []
cols = "KeyWord", "Article", "URL", "Publication Date"
mylist.append(cols)
for articles in soup.find_all("div", "feed-item"):
article = articles.find("h4").text.strip()
timestamp = articles.find("span", "timestamp").text.strip()
article_url = 'https://{}'.format(articles.find("a")["href"][2:])
link = 'HYPERLINK("{}", "Link" )'.format(article_url)
item = [keyword, article, Formula(link), timestamp]
mylist.append(item)
book = xlwt.Workbook()
sheet = book.add_sheet("Articles")
for i, row in enumerate(mylist):
for j, col in enumerate(row):
sheet.write(i, j, col)
book.save("C:\Python\American Banker\American Banker {}.xls".format(today))
for keyword in keywords:
article_fetch(keyword)
print('Workbook Saved')
我希望看到我的整个列表,其中包含所有四个关键字的结果。但是,我只看到最后一个关键字的结果。我已将Excel文件生成移到脚本末尾:
import requests
from bs4 import BeautifulSoup
import datetime
import xlwt
from xlwt import Formula
today = datetime.date.today().strftime("%Y%m%d")
keywords = ('PNC', 'Huntington', 'KeyCorp', 'Fifth Third')
for keyword in keywords:
keyword.replace("+", " ")
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'}
def article_fetch(keyword):
url = 'https://www.americanbanker.com/search?query={}'.format(keyword)
r = requests.get(url, headers = headers)
soup = BeautifulSoup(r.text, 'html.parser')
for articles in soup.find_all("div", "feed-item"):
article = articles.find("h4").text.strip()
timestamp = articles.find("span", "timestamp").text.strip()
article_url = 'https://{}'.format(articles.find("a")["href"][2:])
link = 'HYPERLINK("{}", "Link" )'.format(article_url)
item = [keyword, article, Formula(link), timestamp]
mylist.append(item)
mylist = []
cols = "KeyWord", "Article", "URL", "Publication Date"
mylist.append(cols)
for keyword in keywords:
article_fetch(keyword)
book = xlwt.Workbook()
sheet = book.add_sheet('Articles')
for i, row in enumerate(mylist):
for j, col in enumerate(row):
sheet.write(i, j, col)
book.save("American Banker {}.xls".format(today))
print('Workbook Saved')
数据将不再丢失: