Scrapy Spider没有以正确的格式写入Postgres

Scrapy Spider没有以正确的格式写入Postgres,scrapy,Scrapy,我正在抓取美国科学网站上有关心理健康的文章,并试图将其转储到本地运行的postgres数据库中。scrapy输出存储在一个字典中,它看起来像articles={'title':[],'teaser':[],'link':[],'date':[],'author':[],'source':[]等 在运行我的代码时,它会将每个键的整个值列表转储到name==key的列中。相反,我希望每篇文章都是数据库中的一行,例如,第1篇文章在每一列中都有自己的一行,标题、摘要、链接、日期、作者和来源 以下是相关

我正在抓取美国科学网站上有关心理健康的文章,并试图将其转储到本地运行的postgres数据库中。scrapy输出存储在一个字典中,它看起来像articles={'title':[],'teaser':[],'link':[],'date':[],'author':[],'source':[]}等

在运行我的代码时,它会将每个键的整个值列表转储到name==key的列中。相反,我希望每篇文章都是数据库中的一行,例如,第1篇文章在每一列中都有自己的一行,标题、摘要、链接、日期、作者和来源

以下是相关代码: 1.py


您要输出一个项目,其字段上有多个值,最好每个值输出一个项目,因为您的数据库似乎就是这样接受它的:

 def parse(self, response):
for article in response.xpath('//ul[@class="newsfeed-article-list"]'):
    title = article.xpath('.//li[contains(@class, "newsfeed-article")]/div[@class="headline-wrapper"]/a[@class="headline-link"]/h3[@class="headline"]').extract()
    for i in title:
        for search_term in mh_search_terms:
            if search_term in i.upper().strip():
                article_item = {}
                article_item['title'] = article.xpath('.//li[contains(@class, "newsfeed-article")]/div[@class="headline-wrapper"]/a[@class="headline-link"]/h3[@class="headline"]/text()').extract()[title.index(i)]
                article_item['teaser'] = article.xpath('.//li[contains(@class, "newsfeed-article")]/p[@class = "teaser"]/text()').extract()[title.index(i)]
                article_item['link'] = article.xpath('.//li[contains(@class, "newsfeed-article")]/a[@class = "read-more"]/@href').extract()[title.index(i)]
                article_item['date'] = article.xpath('.//li[contains(@class, "newsfeed-article")]/div[@class="headline-wrapper"]/div[@class="headline-above"]/time/text()').extract()[title.index(i)]
                article_item['author'] = article.xpath('.//li[contains(@class, "newsfeed-article")]/span[@class="by-authors"]/span/span[@class="author"]/text()').extract()[title.index(i)]
                article_item['source'] = 'Science Of Us'
                yield article_item
from sqlalchemy.orm import sessionmaker
from models import Articles, db_connect, create_articles_table

class ArticlesPipeline(object):
    """Scrapy item pipeline that persists each scraped article to Postgres.

    NOTE(review): in the pasted code ``process_item`` was dedented to module
    level (and its body indentation was inconsistent), so the pipeline class
    had no ``process_item`` method at all — it must be indented inside the
    class, as here.
    """

    def __init__(self):
        # Build the engine/table once; keep a session factory for per-item use.
        engine = db_connect()
        create_articles_table(engine)
        self.Session = sessionmaker(bind=engine)

    def process_item(self, item, spider):
        """Insert one article row; roll back and re-raise on any DB error.

        :param item: dict of scalar fields for a single article
        :param spider: the spider that produced the item (unused)
        :returns: the unchanged item, so later pipelines still see it
        """
        session = self.Session()
        article = Articles(**item)
        try:
            session.add(article)
            session.commit()
        except Exception:
            # Narrowed from a bare ``except:``; still re-raises so Scrapy
            # reports the failure instead of silently dropping the item.
            session.rollback()
            raise
        finally:
            session.close()
        return item
 def parse(self, response):
for article in response.xpath('//ul[@class="newsfeed-article-list"]'):
    title = article.xpath('.//li[contains(@class, "newsfeed-article")]/div[@class="headline-wrapper"]/a[@class="headline-link"]/h3[@class="headline"]').extract()
    for i in title:
        for search_term in mh_search_terms:
            if search_term in i.upper().strip():
                article_item = {}
                article_item['title'] = article.xpath('.//li[contains(@class, "newsfeed-article")]/div[@class="headline-wrapper"]/a[@class="headline-link"]/h3[@class="headline"]/text()').extract()[title.index(i)]
                article_item['teaser'] = article.xpath('.//li[contains(@class, "newsfeed-article")]/p[@class = "teaser"]/text()').extract()[title.index(i)]
                article_item['link'] = article.xpath('.//li[contains(@class, "newsfeed-article")]/a[@class = "read-more"]/@href').extract()[title.index(i)]
                article_item['date'] = article.xpath('.//li[contains(@class, "newsfeed-article")]/div[@class="headline-wrapper"]/div[@class="headline-above"]/time/text()').extract()[title.index(i)]
                article_item['author'] = article.xpath('.//li[contains(@class, "newsfeed-article")]/span[@class="by-authors"]/span/span[@class="author"]/text()').extract()[title.index(i)]
                article_item['source'] = 'Science Of Us'
                yield article_item