Python 2.7 在scrapy中提取标记值
我想在scrapy中用xpath提取标签的值,例如:Python 2.7 在scrapy中提取标记值(标签:python-2.7,web-scraping,scrapy,Python 2.7,Web Scraping,Scrapy)。我想在scrapy中用xpath提取标签的值,例如 /html/body/div[3]/ul[1]/li[1]/div/p q1 ans1 q2 ans2 链接: 并用类似这样的 yield 输出: def parse(self, response): for quote in response.xpath('//html/body/main'): yield { #question or answer #question pattern li/div/p o
/html/body/div[3]/ul[1]/li[1]/div/p
q1
ans1
q2
ans2
链接:
并用类似下面这样的 yield 输出:
def parse(self, response):
    """Yield one item per question or answer found on the page.

    Layout assumed from the notes that accompanied this snippet (confirm
    against the real page): every ``ul`` under ``/html/body/div[3]`` holds
    one question/answer thread, its first ``li`` is the question and every
    later ``li`` is an answer.

    Each yielded dict has the keys:

    * ``type`` -- ``'question'`` or ``'answer'``.
    * ``QAnumber`` -- 1-based position of the thread's ``ul``; a question
      and its answers share the same number.
    * ``text`` -- list of the text nodes of the entry's ``div/p``.
    """
    threads = response.xpath('/html/body/div[3]/ul')
    for qa_number, thread in enumerate(threads, start=1):
        for position, entry in enumerate(thread.xpath('./li'), start=1):
            yield {
                # The first <li> of a thread is the question, the rest are answers.
                'type': 'question' if position == 1 else 'answer',
                # Related question and answers live in the same <ul>.
                'QAnumber': qa_number,
                # Relative path: an absolute one would ignore `entry` and
                # match every paragraph on the page.
                'text': entry.xpath('./div/p/text()').extract(),
            }
我怎样才能提取出这3个部分(type、QAnumber、text)呢? 回答:你的问题有些难以理解。你是想提取问题和答案吗?如果是,代码大致如下:
from w3lib.html import remove_tags
# Each <ul class="c-faq__list"> is handled as one question/answer thread.
for qa in response.css('div#product-questions-list ul.c-faq__list'):
    question = qa.css('li.is-question div.section > p::text').get()
    # Select the whole <p> element and strip its markup afterwards
    # (presumably so that text inside nested tags is not lost).
    answer = qa.css('li.is-answer div.section > p').get()
    answer = remove_tags(answer) if answer else None
    # .get() returns the attribute value itself rather than a SelectorList,
    # matching how `question` and `answer` are read.
    number = qa.css('li.is-question a::attr(data-question-id)').get()
你的问题有些难以理解。你是想提取问题和答案吗?如果是,代码大致如下:
from w3lib.html import remove_tags
# Each <ul class="c-faq__list"> is handled as one question/answer thread.
for qa in response.css('div#product-questions-list ul.c-faq__list'):
    question = qa.css('li.is-question div.section > p::text').get()
    # Select the whole <p> element and strip its markup afterwards
    # (presumably so that text inside nested tags is not lost).
    answer = qa.css('li.is-answer div.section > p').get()
    answer = remove_tags(answer) if answer else None
    # .get() returns the attribute value itself rather than a SelectorList,
    # matching how `question` and `answer` are read.
    number = qa.css('li.is-question a::attr(data-question-id)').get()
def parse(self, response):
    """Yield one item per question or answer found on the page.

    Layout assumed from the notes that accompanied this snippet (confirm
    against the real page): every ``ul`` under ``/html/body/div[3]`` holds
    one question/answer thread, its first ``li`` is the question and every
    later ``li`` is an answer.

    Each yielded dict has the keys:

    * ``type`` -- ``'question'`` or ``'answer'``.
    * ``QAnumber`` -- 1-based position of the thread's ``ul``; a question
      and its answers share the same number.
    * ``text`` -- list of the text nodes of the entry's ``div/p``.
    """
    threads = response.xpath('/html/body/div[3]/ul')
    for qa_number, thread in enumerate(threads, start=1):
        for position, entry in enumerate(thread.xpath('./li'), start=1):
            yield {
                # The first <li> of a thread is the question, the rest are answers.
                'type': 'question' if position == 1 else 'answer',
                # Related question and answers live in the same <ul>.
                'QAnumber': qa_number,
                # Relative path: an absolute one would ignore `entry` and
                # match every paragraph on the page.
                'text': entry.xpath('./div/p/text()').extract(),
            }
from w3lib.html import remove_tags
# Each <ul class="c-faq__list"> is handled as one question/answer thread.
for qa in response.css('div#product-questions-list ul.c-faq__list'):
    question = qa.css('li.is-question div.section > p::text').get()
    # Select the whole <p> element and strip its markup afterwards
    # (presumably so that text inside nested tags is not lost).
    answer = qa.css('li.is-answer div.section > p').get()
    answer = remove_tags(answer) if answer else None
    # .get() returns the attribute value itself rather than a SelectorList,
    # matching how `question` and `answer` are read.
    number = qa.css('li.is-question a::attr(data-question-id)').get()
def parse(self, response):
    """Yield a ``{question: answer}`` dict for each Q&A list on the page.

    Every ``ul`` directly under ``#product-questions-list`` is treated as
    one thread; the first question text and the first answer text found
    inside that thread are paired up.
    """
    for thread in response.css('#product-questions-list > ul'):
        # Query the current thread, not the whole response; querying the
        # response would return the page's first question/answer on every
        # iteration.
        quest = thread.css(
            '.is-question > div.section > div > p::text').extract_first()
        answer = thread.css(
            '.is-answer > div.section > div > p::text').extract_first()
        yield {quest: answer}