如何从网站获取javascript动态内容
我正在尝试从网站获取动态内容。我先尝试用 Scrapy 获取内容,但内容是通过 JS 文件加载的,所以没有出现在响应文本中。然后我为此安装了 Selenium,但现在遇到了 “no such session”(没有这样的会话)错误。例如,这是我试图获取内容的页面。我只是试着针对这个网站写了下面的代码 如何从网站获取javascript动态内容,javascript,selenium,scrapy,scrapy-spider,scrapy-splash,Javascript,Selenium,Scrapy,Scrapy Spider,Scrapy Splash,我正在尝试从网站获取动态内容。我先尝试用 Scrapy 获取内容,但内容是通过 JS 文件加载的,所以没有出现在响应文本中。然后我为此安装了 Selenium,但现在遇到了 “no such session”(没有这样的会话)错误。例如,这是我试图获取内容的页面。我只是试着针对这个网站写了下面的代码 item = ProductItem name = response.css('h1.product-name::text').extract_first() price = response.css('span[id=offerin
# Asker's attempt: read the product name, the price and the inline script
# data from `response` (presumably a Scrapy response — it is defined outside
# this fragment).
# NOTE(review): `ProductItem` is referenced without being called, so `item`
# is bound to that name itself rather than to a new instance — confirm
# whether `ProductItem()` was intended. `item` is not used again below.
item = ProductItem
# Text selected by each CSS expression; `extract_first()` presumably yields
# only the first match.
name = response.css('h1.product-name::text').extract_first()
price = response.css('span[id=offering-price] > span::text').extract_first()
# Addresses the script by absolute position (17th <script> under <head>).
xpath = response.xpath('/html/head/script[17]')
# Regex captures a "{...}" span that follows " = " in the selected text.
data = xpath.re(" = (\{.+\})")
print(data)
这就是我想要得到的内容
var utagData = {"merchant_names":["Finspor"],"new_site":"new","order_store":"Finspor","order_currency":"TRY","page_domain":"www.hepsiburada.com","page_language":"tr-TR","page_site_name":"Hepsiburada","page_site_region":"tr","site_type":"desktop","page_type":"pdp","page_name":"Product Detail","category_path":"/product/spor-outdoor/spor-fitness/fitness-kondisyon/kosu-bantlari/sporkonksbfox008/","page_title":"Fox Fitness New Target 70E 2.5 Hp Motorlu, Masajlı Koşu Fiyatı","page_url":"http://www.hepsiburada.com/fox-fitness-new-target-70e-2-5-hp-motorlu-masajli-kosu-bandi-hediye-secenekleriyle-p-SPORKONKSBFOX0081?magaza=Finspor","page_referring_url":"http://www.hepsiburada.com/gunun-firsati-teklifi?element=1","page_query_string":["magaza=Finspor"],"is_canonical":"1","canonical_url":"http://www.hepsiburada.com/fox-fitness-new-target-70e-2-5-hp-motorlu-masajli-kosu-bandi-hediye-secenekleriyle-pm-sporkonksbfox008","product_prices":["999.00"],"product_unit_prices":["999.00"],"product_brands":["Fox Fitness"],"product_brand":"Fox Fitness","product_skus":["SPORKONKSBFOX0081"],"product_ids":["sporkonksbfox008"],"product_top_5":["sporkonksbfox008"],"product_names":["Fox Fitness New Target 70E 2.5 Hp Motorlu, Masajlı Koşu Bandı (Hediye Seçenekleriyle)"],"product_category_ids":["19249"],"product_categories":["kosu-bantlari"],"shipping_type":["super-hizli"],"product_quantities":["1"],"product_barcodes":["8691128100776"],"product_barcode":"8691128100776","product_name_array":"Fox Fitness New Target 70E 2.5 Hp Motorlu, Masajlı Koşu Bandı (Hediye Seçenekleriyle)","merchant_ids":["95df0e3483104fc1a16cca6e38bc45cc"],"order_subtotal":["999.00"],"category_id_hierarchy":"60001546 > 2147483635 > 353045 > 19249","category_name_hierarchy":"Spor Outdoor > Spor / Fitness > Fitness - Kondisyon > Koşu Bantları","product_status":"InStock"};
var utagObject = utagData;
var utag_data = {"merchant_names":["Finspor"],"new_site":"new","order_store":"Finspor","order_currency":"TRY","page_domain":"www.hepsiburada.com","page_language":"tr-TR","page_site_name":"Hepsiburada","page_site_region":"tr","site_type":"desktop","page_type":"pdp","page_name":"Product Detail","category_path":"/product/spor-outdoor/spor-fitness/fitness-kondisyon/kosu-bantlari/sporkonksbfox008/","page_title":"Fox Fitness New Target 70E 2.5 Hp Motorlu, Masajlı Koşu Fiyatı","page_url":"http://www.hepsiburada.com/fox-fitness-new-target-70e-2-5-hp-motorlu-masajli-kosu-bandi-hediye-secenekleriyle-p-SPORKONKSBFOX0081?magaza=Finspor","page_referring_url":"http://www.hepsiburada.com/gunun-firsati-teklifi?element=1","page_query_string":["magaza=Finspor"],"is_canonical":"1","canonical_url":"http://www.hepsiburada.com/fox-fitness-new-target-70e-2-5-hp-motorlu-masajli-kosu-bandi-hediye-secenekleriyle-pm-sporkonksbfox008","product_prices":["999.00"],"product_unit_prices":["999.00"],"product_brands":["Fox Fitness"],"product_brand":"Fox Fitness","product_skus":["SPORKONKSBFOX0081"],"product_ids":["sporkonksbfox008"],"product_top_5":["sporkonksbfox008"],"product_names":["Fox Fitness New Target 70E 2.5 Hp Motorlu, Masajlı Koşu Bandı (Hediye Seçenekleriyle)"],"product_category_ids":["19249"],"product_categories":["kosu-bantlari"],"shipping_type":["super-hizli"],"product_quantities":["1"],"product_barcodes":["8691128100776"],"product_barcode":"8691128100776","product_name_array":"Fox Fitness New Target 70E 2.5 Hp Motorlu, Masajlı Koşu Bandı (Hediye Seçenekleriyle)","merchant_ids":["95df0e3483104fc1a16cca6e38bc45cc"],"order_subtotal":["999.00"],"category_id_hierarchy":"60001546 > 2147483635 > 353045 > 19249","category_name_hierarchy":"Spor Outdoor > Spor / Fitness > Fitness - Kondisyon > Koşu Bantları","product_status":"InStock"};
这里不需要执行任何 JavaScript。如果在页面上单击鼠标右键,然后单击“查看页面源代码”(或类似的选项),您就可以在页面源代码中找到 JSON 格式的数据:
# assuming we're crawling:
# 'http://www.hepsiburada.com/fox-fitness-new-target-70e-2-5-hp-motorlu-masajli-kosu-bandi-hediye-secenekleriyle-p-SPORKONKSBFOX0081?magaza=Finspor'
import json
def parse(self, response):
    """Print the ``utagData`` JSON object embedded in the page's inline script.

    The data is already present in the page source as a JavaScript object
    literal, so it can be read without executing any JavaScript.

    :param response: object exposing ``xpath()`` whose result exposes
        ``re()`` (e.g. a Scrapy response).
    :returns: None. The decoded dictionary and its ``merchant_names`` entry
        are printed; nothing is printed when the script is not found.
    """
    # get the java-script in the <script> node
    node = response.xpath("//script[contains(text(),'var utagData = ')]/text()")
    # extract the json bit from the script text with regex
    # (raw string: "\{" is not a valid escape in a normal string literal)
    matches = node.re(r'= (\{.+\})')
    if not matches:
        # script node missing or page layout changed: nothing to decode
        return
    # convert json to python dictionary
    data = json.loads(matches[0])
    print(data)
    print(data['merchant_names'])
    # gives ['Finspor']
# assuming we're crawling:
# 'http://www.hepsiburada.com/fox-fitness-new-target-70e-2-5-hp-motorlu-masajli-kosu-bandi-hediye-secenekleriyle-p-SPORKONKSBFOX0081?magaza=Finspor'
import json


def parse(self, response):
    """Print the ``utagData`` JSON object embedded in the page's inline script.

    :param response: object exposing ``xpath()`` whose result exposes
        ``re()`` (e.g. a Scrapy response).
    :returns: None. The decoded dictionary and its ``merchant_names`` entry
        are printed; nothing is printed when the script is not found.
    """
    # get the java-script in the <script> node
    node = response.xpath("//script[contains(text(),'var utagData = ')]/text()")
    # extract the json bit from the script text with regex
    # (raw string: "\{" is not a valid escape in a normal string literal)
    matches = node.re(r'= (\{.+\})')
    if not matches:
        # script node missing or page layout changed: nothing to decode
        return
    # convert json to python dictionary
    data = json.loads(matches[0])
    print(data)
    print(data['merchant_names'])
    # gives ['Finspor']
过去,我使用过 x-ray 这个库来爬取网站,并获取我需要的内容:
它有很好的API来查找您需要的具体数据:
// Get the page title. The callback receives (err, title); report the error
// instead of logging an undefined title when the request or selection fails.
xray('http://google.com', 'title')(function (err, title) {
  if (err) {
    console.error(err);
    return;
  }
  console.log(title);
});
或通过选择器查找:
// Look up content by the '.content' selector. Report the error instead of
// logging an undefined result when the request or selection fails.
xray('http://reddit.com', '.content')(function (err, innerHTML) {
  if (err) {
    console.error(err);
    return;
  }
  console.log(innerHTML);
});
获取具体属性值:
// Get a specific attribute value: 'img.logo@src' asks for the `src`
// attribute of the element matched by 'img.logo'. Report the error instead
// of logging an undefined value when the request or selection fails.
xray('http://techcrunch.com', 'img.logo@src')(function (err, value) {
  if (err) {
    console.error(err);
    return;
  }
  console.log(value);
});
所以请看看这个库,也许它可以帮助您获得所需的结果。 您没有展示 Selenium 代码(您应该从中获得响应)。