使用python中的bs4获取由脚本标记包围的json

使用python中的bs4获取由脚本标记包围的json,python,html,beautifulsoup,Python,Html,Beautifulsoup,我正在使用bs4 for Python,我想从网页获取json,但它是这样的: <script> vtex.events.addData({"pageCategory":"Product","pageDepartment":"Calzado","pageUrl":"http://www.taf.com.mx/air-force-1-07-lv8-cu8070-100/p&qu

我正在使用 Python 的 bs4,想从网页中获取 JSON,但它被包裹在如下的 script 标签中:

<script>
vtex.events.addData({"pageCategory":"Product","pageDepartment":"Calzado","pageUrl":"http://www.taf.com.mx/air-force-1-07-lv8-cu8070-100/p","pageTitle":"AIR FORCE 1 07 LV8 | MASCULINO - tafmx","skuStockOutFromShelf":[],"skuStockOutFromProductDetail":["23312","23313","23314","23316","23325","23326","23327","23328"],"shelfProductIds":["140","141","142","3775","3777","3782","3785","545","17","314","318","530","645","801","822","940"],"accountName":"tafmx","pageFacets":[],"productId":"3829","productReferenceId":"CU8070-100","productEans":["194502172393","194502172409","194502172416","194502172423","194502172430","194502172447","194502172454","194502172461","194502172478","194502172485","194502172492","194502172508","194502172515","194502172522","194502172539","194502172546","194502172553"],"skuStocks":{"23312":0,"23313":0,"23314":0,"23315":11,"23316":0,"23317":19,"23318":29,"23319":22,"23320":12,"23321":7,"23322":9,"23323":15,"23324":14,"23325":0,"23326":0,"23327":0,"23328":0},"productName":"AIR FORCE 1 07 LV8","productBrandId":2000004,"productBrandName":"Nike","productDepartmentId":7,"productDepartmentName":"Calzado","productCategoryId":8,"productCategoryName":"Sneakers","productListPriceFrom":"2199","productListPriceTo":"2199","productPriceFrom":"2199","productPriceTo":"2199","sellerId":"1","sellerIds":"1"});
</script>

vtex.events.addData({"pageCategory":"Product","pageDepartment":"Calzado","pageUrl":"http://www.taf.com.mx/air-force-1-07-lv8-cu8070-100/p","pageTitle":"AIR FORCE 1 07 LV8 | MASCULINO - tafmx","skuStockOutFromShelf":[],"skuStockOutFromProductDetail":["23312","23313","23314","23316","23325","23326","23327","23328"],"shelfProductIds":["140","141","142","3775","3777","3782","3785","545","17","314","318","530","645","801","822","940"],"accountName":"tafmx","pageFacets":[],"productId":"3829","productReferenceId":"CU8070-100","productEans":["194502172393","194502172409","194502172416","194502172423","194502172430","194502172447","194502172454","194502172461","194502172478","194502172485","194502172492","194502172508","194502172515","194502172522","194502172539","194502172546","194502172553"],"skuStocks":{"23312":0,"23313":0,"23314":0,"23315":11,"23316":0,"23317":19,"23318":29,"23319":22,"23320":12,"23321":7,"23322":9,"23323":15,"23324":14,"23325":0,"23326":0,"23327":0,"23328":0},"productName":"AIR FORCE 1 07 LV8","productBrandId":2000004,"productBrandName":"Nike","productDepartmentId":7,"productDepartmentName":"Calzado","productCategoryId":8,"productCategoryName":"Sneakers","productListPriceFrom":"2199","productListPriceTo":"2199","productPriceFrom":"2199","productPriceTo":"2199","sellerId":"1","sellerIds":"1"});
我在 Python 中使用 BeautifulSoup,但这个 script 标签没有可用于识别的 class。


谢谢

您只需使用
'script'
标记查找元素:

soup = BeautifulSoup('''<script>vtex.events.addData({"pageCategory":"Product","pageDepartment":"Calzado","pageUrl":"http://www.taf.com.mx/air-force-1-07-lv8-cu8070-100/p","pageTitle":"AIR FORCE 1 07 LV8 | MASCULINO - tafmx","skuStockOutFromShelf":[],"skuStockOutFromProductDetail":["23312","23313","23314","23316","23325","23326","23327","23328"],"shelfProductIds":["140","141","142","3775","3777","3782","3785","545","17","314","318","530","645","801","822","940"],"accountName":"tafmx","pageFacets":[],"productId":"3829","productReferenceId":"CU8070-100","productEans":["194502172393","194502172409","194502172416","194502172423","194502172430","194502172447","194502172454","194502172461","194502172478","194502172485","194502172492","194502172508","194502172515","194502172522","194502172539","194502172546","194502172553"],"skuStocks":{"23312":0,"23313":0,"23314":0,"23315":11,"23316":0,"23317":19,"23318":29,"23319":22,"23320":12,"23321":7,"23322":9,"23323":15,"23324":14,"23325":0,"23326":0,"23327":0,"23328":0},"productName":"AIR FORCE 1 07 LV8","productBrandId":2000004,"productBrandName":"Nike","productDepartmentId":7,"productDepartmentName":"Calzado","productCategoryId":8,"productCategoryName":"Sneakers","productListPriceFrom":"2199","productListPriceTo":"2199","productPriceFrom":"2199","productPriceTo":"2199","sellerId":"1","sellerIds":"1"});</script>''', 'html.parser')
js_code = soup.find('script').contents[0]
js_code
此时 js_code 的值为:

vtex.events.addData({"pageCategory":"Product","pageDepartment":"Calzado","pageUrl":"http://www.taf.com.mx/air-force-1-07-lv8-cu8070-100/p","pageTitle":"AIR FORCE 1 07 LV8 | MASCULINO - tafmx","skuStockOutFromShelf":[],"skuStockOutFromProductDetail":["23312","23313","23314","23316","23325","23326","23327","23328"],"shelfProductIds":["140","141","142","3775","3777","3782","3785","545","17","314","318","530","645","801","822","940"],"accountName":"tafmx","pageFacets":[],"productId":"3829","productReferenceId":"CU8070-100","productEans":["194502172393","194502172409","194502172416","194502172423","194502172430","194502172447","194502172454","194502172461","194502172478","194502172485","194502172492","194502172508","194502172515","194502172522","194502172539","194502172546","194502172553"],"skuStocks":{"23312":0,"23313":0,"23314":0,"23315":11,"23316":0,"23317":19,"23318":29,"23319":22,"23320":12,"23321":7,"23322":9,"23323":15,"23324":14,"23325":0,"23326":0,"23327":0,"23328":0},"productName":"AIR FORCE 1 07 LV8","productBrandId":2000004,"productBrandName":"Nike","productDepartmentId":7,"productDepartmentName":"Calzado","productCategoryId":8,"productCategoryName":"Sneakers","productListPriceFrom":"2199","productListPriceTo":"2199","productPriceFrom":"2199","productPriceTo":"2199","sellerId":"1","sellerIds":"1"});
比较棘手的部分是从这段脚本中提取出 JSON。对于这类任务,我很少建议使用正则表达式,但这里属于少数适用的情况之一:

import re
...
js_code = soup.find('script').contents[0]
print(re.search('{.*}', js_code).group(0))
这将输出:

{"pageCategory":"Product","pageDepartment":"Calzado","pageUrl":"http://www.taf.com.mx/air-force-1-07-lv8-cu8070-100/p","pageTitle":"AIR FORCE 1 07 LV8 | MASCULINO - tafmx","skuStockOutFromShelf":[],"skuStockOutFromProductDetail":["23312","23313","23314","23316","23325","23326","23327","23328"],"shelfProductIds":["140","141","142","3775","3777","3782","3785","545","17","314","318","530","645","801","822","940"],"accountName":"tafmx","pageFacets":[],"productId":"3829","productReferenceId":"CU8070-100","productEans":["194502172393","194502172409","194502172416","194502172423","194502172430","194502172447","194502172454","194502172461","194502172478","194502172485","194502172492","194502172508","194502172515","194502172522","194502172539","194502172546","194502172553"],"skuStocks":{"23312":0,"23313":0,"23314":0,"23315":11,"23316":0,"23317":19,"23318":29,"23319":22,"23320":12,"23321":7,"23322":9,"23323":15,"23324":14,"23325":0,"23326":0,"23327":0,"23328":0},"productName":"AIR FORCE 1 07 LV8","productBrandId":2000004,"productBrandName":"Nike","productDepartmentId":7,"productDepartmentName":"Calzado","productCategoryId":8,"productCategoryName":"Sneakers","productListPriceFrom":"2199","productListPriceTo":"2199","productPriceFrom":"2199","productPriceTo":"2199","sellerId":"1","sellerIds":"1"}
可以使用
json.loads
将其转换为Python
dict

import json
...
print(json.loads(re.search('{.*}', js_code).group(0)))
输出

{'pageCategory': 'Product', 'pageDepartment': 'Calzado', 'pageUrl': 'http://www.taf.com.mx/air-force-1-07-lv8-cu8070-100/p', 'pageTitle': 'AIR FORCE 1 07 LV8 | MASCULINO - tafmx', 'skuStockOutFromShelf': [], 'skuStockOutFromProductDetail': ['23312', '23313', '23314', '23316', '23325', '23326', '23327', '23328'], 'shelfProductIds': ['140', '141', '142', '3775', '3777', '3782', '3785', '545', '17', '314', '318', '530', '645', '801', '822', '940'], 'accountName': 'tafmx', 'pageFacets': [], 'productId': '3829', 'productReferenceId': 'CU8070-100', 'productEans': ['194502172393', '194502172409', '194502172416', '194502172423', '194502172430', '194502172447', '194502172454', '194502172461', '194502172478', '194502172485', '194502172492', '194502172508', '194502172515', '194502172522', '194502172539', '194502172546', '194502172553'], 'skuStocks': {'23312': 0, '23313': 0, '23314': 0, '23315': 11, '23316': 0, '23317': 19, '23318': 29, '23319': 22, '23320': 12, '23321': 7, '23322': 9, '23323': 15, '23324': 14, '23325': 0, '23326': 0, '23327': 0, '23328': 0}, 'productName': 'AIR FORCE 1 07 LV8', 'productBrandId': 2000004, 'productBrandName': 'Nike', 'productDepartmentId': 7, 'productDepartmentName': 'Calzado', 'productCategoryId': 8, 'productCategoryName': 'Sneakers', 'productListPriceFrom': '2199', 'productListPriceTo': '2199', 'productPriceFrom': '2199', 'productPriceTo': '2199', 'sellerId': '1', 'sellerIds': '1'}

请注意,如果
script
标记包含问题中未显示的其他内容,则可能需要使用更复杂的正则表达式。

您可以简单地使用
'script'
标记查找元素:

soup=beautifulsou(''vtex.events.addData({“pageCategory”:“产品”、“pageDepartment”:“Calzado”、“pageUrl”:”http://www.taf.com.mx/air-force-1-07-lv8-cu8070-100/p“,”页面标题“:“空军1 07 LV8 |阳性-tafmx”,“SKUSTOCKOUTFROMSELF:[],“SKUSTOCKOUTFROMSPRODUCTDETAIL:[“23312”,“23313”,“23314”,“23316”,“23325”,“23326”,“23327”,“23328”],”ShelfProductId:[“140”、“141”、“142”、“3775”、“3777”、“3782”、“3785”、“545”、“17”、“314”、“318”、“530”、“645”、“801”、“822”、“940”]、“accountName:“tafmx”、“pageFacets:[]、“productId:“3829”、“productReferenceId:“CU8070-100”、“productEans:[“194502172393”、“194502172409”、“194502172416”、“194502423”、“194502172430”、“194502172434”、“194502172447”、“194502474”、“194502474”、“194502471”、“194502471”、“194502471”194502172478、194502172485、194502172492、194502172508、194502172515、194502172522、194502172539、194502172546、194502172553、194502172553、skuStocks:{“23312”:0,“23313”:0,“23314”:0,“23315”:11,“23316”:0,“23317”:19,“23318”:29,“23319”:22,“23320”:12,“23321”:7,“23322”:9,“23323”:15,“23324”:14,“25”:0,“23315”:0,“23326”:0,“23327”:0”,“2330”:2330:}:”空军1 078级,“产品品牌标识”:2000004