如何在php中使用正则表达式(regex)从json获取内容
我需要提取一些html标记中存在的json。如何使用正则表达式从下面的json中提取名称(键)值如何在php中使用正则表达式(regex)从json获取内容,php,javascript,regex,json,Php,Javascript,Regex,Json,我需要提取一些html标记中存在的json。如何使用正则表达式从下面的json中提取名称(键)值 <div id="gwt_products_display_results" class="gwt_products_display_results"> <span class="JSON" style="display: none;"> { "products": [ { "targetURL"
<div id="gwt_products_display_results" class="gwt_products_display_results">
<span class="JSON" style="display: none;">
{
"products": [
{
"targetURL": "/athena-mineral-fabric-by-the-yard/262682",
"listIndex": "0",
"minimumPrice": 20,
"categoryOnSale": "false",
"mfPartNumber": "FF010ATM",
"hasAtLeastOneBuyableAndPublishedItem": "true",
"attributes": [],
"partNumber": "b_FF010ATM",
"itemAsProduct": "true",
"iapAttribute": "",
"productDetailTargetURL": "/athena-mineral-fabric-by-the-yard/262682",
"iapAttributeCode": "",
"beanType": "bundle",
"name": "Athena Mineral Fabric by the Yard",
"maxListPrice": 0,
"thumbNail": "null",
"hasSaleSKUs": false,
"productId": "262682",
"currencyCode": "USD",
"hasMoreColors": false,
"xPriceLabel": "null",
"minListPrice": 0,
"maximumPrice": 20,
"iapAttributeDisplayName": "",
"shortDescription": "null",
"listId": "SEARCHRESULTS",
"categoryId": "null"
},
{
"targetURL": "/athena-slate-fabric-by-the-yard/262683",
"listIndex": "1",
"minimumPrice": 20,
"categoryOnSale": "false",
"mfPartNumber": "FF010ATS",
"hasAtLeastOneBuyableAndPublishedItem": "true",
"attributes": [],
"partNumber": "b_FF010ATS",
"itemAsProduct": "true",
"iapAttribute": "",
"productDetailTargetURL": "/athena-slate-fabric-by-the-yard/262683",
"iapAttributeCode": "",
"beanType": "bundle",
"name": "Athena Slate Fabric by the Yard",
"maxListPrice": 0,
"thumbNail": "null",
"hasSaleSKUs": false,
"productId": "262683",
"currencyCode": "USD",
"hasMoreColors": false,
"xPriceLabel": "null",
"minListPrice": 0,
"maximumPrice": 20,
"iapAttributeDisplayName": "",
"shortDescription": "null",
"listId": "SEARCHRESULTS",
"categoryId": "null"
},
{
"targetURL": "/typewriter-keys-giclee/261307",
"listIndex": "2",
"minimumPrice": 259,
"categoryOnSale": "false",
"mfPartNumber": "WD813",
"hasAtLeastOneBuyableAndPublishedItem": "true",
"attributes": [
{
"S7 - Overlay 1": "blank"
}
],
"partNumber": "p_WD813",
"itemAsProduct": "true",
"iapAttribute": "",
"productDetailTargetURL": "/typewriter-keys-giclee/261307",
"iapAttributeCode": "",
"beanType": "product",
"name": "Typewriter Keys Giclee",
"maxListPrice": 0,
"thumbNail": "null",
"hasSaleSKUs": false,
"productId": "261307",
"currencyCode": "USD",
"hasMoreColors": false,
"xPriceLabel": "null",
"minListPrice": 0,
"maximumPrice": 259,
"iapAttributeDisplayName": "",
"shortDescription": "null",
"listId": "SEARCHRESULTS",
"categoryId": "null"
}
]
}
</span>
</div>
{
"products": [
{
"targetURL": "/athena-mineral-fabric-by-the-yard/262682",
"listIndex": "0",
"minimumPrice": 20,
"categoryOnSale": "false",
"mfPartNumber": "FF010ATM",
"hasAtLeastOneBuyableAndPublishedItem": "true",
"attributes": [],
"partNumber": "b_FF010ATM",
"itemAsProduct": "true",
"iapAttribute": "",
"productDetailTargetURL": "/athena-mineral-fabric-by-the-yard/262682",
"iapAttributeCode": "",
"beanType": "bundle",
"name": "Athena Mineral Fabric by the Yard",
"maxListPrice": 0,
"thumbNail": "null",
"hasSaleSKUs": false,
"productId": "262682",
"currencyCode": "USD",
"hasMoreColors": false,
"xPriceLabel": "null",
"minListPrice": 0,
"maximumPrice": 20,
"iapAttributeDisplayName": "",
"shortDescription": "null",
"listId": "SEARCHRESULTS",
"categoryId": "null"
},
{
"targetURL": "/athena-slate-fabric-by-the-yard/262683",
"listIndex": "1",
"minimumPrice": 20,
"categoryOnSale": "false",
"mfPartNumber": "FF010ATS",
"hasAtLeastOneBuyableAndPublishedItem": "true",
"attributes": [],
"partNumber": "b_FF010ATS",
"itemAsProduct": "true",
"iapAttribute": "",
"productDetailTargetURL": "/athena-slate-fabric-by-the-yard/262683",
"iapAttributeCode": "",
"beanType": "bundle",
"name": "Athena Slate Fabric by the Yard",
"maxListPrice": 0,
"thumbNail": "null",
"hasSaleSKUs": false,
"productId": "262683",
"currencyCode": "USD",
"hasMoreColors": false,
"xPriceLabel": "null",
"minListPrice": 0,
"maximumPrice": 20,
"iapAttributeDisplayName": "",
"shortDescription": "null",
"listId": "SEARCHRESULTS",
"categoryId": "null"
},
{
"targetURL": "/typewriter-keys-giclee/261307",
"listIndex": "2",
"minimumPrice": 259,
"categoryOnSale": "false",
"mfPartNumber": "WD813",
"hasAtLeastOneBuyableAndPublishedItem": "true",
"attributes": [
{
"S7 - Overlay 1": "blank"
}
],
"partNumber": "p_WD813",
"itemAsProduct": "true",
"iapAttribute": "",
"productDetailTargetURL": "/typewriter-keys-giclee/261307",
"iapAttributeCode": "",
"beanType": "product",
"name": "Typewriter Keys Giclee",
"maxListPrice": 0,
"thumbNail": "null",
"hasSaleSKUs": false,
"productId": "261307",
"currencyCode": "USD",
"hasMoreColors": false,
"xPriceLabel": "null",
"minListPrice": 0,
"maximumPrice": 259,
"iapAttributeDisplayName": "",
"shortDescription": "null",
"listId": "SEARCHRESULTS",
"categoryId": "null"
}
]
}
到目前为止,我所尝试的是
<span class="JSON" style="display: none;">([\s\S]+?)<\/span>
([\s\S]+?)
您可以将其转换为数组,然后使用 array_keys() 获取名称。
您可以将其转换为数组,然后使用 array_keys() 获取名称。
为什么要用正则表达式?正如其他人在这里提到的,您可以使用 json_decode 将其解析为数组并对其进行处理
但如果您坚持使用正则表达式,我会说,/"(.+?)":/
将匹配所有键,前提是您的JSON具有如图所示的精确格式
更新
因此,您是从一个html字符串获取它的。假设变量是 $html,既然您坚持使用正则表达式,可以先用正则表达式提取JSON(如下所示),然后再解码。要获取键,请使用 array_keys()
// Capture the text inside <span class="JSON" ...>...</span>.
// The /s modifier lets "." match newlines, since the JSON spans many lines.
if (preg_match('/<span class="JSON"[^>]*>(.+?)<\/span>/s', $html, $matches)) {
    // Decode the captured JSON text into nested associative arrays.
    $decoded_array = json_decode($matches[1], true);
    print_r($decoded_array);
    // The keys of the first product are the property names being asked for.
    $keys = array_keys($decoded_array['products'][0]);
    print_r($keys);
}
为什么要用正则表达式?正如其他人在这里提到的,您可以使用 json_decode 将其解析为数组并对其进行处理
但如果您坚持使用正则表达式,我会说,/"(.+?)":/
将匹配所有键,前提是您的JSON具有如图所示的精确格式
更新
因此,您是从一个html字符串获取它的。假设变量是 $html,既然您坚持使用正则表达式,可以先用正则表达式提取JSON(如下所示),然后再解码。要获取键,请使用 array_keys()
// Capture the text inside <span class="JSON" ...>...</span>.
// The /s modifier lets "." match newlines, since the JSON spans many lines.
if (preg_match('/<span class="JSON"[^>]*>(.+?)<\/span>/s', $html, $matches)) {
    // Decode the captured JSON text into nested associative arrays.
    $decoded_array = json_decode($matches[1], true);
    print_r($decoded_array);
    // The keys of the first product are the property names being asked for.
    $keys = array_keys($decoded_array['products'][0]);
    print_r($keys);
}
您可以使用 DOMDocument 和 DOMXPath 查找包含JSON的span
元素,然后再用 json_decode 对其解码。下面是一个粗略的例子,可以帮助您上路:-
<?php
$html = '
<html>
<head>
<title>Example</title>
</head>
<body>
<div id="gwt_products_display_results" class="gwt_products_display_results">
<span class="JSON" style="display: none;">
{
"products": [
{
"targetURL": "/athena-mineral-fabric-by-the-yard/262682",
"listIndex": "0",
"minimumPrice": 20,
"categoryOnSale": "false",
"mfPartNumber": "FF010ATM",
"hasAtLeastOneBuyableAndPublishedItem": "true",
"attributes": [],
"partNumber": "b_FF010ATM",
"itemAsProduct": "true",
"iapAttribute": "",
"productDetailTargetURL": "/athena-mineral-fabric-by-the-yard/262682",
"iapAttributeCode": "",
"beanType": "bundle",
"name": "Athena Mineral Fabric by the Yard",
"maxListPrice": 0,
"thumbNail": "null",
"hasSaleSKUs": false,
"productId": "262682",
"currencyCode": "USD",
"hasMoreColors": false,
"xPriceLabel": "null",
"minListPrice": 0,
"maximumPrice": 20,
"iapAttributeDisplayName": "",
"shortDescription": "null",
"listId": "SEARCHRESULTS",
"categoryId": "null"
},
{
"targetURL": "/athena-slate-fabric-by-the-yard/262683",
"listIndex": "1",
"minimumPrice": 20,
"categoryOnSale": "false",
"mfPartNumber": "FF010ATS",
"hasAtLeastOneBuyableAndPublishedItem": "true",
"attributes": [],
"partNumber": "b_FF010ATS",
"itemAsProduct": "true",
"iapAttribute": "",
"productDetailTargetURL": "/athena-slate-fabric-by-the-yard/262683",
"iapAttributeCode": "",
"beanType": "bundle",
"name": "Athena Slate Fabric by the Yard",
"maxListPrice": 0,
"thumbNail": "null",
"hasSaleSKUs": false,
"productId": "262683",
"currencyCode": "USD",
"hasMoreColors": false,
"xPriceLabel": "null",
"minListPrice": 0,
"maximumPrice": 20,
"iapAttributeDisplayName": "",
"shortDescription": "null",
"listId": "SEARCHRESULTS",
"categoryId": "null"
}
]
}
</span>
</div>
</body>
</html>
';
// Parse the markup with a DOMDocument instance. loadHTML() is an instance
// method: calling it statically only raised a notice on older PHP versions
// but throws an Error since PHP 8.0.
$document = new DOMDocument();
$document->loadHTML($html);
$xpath = new DOMXPath($document);
// Select every <span class="JSON"> that is a direct child of a <div>.
$spans = $xpath->query('//div/span[@class="JSON"]');
foreach ($spans as $span) {
    // The span's text content is the JSON document; decode it into objects.
    $catalog = json_decode($span->nodeValue);
    // json_decode() returns null for malformed JSON; skip any span that does
    // not yield an object carrying a "products" array.
    if (!is_object($catalog) || !isset($catalog->products) || !is_array($catalog->products)) {
        continue;
    }
    printf("We found %d products.\n", count($catalog->products));
    foreach ($catalog->products as $index => $product) {
        // Array keys are zero-based; print a one-based position.
        printf("Product #%d - %s.\n", $index + 1, $product->name);
    }
}
/*
Expected output for the two-product sample in $html:
We found 2 products.
Product #1 - Athena Mineral Fabric by the Yard.
Product #2 - Athena Slate Fabric by the Yard.
*/
您可以使用 DOMDocument 和 DOMXPath 来查找包含JSON的span
元素,然后再用 json_decode 对其解码。下面是一个粗略的例子,可以帮助您上路:-
<?php
$html = '
<html>
<head>
<title>Example</title>
</head>
<body>
<div id="gwt_products_display_results" class="gwt_products_display_results">
<span class="JSON" style="display: none;">
{
"products": [
{
"targetURL": "/athena-mineral-fabric-by-the-yard/262682",
"listIndex": "0",
"minimumPrice": 20,
"categoryOnSale": "false",
"mfPartNumber": "FF010ATM",
"hasAtLeastOneBuyableAndPublishedItem": "true",
"attributes": [],
"partNumber": "b_FF010ATM",
"itemAsProduct": "true",
"iapAttribute": "",
"productDetailTargetURL": "/athena-mineral-fabric-by-the-yard/262682",
"iapAttributeCode": "",
"beanType": "bundle",
"name": "Athena Mineral Fabric by the Yard",
"maxListPrice": 0,
"thumbNail": "null",
"hasSaleSKUs": false,
"productId": "262682",
"currencyCode": "USD",
"hasMoreColors": false,
"xPriceLabel": "null",
"minListPrice": 0,
"maximumPrice": 20,
"iapAttributeDisplayName": "",
"shortDescription": "null",
"listId": "SEARCHRESULTS",
"categoryId": "null"
},
{
"targetURL": "/athena-slate-fabric-by-the-yard/262683",
"listIndex": "1",
"minimumPrice": 20,
"categoryOnSale": "false",
"mfPartNumber": "FF010ATS",
"hasAtLeastOneBuyableAndPublishedItem": "true",
"attributes": [],
"partNumber": "b_FF010ATS",
"itemAsProduct": "true",
"iapAttribute": "",
"productDetailTargetURL": "/athena-slate-fabric-by-the-yard/262683",
"iapAttributeCode": "",
"beanType": "bundle",
"name": "Athena Slate Fabric by the Yard",
"maxListPrice": 0,
"thumbNail": "null",
"hasSaleSKUs": false,
"productId": "262683",
"currencyCode": "USD",
"hasMoreColors": false,
"xPriceLabel": "null",
"minListPrice": 0,
"maximumPrice": 20,
"iapAttributeDisplayName": "",
"shortDescription": "null",
"listId": "SEARCHRESULTS",
"categoryId": "null"
}
]
}
</span>
</div>
</body>
</html>
';
// Parse the markup with a DOMDocument instance. loadHTML() is an instance
// method: calling it statically only raised a notice on older PHP versions
// but throws an Error since PHP 8.0.
$document = new DOMDocument();
$document->loadHTML($html);
$xpath = new DOMXPath($document);
// Select every <span class="JSON"> that is a direct child of a <div>.
$spans = $xpath->query('//div/span[@class="JSON"]');
foreach ($spans as $span) {
    // The span's text content is the JSON document; decode it into objects.
    $catalog = json_decode($span->nodeValue);
    // json_decode() returns null for malformed JSON; skip any span that does
    // not yield an object carrying a "products" array.
    if (!is_object($catalog) || !isset($catalog->products) || !is_array($catalog->products)) {
        continue;
    }
    printf("We found %d products.\n", count($catalog->products));
    foreach ($catalog->products as $index => $product) {
        // Array keys are zero-based; print a one-based position.
        printf("Product #%d - %s.\n", $index + 1, $product->name);
    }
}
/*
Expected output for the two-product sample in $html:
We found 2 products.
Product #1 - Athena Mineral Fabric by the Yard.
Product #2 - Athena Slate Fabric by the Yard.
*/
为什么???只要使用json_decode
。为什么世界上所有可能被视为神圣的东西都要在json这样的数据结构上使用正则表达式?将其解析为一个对象/数组,并直接或通过循环访问所需的值。如果您计划删除json_encode()
,并编写自己的完整fle
<?php
$html = '
<html>
<head>
<title>Example</title>
</head>
<body>
<div id="gwt_products_display_results" class="gwt_products_display_results">
<span class="JSON" style="display: none;">
{
"products": [
{
"targetURL": "/athena-mineral-fabric-by-the-yard/262682",
"listIndex": "0",
"minimumPrice": 20,
"categoryOnSale": "false",
"mfPartNumber": "FF010ATM",
"hasAtLeastOneBuyableAndPublishedItem": "true",
"attributes": [],
"partNumber": "b_FF010ATM",
"itemAsProduct": "true",
"iapAttribute": "",
"productDetailTargetURL": "/athena-mineral-fabric-by-the-yard/262682",
"iapAttributeCode": "",
"beanType": "bundle",
"name": "Athena Mineral Fabric by the Yard",
"maxListPrice": 0,
"thumbNail": "null",
"hasSaleSKUs": false,
"productId": "262682",
"currencyCode": "USD",
"hasMoreColors": false,
"xPriceLabel": "null",
"minListPrice": 0,
"maximumPrice": 20,
"iapAttributeDisplayName": "",
"shortDescription": "null",
"listId": "SEARCHRESULTS",
"categoryId": "null"
},
{
"targetURL": "/athena-slate-fabric-by-the-yard/262683",
"listIndex": "1",
"minimumPrice": 20,
"categoryOnSale": "false",
"mfPartNumber": "FF010ATS",
"hasAtLeastOneBuyableAndPublishedItem": "true",
"attributes": [],
"partNumber": "b_FF010ATS",
"itemAsProduct": "true",
"iapAttribute": "",
"productDetailTargetURL": "/athena-slate-fabric-by-the-yard/262683",
"iapAttributeCode": "",
"beanType": "bundle",
"name": "Athena Slate Fabric by the Yard",
"maxListPrice": 0,
"thumbNail": "null",
"hasSaleSKUs": false,
"productId": "262683",
"currencyCode": "USD",
"hasMoreColors": false,
"xPriceLabel": "null",
"minListPrice": 0,
"maximumPrice": 20,
"iapAttributeDisplayName": "",
"shortDescription": "null",
"listId": "SEARCHRESULTS",
"categoryId": "null"
}
]
}
</span>
</div>
</body>
</html>
';
// Parse the markup with a DOMDocument instance. loadHTML() is an instance
// method: calling it statically only raised a notice on older PHP versions
// but throws an Error since PHP 8.0.
$document = new DOMDocument();
$document->loadHTML($html);
$xpath = new DOMXPath($document);
// Select every <span class="JSON"> that is a direct child of a <div>.
$spans = $xpath->query('//div/span[@class="JSON"]');
foreach ($spans as $span) {
    // The span's text content is the JSON document; decode it into objects.
    $catalog = json_decode($span->nodeValue);
    // json_decode() returns null for malformed JSON; skip any span that does
    // not yield an object carrying a "products" array.
    if (!is_object($catalog) || !isset($catalog->products) || !is_array($catalog->products)) {
        continue;
    }
    printf("We found %d products.\n", count($catalog->products));
    foreach ($catalog->products as $index => $product) {
        // Array keys are zero-based; print a one-based position.
        printf("Product #%d - %s.\n", $index + 1, $product->name);
    }
}
/*
Expected output for the two-product sample in $html:
We found 2 products.
Product #1 - Athena Mineral Fabric by the Yard.
Product #2 - Athena Slate Fabric by the Yard.
*/