如何在php中使用正则表达式(regex)从json获取内容

如何在php中使用正则表达式(regex)从json获取内容,php,javascript,regex,json,Php,Javascript,Regex,Json,我需要提取一些html标记中存在的json。如何使用正则表达式从下面的json中提取名称(键)值 <div id="gwt_products_display_results" class="gwt_products_display_results"> <span class="JSON" style="display: none;"> { "products": [ { "targetURL"

我需要提取一些html标记中存在的json。如何使用正则表达式从下面的json中提取名称(键)值

<div id="gwt_products_display_results" class="gwt_products_display_results">
                <span class="JSON" style="display: none;">
{
    "products": [
        {
            "targetURL": "/athena-mineral-fabric-by-the-yard/262682",
            "listIndex": "0",
            "minimumPrice": 20,
            "categoryOnSale": "false",
            "mfPartNumber": "FF010ATM",
            "hasAtLeastOneBuyableAndPublishedItem": "true",
            "attributes": [],
            "partNumber": "b_FF010ATM",
            "itemAsProduct": "true",
            "iapAttribute": "",
            "productDetailTargetURL": "/athena-mineral-fabric-by-the-yard/262682",
            "iapAttributeCode": "",
            "beanType": "bundle",
            "name": "Athena Mineral Fabric by the Yard",
            "maxListPrice": 0,
            "thumbNail": "null",
            "hasSaleSKUs": false,
            "productId": "262682",
            "currencyCode": "USD",
            "hasMoreColors": false,
            "xPriceLabel": "null",
            "minListPrice": 0,
            "maximumPrice": 20,
            "iapAttributeDisplayName": "",
            "shortDescription": "null",
            "listId": "SEARCHRESULTS",
            "categoryId": "null"
        },
        {
            "targetURL": "/athena-slate-fabric-by-the-yard/262683",
            "listIndex": "1",
            "minimumPrice": 20,
            "categoryOnSale": "false",
            "mfPartNumber": "FF010ATS",
            "hasAtLeastOneBuyableAndPublishedItem": "true",
            "attributes": [],
            "partNumber": "b_FF010ATS",
            "itemAsProduct": "true",
            "iapAttribute": "",
            "productDetailTargetURL": "/athena-slate-fabric-by-the-yard/262683",
            "iapAttributeCode": "",
            "beanType": "bundle",
            "name": "Athena Slate Fabric by the Yard",
            "maxListPrice": 0,
            "thumbNail": "null",
            "hasSaleSKUs": false,
            "productId": "262683",
            "currencyCode": "USD",
            "hasMoreColors": false,
            "xPriceLabel": "null",
            "minListPrice": 0,
            "maximumPrice": 20,
            "iapAttributeDisplayName": "",
            "shortDescription": "null",
            "listId": "SEARCHRESULTS",
            "categoryId": "null"
        },
        {
            "targetURL": "/typewriter-keys-giclee/261307",
            "listIndex": "2",
            "minimumPrice": 259,
            "categoryOnSale": "false",
            "mfPartNumber": "WD813",
            "hasAtLeastOneBuyableAndPublishedItem": "true",
            "attributes": [
                {
                    "S7 - Overlay 1": "blank"
                }
            ],
            "partNumber": "p_WD813",
            "itemAsProduct": "true",
            "iapAttribute": "",
            "productDetailTargetURL": "/typewriter-keys-giclee/261307",
            "iapAttributeCode": "",
            "beanType": "product",
            "name": "Typewriter Keys Giclee",
            "maxListPrice": 0,
            "thumbNail": "null",
            "hasSaleSKUs": false,
            "productId": "261307",
            "currencyCode": "USD",
            "hasMoreColors": false,
            "xPriceLabel": "null",
            "minListPrice": 0,
            "maximumPrice": 259,
            "iapAttributeDisplayName": "",
            "shortDescription": "null",
            "listId": "SEARCHRESULTS",
            "categoryId": "null"
        }
    ]
}
</span>
</div>

{
    "products": [
        {
            "targetURL": "/athena-mineral-fabric-by-the-yard/262682",
            "listIndex": "0",
            "minimumPrice": 20,
            "categoryOnSale": "false",
            "mfPartNumber": "FF010ATM",
            "hasAtLeastOneBuyableAndPublishedItem": "true",
            "attributes": [],
            "partNumber": "b_FF010ATM",
            "itemAsProduct": "true",
            "iapAttribute": "",
            "productDetailTargetURL": "/athena-mineral-fabric-by-the-yard/262682",
            "iapAttributeCode": "",
            "beanType": "bundle",
            "name": "Athena Mineral Fabric by the Yard",
            "maxListPrice": 0,
            "thumbNail": "null",
            "hasSaleSKUs": false,
            "productId": "262682",
            "currencyCode": "USD",
            "hasMoreColors": false,
            "xPriceLabel": "null",
            "minListPrice": 0,
            "maximumPrice": 20,
            "iapAttributeDisplayName": "",
            "shortDescription": "null",
            "listId": "SEARCHRESULTS",
            "categoryId": "null"
        },
        {
            "targetURL": "/athena-slate-fabric-by-the-yard/262683",
            "listIndex": "1",
            "minimumPrice": 20,
            "categoryOnSale": "false",
            "mfPartNumber": "FF010ATS",
            "hasAtLeastOneBuyableAndPublishedItem": "true",
            "attributes": [],
            "partNumber": "b_FF010ATS",
            "itemAsProduct": "true",
            "iapAttribute": "",
            "productDetailTargetURL": "/athena-slate-fabric-by-the-yard/262683",
            "iapAttributeCode": "",
            "beanType": "bundle",
            "name": "Athena Slate Fabric by the Yard",
            "maxListPrice": 0,
            "thumbNail": "null",
            "hasSaleSKUs": false,
            "productId": "262683",
            "currencyCode": "USD",
            "hasMoreColors": false,
            "xPriceLabel": "null",
            "minListPrice": 0,
            "maximumPrice": 20,
            "iapAttributeDisplayName": "",
            "shortDescription": "null",
            "listId": "SEARCHRESULTS",
            "categoryId": "null"
        },
        {
            "targetURL": "/typewriter-keys-giclee/261307",
            "listIndex": "2",
            "minimumPrice": 259,
            "categoryOnSale": "false",
            "mfPartNumber": "WD813",
            "hasAtLeastOneBuyableAndPublishedItem": "true",
            "attributes": [
                {
                    "S7 - Overlay 1": "blank"
                }
            ],
            "partNumber": "p_WD813",
            "itemAsProduct": "true",
            "iapAttribute": "",
            "productDetailTargetURL": "/typewriter-keys-giclee/261307",
            "iapAttributeCode": "",
            "beanType": "product",
            "name": "Typewriter Keys Giclee",
            "maxListPrice": 0,
            "thumbNail": "null",
            "hasSaleSKUs": false,
            "productId": "261307",
            "currencyCode": "USD",
            "hasMoreColors": false,
            "xPriceLabel": "null",
            "minListPrice": 0,
            "maximumPrice": 259,
            "iapAttributeDisplayName": "",
            "shortDescription": "null",
            "listId": "SEARCHRESULTS",
            "categoryId": "null"
        }
    ]
}
到目前为止,我所尝试的是
到目前为止,我所尝试的是

<span class="JSON" style="display: none;">([\s\S]+?)<\/span>
([\s\S]+?)

您可以使用 json_decode() 将其转换为数组,然后使用
array_keys() 获取名称(键)


您可以使用 json_decode() 将其转换为数组,然后使用
array_keys() 获取名称(键)


为什么要用正则表达式?正如其他人在这里提到的,您可以使用 json_decode 将其解析为数组,然后再对其进行处理

但如果您坚持使用正则表达式,我会说,
/"(.+?)":/
将匹配所有键,前提是您的 JSON 具有如上所示的精确格式

更新

因此,您是从一个 HTML 字符串中获取它的。假设该变量是 $html,如果您坚持使用正则表达式,可以像下面这样先用正则表达式提取 JSON,然后再解码。要获取键,请使用
array_keys()

// Capture everything between the opening and closing tags of the hidden JSON span.
// The /s modifier lets "." match newlines, because the JSON spans several lines.
if (preg_match('/<span class="JSON"[^>]*>(.+?)<\/span>/s', $html, $matches)) {
    // Passing true as the second argument decodes objects into associative arrays.
    $decoded_array = json_decode($matches[1], true);
    print_r($decoded_array);
    // The keys of the first product are the property names the question asks for.
    $keys = array_keys($decoded_array['products'][0]);
    print_r($keys);
}

为什么要用正则表达式?正如其他人在这里提到的,您可以使用 json_decode 将其解析为数组,然后再对其进行处理

但如果您坚持使用正则表达式,我会说,
/"(.+?)":/
将匹配所有键,前提是您的 JSON 具有如上所示的精确格式

更新

因此,您是从一个 HTML 字符串中获取它的。假设该变量是 $html,如果您坚持使用正则表达式,可以像下面这样先用正则表达式提取 JSON,然后再解码。要获取键,请使用
array_keys()

// Capture everything between the opening and closing tags of the hidden JSON span.
// The /s modifier lets "." match newlines, because the JSON spans several lines.
if (preg_match('/<span class="JSON"[^>]*>(.+?)<\/span>/s', $html, $matches)) {
    // Passing true as the second argument decodes objects into associative arrays.
    $decoded_array = json_decode($matches[1], true);
    print_r($decoded_array);
    // The keys of the first product are the property names the question asks for.
    $keys = array_keys($decoded_array['products'][0]);
    print_r($keys);
}
您可以使用 DOMDocument 和 DOMXPath 查找包含 JSON 的
span
元素,然后用 json_decode 对其内容进行解码。下面是一个粗略的例子,可以帮助您上手:

<?php
$html = '
<html>
    <head>
        <title>Example</title>
    </head>
    <body>
        <div id="gwt_products_display_results" class="gwt_products_display_results">
            <span class="JSON" style="display: none;">
            {
                "products": [
                    {
                        "targetURL": "/athena-mineral-fabric-by-the-yard/262682",
                        "listIndex": "0",
                        "minimumPrice": 20,
                        "categoryOnSale": "false",
                        "mfPartNumber": "FF010ATM",
                        "hasAtLeastOneBuyableAndPublishedItem": "true",
                        "attributes": [],
                        "partNumber": "b_FF010ATM",
                        "itemAsProduct": "true",
                        "iapAttribute": "",
                        "productDetailTargetURL": "/athena-mineral-fabric-by-the-yard/262682",
                        "iapAttributeCode": "",
                        "beanType": "bundle",
                        "name": "Athena Mineral Fabric by the Yard",
                        "maxListPrice": 0,
                        "thumbNail": "null",
                        "hasSaleSKUs": false,
                        "productId": "262682",
                        "currencyCode": "USD",
                        "hasMoreColors": false,
                        "xPriceLabel": "null",
                        "minListPrice": 0,
                        "maximumPrice": 20,
                        "iapAttributeDisplayName": "",
                        "shortDescription": "null",
                        "listId": "SEARCHRESULTS",
                        "categoryId": "null"
                    },
                    {
                        "targetURL": "/athena-slate-fabric-by-the-yard/262683",
                        "listIndex": "1",
                        "minimumPrice": 20,
                        "categoryOnSale": "false",
                        "mfPartNumber": "FF010ATS",
                        "hasAtLeastOneBuyableAndPublishedItem": "true",
                        "attributes": [],
                        "partNumber": "b_FF010ATS",
                        "itemAsProduct": "true",
                        "iapAttribute": "",
                        "productDetailTargetURL": "/athena-slate-fabric-by-the-yard/262683",
                        "iapAttributeCode": "",
                        "beanType": "bundle",
                        "name": "Athena Slate Fabric by the Yard",
                        "maxListPrice": 0,
                        "thumbNail": "null",
                        "hasSaleSKUs": false,
                        "productId": "262683",
                        "currencyCode": "USD",
                        "hasMoreColors": false,
                        "xPriceLabel": "null",
                        "minListPrice": 0,
                        "maximumPrice": 20,
                        "iapAttributeDisplayName": "",
                        "shortDescription": "null",
                        "listId": "SEARCHRESULTS",
                        "categoryId": "null"
                    }
                ]
            }
            </span>
        </div>
    </body>    
</html>
';

// Build the DOM from the markup. loadHTML() is an instance method: calling it
// statically is deprecated in PHP 5 and a fatal error in PHP 8.
$document   = new DOMDocument();
$document->loadHTML($html);
$xpath      = new DOMXPath($document);
// Select every <span class="JSON"> that is a direct child of a <div>.
$spans      = $xpath->query('//div/span[@class="JSON"]');

foreach ($spans as $span) {
    // The span's text content is the JSON document; decode it into objects.
    $catalog = json_decode($span->nodeValue);
    // json_decode() returns null on malformed JSON; skip spans without a usable product list.
    if (!is_object($catalog) || !isset($catalog->products) || !is_array($catalog->products)) {
        continue;
    }
    printf("We found %d products.\n", count($catalog->products));
    foreach ($catalog->products as $index => $product) {
        // $index is zero-based; print a one-based position.
        printf("Product #%d - %s.\n", $index + 1, $product->name);
    }
}

/*
    We found 2 products.
    Product #1 - Athena Mineral Fabric by the Yard.
    Product #2 - Athena Slate Fabric by the Yard.
*/
您可以使用 DOMDocument 和 DOMXPath 来查找包含 JSON 的
span
元素,然后用 json_decode 对其内容进行解码。下面是一个粗略的例子,可以帮助您上手:

<?php
$html = '
<html>
    <head>
        <title>Example</title>
    </head>
    <body>
        <div id="gwt_products_display_results" class="gwt_products_display_results">
            <span class="JSON" style="display: none;">
            {
                "products": [
                    {
                        "targetURL": "/athena-mineral-fabric-by-the-yard/262682",
                        "listIndex": "0",
                        "minimumPrice": 20,
                        "categoryOnSale": "false",
                        "mfPartNumber": "FF010ATM",
                        "hasAtLeastOneBuyableAndPublishedItem": "true",
                        "attributes": [],
                        "partNumber": "b_FF010ATM",
                        "itemAsProduct": "true",
                        "iapAttribute": "",
                        "productDetailTargetURL": "/athena-mineral-fabric-by-the-yard/262682",
                        "iapAttributeCode": "",
                        "beanType": "bundle",
                        "name": "Athena Mineral Fabric by the Yard",
                        "maxListPrice": 0,
                        "thumbNail": "null",
                        "hasSaleSKUs": false,
                        "productId": "262682",
                        "currencyCode": "USD",
                        "hasMoreColors": false,
                        "xPriceLabel": "null",
                        "minListPrice": 0,
                        "maximumPrice": 20,
                        "iapAttributeDisplayName": "",
                        "shortDescription": "null",
                        "listId": "SEARCHRESULTS",
                        "categoryId": "null"
                    },
                    {
                        "targetURL": "/athena-slate-fabric-by-the-yard/262683",
                        "listIndex": "1",
                        "minimumPrice": 20,
                        "categoryOnSale": "false",
                        "mfPartNumber": "FF010ATS",
                        "hasAtLeastOneBuyableAndPublishedItem": "true",
                        "attributes": [],
                        "partNumber": "b_FF010ATS",
                        "itemAsProduct": "true",
                        "iapAttribute": "",
                        "productDetailTargetURL": "/athena-slate-fabric-by-the-yard/262683",
                        "iapAttributeCode": "",
                        "beanType": "bundle",
                        "name": "Athena Slate Fabric by the Yard",
                        "maxListPrice": 0,
                        "thumbNail": "null",
                        "hasSaleSKUs": false,
                        "productId": "262683",
                        "currencyCode": "USD",
                        "hasMoreColors": false,
                        "xPriceLabel": "null",
                        "minListPrice": 0,
                        "maximumPrice": 20,
                        "iapAttributeDisplayName": "",
                        "shortDescription": "null",
                        "listId": "SEARCHRESULTS",
                        "categoryId": "null"
                    }
                ]
            }
            </span>
        </div>
    </body>    
</html>
';

// Build the DOM from the markup. loadHTML() is an instance method: calling it
// statically is deprecated in PHP 5 and a fatal error in PHP 8.
$document   = new DOMDocument();
$document->loadHTML($html);
$xpath      = new DOMXPath($document);
// Select every <span class="JSON"> that is a direct child of a <div>.
$spans      = $xpath->query('//div/span[@class="JSON"]');

foreach ($spans as $span) {
    // The span's text content is the JSON document; decode it into objects.
    $catalog = json_decode($span->nodeValue);
    // json_decode() returns null on malformed JSON; skip spans without a usable product list.
    if (!is_object($catalog) || !isset($catalog->products) || !is_array($catalog->products)) {
        continue;
    }
    printf("We found %d products.\n", count($catalog->products));
    foreach ($catalog->products as $index => $product) {
        // $index is zero-based; print a one-based position.
        printf("Product #%d - %s.\n", $index + 1, $product->name);
    }
}

/*
    We found 2 products.
    Product #1 - Athena Mineral Fabric by the Yard.
    Product #2 - Athena Slate Fabric by the Yard.
*/

为什么???只要使用
json_decode
就行了。看在老天的份上,为什么要在 JSON 这样的数据结构上使用正则表达式?将其解析为一个对象/数组,然后直接或通过循环访问所需的值。如果您计划抛弃
json_encode()
,并编写自己的完整fle
<?php
$html = '
<html>
    <head>
        <title>Example</title>
    </head>
    <body>
        <div id="gwt_products_display_results" class="gwt_products_display_results">
            <span class="JSON" style="display: none;">
            {
                "products": [
                    {
                        "targetURL": "/athena-mineral-fabric-by-the-yard/262682",
                        "listIndex": "0",
                        "minimumPrice": 20,
                        "categoryOnSale": "false",
                        "mfPartNumber": "FF010ATM",
                        "hasAtLeastOneBuyableAndPublishedItem": "true",
                        "attributes": [],
                        "partNumber": "b_FF010ATM",
                        "itemAsProduct": "true",
                        "iapAttribute": "",
                        "productDetailTargetURL": "/athena-mineral-fabric-by-the-yard/262682",
                        "iapAttributeCode": "",
                        "beanType": "bundle",
                        "name": "Athena Mineral Fabric by the Yard",
                        "maxListPrice": 0,
                        "thumbNail": "null",
                        "hasSaleSKUs": false,
                        "productId": "262682",
                        "currencyCode": "USD",
                        "hasMoreColors": false,
                        "xPriceLabel": "null",
                        "minListPrice": 0,
                        "maximumPrice": 20,
                        "iapAttributeDisplayName": "",
                        "shortDescription": "null",
                        "listId": "SEARCHRESULTS",
                        "categoryId": "null"
                    },
                    {
                        "targetURL": "/athena-slate-fabric-by-the-yard/262683",
                        "listIndex": "1",
                        "minimumPrice": 20,
                        "categoryOnSale": "false",
                        "mfPartNumber": "FF010ATS",
                        "hasAtLeastOneBuyableAndPublishedItem": "true",
                        "attributes": [],
                        "partNumber": "b_FF010ATS",
                        "itemAsProduct": "true",
                        "iapAttribute": "",
                        "productDetailTargetURL": "/athena-slate-fabric-by-the-yard/262683",
                        "iapAttributeCode": "",
                        "beanType": "bundle",
                        "name": "Athena Slate Fabric by the Yard",
                        "maxListPrice": 0,
                        "thumbNail": "null",
                        "hasSaleSKUs": false,
                        "productId": "262683",
                        "currencyCode": "USD",
                        "hasMoreColors": false,
                        "xPriceLabel": "null",
                        "minListPrice": 0,
                        "maximumPrice": 20,
                        "iapAttributeDisplayName": "",
                        "shortDescription": "null",
                        "listId": "SEARCHRESULTS",
                        "categoryId": "null"
                    }
                ]
            }
            </span>
        </div>
    </body>    
</html>
';

// Build the DOM from the markup. loadHTML() is an instance method: calling it
// statically is deprecated in PHP 5 and a fatal error in PHP 8.
$document   = new DOMDocument();
$document->loadHTML($html);
$xpath      = new DOMXPath($document);
// Select every <span class="JSON"> that is a direct child of a <div>.
$spans      = $xpath->query('//div/span[@class="JSON"]');

foreach ($spans as $span) {
    // The span's text content is the JSON document; decode it into objects.
    $catalog = json_decode($span->nodeValue);
    // json_decode() returns null on malformed JSON; skip spans without a usable product list.
    if (!is_object($catalog) || !isset($catalog->products) || !is_array($catalog->products)) {
        continue;
    }
    printf("We found %d products.\n", count($catalog->products));
    foreach ($catalog->products as $index => $product) {
        // $index is zero-based; print a one-based position.
        printf("Product #%d - %s.\n", $index + 1, $product->name);
    }
}

/*
    We found 2 products.
    Product #1 - Athena Mineral Fabric by the Yard.
    Product #2 - Athena Slate Fabric by the Yard.
*/