Regex 使用python正则表达式分隔字符串

Regex 使用python正则表达式分隔字符串,regex,python-3.x,web-scraping,Regex,Python 3.x,Web Scraping,我试图从Amazon产品页面中获取一些图像,我能够在单个字符串中提取页面中的所有图像,但我对正则表达式的了解不足以将其与其他URL分开 我的目标是获得一个数组/列表,其中包含所有雇佣的图像,例如,这张 这是完整的字符串 所有图像都在“colorImage”细分下 P.when('A').register("ImageBlockATF", function(A){ var data = { 'colorImages': { 'initial': [{"h

我试图从Amazon产品页面中获取一些图像,我能够在单个字符串中提取页面中的所有图像,但我对正则表达式的了解不足以将其与其他URL分开

我的目标是获得一个数组/列表,其中包含所有高分辨率(hiRes)图像的URL,例如这一张图片。

这是完整的字符串

所有图像都在 'colorImages' 这一部分下面:

P.when('A').register("ImageBlockATF", function(A){

    var data = {

                'colorImages': { 'initial': [{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/715ljjcwtbL._SL1500_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/41vopqQJxiL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/41vopqQJxiL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/715ljjcwtbL._SX355_.jpg":[219,355],"https://images-na.ssl-images-amazon.com/images/I/715ljjcwtbL._SX450_.jpg":[278,450],"https://images-na.ssl-images-amazon.com/images/I/715ljjcwtbL._SX425_.jpg":[263,425],"https://images-na.ssl-images-amazon.com/images/I/715ljjcwtbL._SX466_.jpg":[288,466],"https://images-na.ssl-images-amazon.com/images/I/715ljjcwtbL._SX522_.jpg":[323,522],"https://images-na.ssl-images-amazon.com/images/I/715ljjcwtbL._SX569_.jpg":[352,569],"https://images-na.ssl-images-amazon.com/images/I/715ljjcwtbL._SX679_.jpg":[420,679]},"variant":"MAIN","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/71wL0h3T8ZL._SL1500_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/41gksdhlW%2BL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/41gksdhlW%2BL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/71wL0h3T8ZL._SX355_.jpg":[215,355],"https://images-na.ssl-images-amazon.com/images/I/71wL0h3T8ZL._SX450_.jpg":[272,450],"https://images-na.ssl-images-amazon.com/images/I/71wL0h3T8ZL._SX425_.jpg":[257,425],"https://images-na.ssl-images-amazon.com/images/I/71wL0h3T8ZL._SX466_.jpg":[282,466],"https://images-na.ssl-images-amazon.com/images/I/71wL0h3T8ZL._SX522_.jpg":[315,522],"https://images-na.ssl-images-amazon.com/images/I/71wL0h3T8ZL._SX569_.jpg":[344,569],"https://images-na.ssl-images-amazon.com/images/I/71wL0h3T8ZL._SX679_.jpg":[410,679]},"variant":"PT01","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/71R48178vZL._SL1500_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/41LaWXfiG5L._SS40_.jpg","large":"https://images-na.s
sl-images-amazon.com/images/I/41LaWXfiG5L.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/71R48178vZL._SX355_.jpg":[217,355],"https://images-na.ssl-images-amazon.com/images/I/71R48178vZL._SX450_.jpg":[275,450],"https://images-na.ssl-images-amazon.com/images/I/71R48178vZL._SX425_.jpg":[260,425],"https://images-na.ssl-images-amazon.com/images/I/71R48178vZL._SX466_.jpg":[285,466],"https://images-na.ssl-images-amazon.com/images/I/71R48178vZL._SX522_.jpg":[319,522],"https://images-na.ssl-images-amazon.com/images/I/71R48178vZL._SX569_.jpg":[348,569],"https://images-na.ssl-images-amazon.com/images/I/71R48178vZL._SX679_.jpg":[415,679]},"variant":"PT02","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/81BZF8-hNpL._SL1500_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/41f4nUmawdL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/41f4nUmawdL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/81BZF8-hNpL._SX355_.jpg":[250,355],"https://images-na.ssl-images-amazon.com/images/I/81BZF8-hNpL._SX450_.jpg":[316,450],"https://images-na.ssl-images-amazon.com/images/I/81BZF8-hNpL._SX425_.jpg":[299,425],"https://images-na.ssl-images-amazon.com/images/I/81BZF8-hNpL._SX466_.jpg":[328,466],"https://images-na.ssl-images-amazon.com/images/I/81BZF8-hNpL._SX522_.jpg":[367,522],"https://images-na.ssl-images-amazon.com/images/I/81BZF8-hNpL._SX569_.jpg":[400,569],"https://images-na.ssl-images-amazon.com/images/I/81BZF8-hNpL._SX679_.jpg":[478,679]},"variant":"PT03","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/A1YDIIAWSWL._SL1500_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/51dH0wiIHjL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/51dH0wiIHjL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/A1YDIIAWSWL._SX355_.jpg":[266,355],"https://images-na.ssl-images-amazon.com/images/I/A1YDIIAWSWL._SX450_.jpg":[338,450],"https://images-na.
ssl-images-amazon.com/images/I/A1YDIIAWSWL._SX425_.jpg":[319,425],"https://images-na.ssl-images-amazon.com/images/I/A1YDIIAWSWL._SX466_.jpg":[350,466],"https://images-na.ssl-images-amazon.com/images/I/A1YDIIAWSWL._SX522_.jpg":[392,522],"https://images-na.ssl-images-amazon.com/images/I/A1YDIIAWSWL._SX569_.jpg":[427,569],"https://images-na.ssl-images-amazon.com/images/I/A1YDIIAWSWL._SX679_.jpg":[509,679]},"variant":"PT04","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/61qFqipGhCL._SL1500_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/31RVniM5g2L._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/31RVniM5g2L.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/61qFqipGhCL._SY355_.jpg":[355,355],"https://images-na.ssl-images-amazon.com/images/I/61qFqipGhCL._SY450_.jpg":[450,450],"https://images-na.ssl-images-amazon.com/images/I/61qFqipGhCL._SX425_.jpg":[425,425],"https://images-na.ssl-images-amazon.com/images/I/61qFqipGhCL._SX466_.jpg":[466,466],"https://images-na.ssl-images-amazon.com/images/I/61qFqipGhCL._SX522_.jpg":[522,522],"https://images-na.ssl-images-amazon.com/images/I/61qFqipGhCL._SX569_.jpg":[569,569],"https://images-na.ssl-images-amazon.com/images/I/61qFqipGhCL._SX679_.jpg":[679,679]},"variant":"PT05","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/61C1UIQU7YL._SL1500_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/31593M2oY0L._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/31593M2oY0L.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/61C1UIQU7YL._SY355_.jpg":[355,355],"https://images-na.ssl-images-amazon.com/images/I/61C1UIQU7YL._SY450_.jpg":[450,450],"https://images-na.ssl-images-amazon.com/images/I/61C1UIQU7YL._SX425_.jpg":[425,425],"https://images-na.ssl-images-amazon.com/images/I/61C1UIQU7YL._SX466_.jpg":[466,466],"https://images-na.ssl-images-amazon.com/images/I/61C1UIQU7YL._SX522_.jpg":[522,522],"https:/
/images-na.ssl-images-amazon.com/images/I/61C1UIQU7YL._SX569_.jpg":[569,569],"https://images-na.ssl-images-amazon.com/images/I/61C1UIQU7YL._SX679_.jpg":[679,679]},"variant":"PT06","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/71QthPBNgLL._SL1500_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/41EChsg2DAL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/41EChsg2DAL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/71QthPBNgLL._SX355_.jpg":[251,355],"https://images-na.ssl-images-amazon.com/images/I/71QthPBNgLL._SX450_.jpg":[318,450],"https://images-na.ssl-images-amazon.com/images/I/71QthPBNgLL._SX425_.jpg":[301,425],"https://images-na.ssl-images-amazon.com/images/I/71QthPBNgLL._SX466_.jpg":[330,466],"https://images-na.ssl-images-amazon.com/images/I/71QthPBNgLL._SX522_.jpg":[369,522],"https://images-na.ssl-images-amazon.com/images/I/71QthPBNgLL._SX569_.jpg":[402,569],"https://images-na.ssl-images-amazon.com/images/I/71QthPBNgLL._SX679_.jpg":[480,679]},"variant":"PT07","lowRes":null},{"hiRes":"https://images-na.ssl-images-amazon.com/images/I/71xAVDnbRzL._SL1500_.jpg","thumb":"https://images-na.ssl-images-amazon.com/images/I/41LXguq9kAL._SS40_.jpg","large":"https://images-na.ssl-images-amazon.com/images/I/41LXguq9kAL.jpg","main":{"https://images-na.ssl-images-amazon.com/images/I/71xAVDnbRzL._SX355_.jpg":[250,355],"https://images-na.ssl-images-amazon.com/images/I/71xAVDnbRzL._SX450_.jpg":[316,450],"https://images-na.ssl-images-amazon.com/images/I/71xAVDnbRzL._SX425_.jpg":[299,425],"https://images-na.ssl-images-amazon.com/images/I/71xAVDnbRzL._SX466_.jpg":[328,466],"https://images-na.ssl-images-amazon.com/images/I/71xAVDnbRzL._SX522_.jpg":[367,522],"https://images-na.ssl-images-amazon.com/images/I/71xAVDnbRzL._SX569_.jpg":[400,569],"https://images-na.ssl-images-amazon.com/images/I/71xAVDnbRzL._SX679_.jpg":[477,679]},"variant":"PT08","lowRes":null}]},

                'colorToAsin': {'initial': {}},

                'holderRatio': 1.0,

                'holderMaxHeight': 700,

                'heroImage': {'initial': []},

                'heroVideo': {'initial': []},

                'spin360ColorData': {'initial': {}},

                'spin360ColorEnabled': {'initial': 0},

                'spin360ConfigEnabled': false,

                'spin360LazyLoadEnabled': false,

                'playVideoInImmersiveView':'false',

                'tabbedImmersiveViewTreatment':'T2',

                'totalVideoCount':'0',

                'videoIngressATFSlateThumbURL':'',

                'mediaTypeCount':'0',

                'atfEnhancedHoverOverlay' : true,

                'winningAsin': 'B07HGMDY4T',

                'weblabs' : {},

                'aibExp3Layout' : 1,

                'aibRuleName' : 'R17',

                'acEnabled' : false

                };

    A.trigger('P.AboveTheFold'); // trigger ATF event.

    return data;

});

如果您想坚持使用正则表达式解决方案,可以尝试如下匹配:r'"hiRes":"(https.*?\.jpg)"'

快速解释:

匹配任何以 "hiRes":"https 开头、以 .jpg" 结尾的字符串;使用捕获组获取实际的URL;这假设所有URL都以https开头,以.jpg结尾。
您可以将上面的正则表达式与re.search或re.findall一起使用(注意:re.match只从字符串开头开始匹配,而这里的目标并不在字符串开头,因此不适用),希望能得到您想要的结果。您应该能够从匹配结果中提取捕获组。如果您需要更多详细信息,请参阅Python re模块的文档。

评论:也许您的示例中缺少了一些内容,但data是一个字典,您可以直接引用其中的图像,例如data['colorImages']['initial'][0]['hiRes'],并没有需要应用正则表达式的字符串。——如果我说得不够清楚,很抱歉。这段代码本身就是一个字符串,我是使用XPath //script[contains(., "ImageBlockATF")]/text() 从Amazon页面中提取出来的。这个示例是我从该产品页面中提取的。——很抱歉回复得这么晚,我忙了几天。这很有效!最终我使用了 re.findall(r'"hiRes":"(https.*?\.jpg)"', images)。谢谢你的帮助!