Python 3.x：使用 Cookie 通过 Scrapy 请求更改货币

我正试图从 carflexi.com 提取数据。下面是我为完成这项工作而编写的代码：
# Session cookies captured from the browser.  'uCurr' selects the display
# currency; the antiforgery cookie must pair with the form token below.
cookies = {'.AspNetCore.Antiforgery.M6FGfmlbnTQ': 'CfDJ8A-eCRip5cVAs8-wkFU1H8dmFA0OIoxp6pQh1dcRvnpdNHKpKwFF2EhHSg5c-yqb-DriS6AIiwgnIpzyMiiphd42Un2v0-X6l8ePR4KkjG_CR2a_u6qrFOZNrRg0jrhliPOhyV6TKCtu6k-rn92lVMI',
'uCurr': 'EUR'}
# Headers mimicking the site's own XHR call (form-encoded POST).
header = {'User-Agent': 'APIs-Google (+https://developers.google.com/webmasters/APIs-Google.html)',
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
'X-Requested-With': 'XMLHttpRequest'}
# POST payload copied from the browser's network tab.  'query' and
# 'discount' are opaque base64-like blobs produced by the site's JS;
# the verification token must match the antiforgery cookie above.
# NOTE(review): these values are session-bound and will expire — TODO confirm.
formdata = {'query': 'AR-LCAAAAAAAAApNjr0KwkAQhN9l6gvsXn5M1iqohZUSkweImqjgD6LYhLy7c0UgcMUcM7PzDXjBBrSwRByOMHjxGkkWSVKrmggfHE4zR3XunGGxjwuHbhI9LE8K73CZxJXtVblm-ka1aSqqO8znDg-YZqkLGBo7vOmL0m7DYveMmgM_Hx7imS8rWpDzB1swQmqU22q_q-rQCPSBYBlGw0IqzOk4_gFLkZV45QAAAA2',
'discount': 'AG51bGw1',
'__RequestVerificationToken': 'CfDJ8A-eCRip5cVAs8-wkFU1H8fDgaTCPGKJ6FVhqBSItqEvJHfZlsE0bsh4EKYxRKIpb3CDhue0bRj0jOgsLjyHTIvvEolzWwGt1CDfRlgnXrfO8NZiEE3ZQhIyeVShd5-AxG-QhQp4zSE3WeWo7Fr40DI'}
# AJAX endpoint that returns the deal listing.
url = 'https://www.carflexi.com/data/getDeals'
In [47]: fetch(scrapy.FormRequest(url, method='POST', headers=header, formdata=formdata,cookies = cookies))
2021-04-25 19:21:11 [scrapy.core.engine] DEBUG: Crawled (200) <POST https://www.carflexi.com/data/getDeals> (referer: None)
ProxyNova 是一个免费的代理提供商。我们在发出请求之前先从该网站获取一个代理地址；这里使用 Selenium 抓取代理 IP：
import scrapy
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
class CarflexiSpider(scrapy.Spider):
    """Fetch carflexi.com deal data through a German proxy.

    The site geo-fences currency by client IP, so routing the request
    through a German IP makes the response come back priced in EUR.
    """

    name = 'carflexi'
    allowed_domains = ['www.carflexi.com']
    start_urls = ['https://www.carflexi.com/data/getDeals']

    def __init__(self, *args, **kwargs):
        # Keep Scrapy's own Spider initialisation (name/start_urls wiring)
        # intact — the original skipped this, which breaks spider-argument
        # handling.
        super().__init__(*args, **kwargs)
        # Resolve the proxy once at start-up; every request reuses it.
        self.proxy_adress_is = self.proxy_adress()

    def proxy_adress(self):
        """Scrape the first German proxy listed on proxynova.com.

        Returns:
            str: ``"<ip>:<port>"`` suitable for ``Request.meta['proxy']``.
        """
        url = "https://www.proxynova.com/proxy-server-list/country-de/"
        opts = Options()
        opts.add_argument('--headless')
        driver = webdriver.Chrome(options=opts, executable_path="chromedriver")
        try:
            driver.get(url)
            # The proxy table is rendered by JavaScript; give it time to
            # appear.  NOTE(review): an explicit WebDriverWait on the table
            # would be more robust than a fixed sleep — TODO confirm.
            time.sleep(3)
            proxy_ip = driver.find_element_by_xpath(
                '//*[@id="tbl_proxy_list"]/tbody[1]/tr[1]/td[1]/abbr').text
            proxy_ip = proxy_ip.replace(",", "").strip()
            proxy_port = driver.find_element_by_xpath(
                '//*[@id="tbl_proxy_list"]/tbody[1]/tr[1]/td[2]').text
            proxy_port = proxy_port.replace(",", "").strip()
        finally:
            # quit() (not close()) tears down the whole browser session, so
            # the chromedriver process is not leaked even when the lookup
            # raises.
            driver.quit()
        return proxy_ip + ":" + proxy_port

    def parse(self, response):
        url = 'https://www.carflexi.com/data/getDeals'
        request = scrapy.Request(url, callback=self.parse_product)
        # Route this request through the German proxy -> EUR prices.
        request.meta['proxy'] = self.proxy_adress_is
        yield request

    def parse_product(self, response):
        # Placeholder: extraction of the deals payload goes here.
        pass
proxy_adress 方法会让请求经由德国 IP 发出，这样就能获得欧元价格。
我没有使用 FormRequest，您可以根据自己的脚本进行调整。这种现象称为地理围栏（geo-fencing）：web 服务器会检查客户端的 IP 地址，并将其与各国家/地区的已知 IP 段列表进行比对。直接请求拿不到欧元价格，必须使用 VPN 或代理。如果您想用代理，我可以提供一个基于 Selenium 的免费代理脚本。——@MuratDemir 请分享该脚本。另外，有没有办法更改本地计算机的货币/国家设置来绕开这个问题？——那同样需要通过代理来实现。——@MuratDemir 你能分享你的脚本吗？——当然，我来为您准备：
import scrapy
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time


class CarflexiSpider(scrapy.Spider):
    """Scrape carflexi.com deals via a German proxy taken from proxynova.com,
    so the geo-fenced site returns prices in EUR."""

    name = 'carflexi'
    allowed_domains = ['www.carflexi.com']
    start_urls = ['https://www.carflexi.com/data/getDeals']

    def __init__(self):
        # Look the proxy up once; every outgoing request reuses the address.
        self.proxy_adress_is = self.proxy_adress()

    def proxy_adress(self):
        """Return ``"<ip>:<port>"`` of the first German proxy on proxynova.com."""
        listing_url = "https://www.proxynova.com/proxy-server-list/country-de/"
        chrome_opts = Options()
        chrome_opts.add_argument('--headless')
        browser = webdriver.Chrome(options=chrome_opts, executable_path="chromedriver")
        browser.get(listing_url)
        time.sleep(3)  # let the JS-rendered proxy table appear
        ip_text = browser.find_element_by_xpath(
            '//*[@id="tbl_proxy_list"]/tbody[1]/tr[1]/td[1]/abbr'
        ).text.replace(",", "").strip()
        port_text = browser.find_element_by_xpath(
            '//*[@id="tbl_proxy_list"]/tbody[1]/tr[1]/td[2]'
        ).text.replace(",", "").strip()
        browser.close()
        return f"{ip_text}:{port_text}"

    def parse(self, response):
        deals_url = 'https://www.carflexi.com/data/getDeals'
        req = scrapy.Request(deals_url, callback=self.parse_product)
        req.meta['proxy'] = self.proxy_adress_is  # geo-fence workaround
        yield req

    def parse_product(self, response):
        pass