Python: 403 status code after 4-5 requests, with a timeout between each request

Tags: python, web-scraping, python-requests, http-headers

I'm trying to figure out whether there is a problem with my headers or my cookies, because I don't understand why a 403 status code shows up after 3-8 requests.
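
One way to narrow this down is to inspect exactly what was sent on a request that comes back 403 (a sketch using the requests library; the single call and the ticker are just placeholders):

import requests

s = requests.Session()
r = s.get('https://seekingalpha.com/symbol/aapl')  # hypothetical single call
print(r.status_code)
# r.request.headers shows the headers that actually went out,
# including the Cookie header if the session attached any cookies
print(dict(r.request.headers))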

I'm trying to access data from the symbol-data API (an example URL appears in the code below), with ticker standing in for the stock symbol I'm interested in.

Here is my code:

import os
import sys
sys.path.append(os.path.abspath(r'..\..\..\.'))

from STOCK_DATA.utils import *

import pandas as pd
import numpy as np
import requests          # needed for requests.Session() below
import requests_html
import time              # needed for time.sleep() in the scraper loop
from bs4 import BeautifulSoup
import math
import glob
import re
import random

# loading various user agents (load_agents comes from STOCK_DATA.utils)
USER_AGENTS = load_agents()

TICKERS = ['aapl', 'tsla', 'fvrr', 'upwk', 'xone', 'ptmn', 'gogl', 'old',
           'urov', 'mlpc', 'nss', 'pine', 'sxi', 'cai', 'pcgu', 'lcnb',
           'psl', 'adm', 'nkx', 'len.b', 'ndp', 'eyesw', 'metpe', 'play',
           'eig', 'dx', 'lvhd', 'rnmc', 'zyxi', 'qtec', 'cfx', 'mua',
           'nvmi', 'tqqq', 'dcp', 'unma', 'nymt', 'dxyn', 'fthm', 'abg',
           'bsl', 'eqrr', 'gnus', 'ccb', 'thmo', 'chmipb', 'uonek', 'agope',
           'kl', 'rdhl', 'vctr', 'rndv', 'bg', 'tse', 'bgh', 'swm', 'acbi',
           'brgpd', 'fedu', 'mdna', 'tzacu', 'nid', 'happ', 'bkpc', 'e',
           'glre', 'infu', 'cffa', 'rdy', 'wrbpb', 'ctic', 'itub', 'all',
           'mgrc', 'mfin', 'dsxpb', 'ameh', 'ulta', 'ahtpd', 'bap', 'cbpo',
           'csa', 'holx', 'tlk', 'opy', 'phk', 'bprn', 'mlp', 'bscm', 'dzz',
           'tslx', 'ce', 'ibth',
           'sbpc', 'hsbcpa', 'ainv', 'xpo', 'dfphu', 'pdi', 'dgly'] 
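
load_agents is a helper from my local STOCK_DATA.utils module; for anyone copying this, a hypothetical stand-in that reads one user-agent string per line from a text file could be:

def load_agents(path='user_agents.txt'):
    # hypothetical stand-in for STOCK_DATA.utils.load_agents:
    # assumes a plain text file with one user-agent string per line
    with open(path) as f:
        return [line.strip() for line in f if line.strip()]
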
The function below makes two requests: the first one just picks up cookies, because calling the API directly without any cookies results in a 403.

def sa_test3(ticker):

    # pick a random user agent
    ua = random.sample(USER_AGENTS, 1)[0].strip()

    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.5',
        'Connection': 'keep-alive',
        'Host': 'seekingalpha.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': ua,
    }

    s = requests.Session()
    s.headers.update(headers)

    # make a non-api call to update cookies for the session;
    # hitting the api without any cookies returns 403
    url = f'https://seekingalpha.com/symbol/{ticker}'
    s.get(url)

    # prep for the real api call
    params = (
        ('fields[]', 'long_desc'),
        ('slugs', f'{ticker}'),
    )

    # dict.update() returns None, so the Referer has to be set in place
    # (headers2 = headers.update(...) would silently send no Referer at all)
    headers['Referer'] = f'https://seekingalpha.com/symbol/{ticker}'

    # Example url: https://seekingalpha.com/api/v3/symbol_data?fields[]=long_desc&slugs=TSLA
    response = s.get('https://seekingalpha.com/api/v3/symbol_data',
                     params=params, headers=headers)

    print(response.status_code)
    return response  # return so the caller can inspect the status
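
To verify that the cookie bootstrap is doing its job, a quick check (a sketch; the ticker is just a placeholder) is to look at the session's cookie jar right after the first GET:

s = requests.Session()
s.get('https://seekingalpha.com/symbol/aapl')
# expect a non-empty dict here before calling the api
print(s.cookies.get_dict())
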
Now we simply loop over each ticker and add a sleep between requests:

def scraper():
    for ticker in TICKERS:
        sa_test3(ticker)
        time.sleep(random.randint(5, 10))
I can get 3-10 requests through successfully, but then the 403s start popping up. I'm not sure exactly what triggers the anti-bot measures. I tried increasing the sleep parameter, but with no success.
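
For reference, a status-aware variant of the loop (a sketch; it relies on sa_test3 returning the response, as in the version above) that backs off after a 403 instead of sleeping a fixed interval:

def scraper_with_backoff(max_wait=300):
    # sketch: double the pause after every 403, reset it after a success
    wait = 10
    for ticker in TICKERS:
        response = sa_test3(ticker)
        if response is not None and response.status_code == 403:
            wait = min(wait * 2, max_wait)
        else:
            wait = 10
        time.sleep(wait + random.uniform(0, 5))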

Thanks, everyone.

Here is the full code for easy copy and paste:

import os
import sys
sys.path.append(os.path.abspath(r'..\..\..\.'))
import pandas as pd
import numpy as np
import requests          # needed for requests.Session() below
import requests_html
import time              # needed for time.sleep() in the scraper loop
from bs4 import BeautifulSoup
import math
import glob
import re
import random
from STOCK_DATA.utils import *

# loading various user agents
USER_AGENTS = load_agents()

TICKERS = ['aapl', 'tsla', 'fvrr', 'upwk', 'xone', 'ptmn', 'gogl', 'old',
           'urov', 'mlpc', 'nss', 'pine', 'sxi', 'cai', 'pcgu', 'lcnb',
           'psl', 'adm', 'nkx', 'len.b', 'ndp', 'eyesw', 'metpe', 'play',
           'eig', 'dx', 'lvhd', 'rnmc', 'zyxi', 'qtec', 'cfx', 'mua',
           'nvmi', 'tqqq', 'dcp', 'unma', 'nymt', 'dxyn', 'fthm', 'abg',
           'bsl', 'eqrr', 'gnus', 'ccb', 'thmo', 'chmipb', 'uonek', 'agope',
           'kl', 'rdhl', 'vctr', 'rndv', 'bg', 'tse', 'bgh', 'swm', 'acbi',
           'brgpd', 'fedu', 'mdna', 'tzacu', 'nid', 'happ', 'bkpc', 'e',
           'glre', 'infu', 'cffa', 'rdy', 'wrbpb', 'ctic', 'itub', 'all',
           'mgrc', 'mfin', 'dsxpb', 'ameh', 'ulta', 'ahtpd', 'bap', 'cbpo',
           'csa', 'holx', 'tlk', 'opy', 'phk', 'bprn', 'mlp', 'bscm', 'dzz',
           'tslx', 'ce', 'ibth',
           'sbpc', 'hsbcpa', 'ainv', 'xpo', 'dfphu', 'pdi', 'dgly']




def sa_test3(ticker):

    # pick a random user agent
    ua = random.sample(USER_AGENTS, 1)[0].strip()

    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'en-US,en;q=0.5',
        'Connection': 'keep-alive',
        'Host': 'seekingalpha.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': ua,
    }

    s = requests.Session()
    s.headers.update(headers)

    # make a non-api call to update cookies for the session;
    # hitting the api without any cookies returns 403
    url = f'https://seekingalpha.com/symbol/{ticker}'
    s.get(url)

    # prep for the real api call
    params = (
        ('fields[]', 'long_desc'),
        ('slugs', f'{ticker}'),
    )

    # dict.update() returns None, so the Referer has to be set in place
    # (headers2 = headers.update(...) would silently send no Referer at all)
    headers['Referer'] = f'https://seekingalpha.com/symbol/{ticker}'

    # Example url: https://seekingalpha.com/api/v3/symbol_data?fields[]=long_desc&slugs=TSLA
    response = s.get('https://seekingalpha.com/api/v3/symbol_data',
                     params=params, headers=headers)

    print(response.status_code)
    return response  # return so the caller can inspect the status


def scraper():
    for ticker in TICKERS:
        sa_test3(ticker)
        time.sleep(random.randint(8, 10))

if __name__ == '__main__':
    scraper()

Stuck on something similar. Did you ever figure it out?
No, sorry. If I figure it out, I'll post it.