Python请求和持久会话

Python请求和持久会话,python,python-requests,Python,Python Requests,我正在使用该模块(Python2.5版本为0.10.0)。 我已经知道如何将数据提交到网站上的登录表单并检索会话密钥,但我看不到在后续请求中使用此会话密钥的明显方法。 有人可以在下面的代码中填写省略号或建议其他方法吗 >>> import requests >>> login_data = {'formPosted':'1', 'login_email':'me@example.com', 'password':'pw'} >>> r =

我正在使用 requests 模块(适用于 Python 2.5 的 0.10.0 版本)。我已经知道如何将数据提交到网站上的登录表单并获取会话密钥,但我看不出如何在后续请求中使用这个会话密钥。有人可以填写下面代码中的省略号,或者建议其他方法吗?

>>> import requests
>>> login_data =  {'formPosted':'1', 'login_email':'me@example.com', 'password':'pw'}
>>> r = requests.post('https://localhost/login.py', login_data)
>>> 
>>> r.text
u'You are being redirected <a href="profilePage?_ck=1349394964">here</a>'
>>> r.cookies
{'session_id_myapp': '127-0-0-1-825ff22a-6ed1-453b-aebc-5d3cf2987065'}
>>> 
>>> r2 = requests.get('https://localhost/profile_data.json', ...)
>>> import requests >>> login_data = {'formPosted':'1', 'login_email':'me@example.com', 'password':'pw'} >>> r = requests.post('https://localhost/login.py', login_data) >>> >>> r.text u'You are being redirected <a href="profilePage?_ck=1349394964">here</a>' >>> r.cookies {'session_id_myapp': '127-0-0-1-825ff22a-6ed1-453b-aebc-5d3cf2987065'} >>> >>> r2 = requests.get('https://localhost/profile_data.json', ...)
文档中说,
get
接受一个可选的
cookies
参数,允许您指定要使用的cookies:

从文档中:

>>> url = 'http://httpbin.org/cookies'
>>> cookies = dict(cookies_are='working')

>>> r = requests.get(url, cookies=cookies)
>>> r.text
'{"cookies": {"cookies_are": "working"}}'

您可以使用以下方法轻松创建持久会话:

s = requests.Session()
在此之后,继续您的请求,您将:

s.post('https://localhost/login.py', login_data)
# Logged in: the cookies from the login response are saved on `s` for future requests.
r2 = s.get('https://localhost/profile_data.json', ...)
# The saved cookies are sent automatically with this request.
# Keep issuing requests through `s`; it keeps the cookies intact.

有关会话的更多信息:

请查看我在这个类似问题中的答案:

编辑:


我知道我的回答收到了一些反对票,但没有人留下解释性的评论。我猜这是因为我用的是
urllib
库,而不是
requests
。我之所以这样做,是因为OP请求的是关于
requests
的帮助,或者让别人建议另一种方法。

其他答案有助于理解如何维持这样的会话。另外,我想提供一个类,它可以在脚本的不同运行期间(使用缓存文件)保持会话的维护。这意味着只有在需要时才执行正确的“登录”(超时或缓存中不存在会话)。此外,它还支持后续调用“get”或“post”时的代理设置

它是用Python3测试的

使用它作为您自己代码的基础。以下代码段随GPLv3一起发布

import pickle
import datetime
import os
from urllib.parse import urlparse
import requests    

class MyLoginSession:
    """
    Log in to a web site once and reuse the authenticated session.

    The session object is pickled to a cache file (named after the login
    URL's host) so that later script executions can restore it instead of
    logging in again. Proxy settings are remembered and applied to every
    request issued through this class.
    """
    def __init__(self,
                 loginUrl,
                 loginData,
                 loginTestUrl,
                 loginTestString,
                 sessionFileAppendix = '_session.dat',
                 maxSessionTimeSeconds = 30 * 60,
                 proxies = None,
                 userAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
                 debug = True,
                 forceLogin = False,
                 **kwargs):
        """
        Store the login configuration and immediately log in.

        'loginUrl' is the URL the login form is posted to; its host name is
            also used to build the cache file name.
        'loginData' is sent as post data (dictionary of id : value).
        'loginTestUrl' is fetched after login to verify the session.
        'loginTestString' is searched for (case-insensitively) in the text of
            the 'loginTestUrl' response to make sure the login worked.
        'sessionFileAppendix' is appended to the host name to form the cache
            file name.
        'maxSessionTimeSeconds' is the cache age after which a re-login is done.
        'proxies' is of format { 'https' : 'https://user:pass@server:port', 'http' : ...
        'userAgent' is sent as the user-agent header of a new session.
        'debug' enables progress messages on stdout.
        'forceLogin' skips the cache and always performs a fresh login.
        '**kwargs' are forwarded to the login post request.

        Raises Exception if the login could not be verified.
        """
        urlData = urlparse(loginUrl)

        self.proxies = proxies
        self.loginData = loginData
        self.loginUrl = loginUrl
        self.loginTestUrl = loginTestUrl
        self.maxSessionTime = maxSessionTimeSeconds
        self.sessionFile = urlData.netloc + sessionFileAppendix
        self.userAgent = userAgent
        self.loginTestString = loginTestString
        self.debug = debug

        self.login(forceLogin, **kwargs)

    def modification_date(self, filename):
        """
        return last file modification date as datetime object
        """
        t = os.path.getmtime(filename)
        return datetime.datetime.fromtimestamp(t)

    def _loadSessionFromCache(self):
        """
        Try to restore self.session from the cache file.

        Returns True when a sufficiently recent cache file was loaded, False
        when it is missing, too old or unreadable.
        """
        try:
            lastAccess = self.modification_date(self.sessionFile)
        except OSError:
            # no cache file (or it is not accessible)
            return False

        # total_seconds() is required here: timedelta.seconds wraps around
        # every 24 hours and would make a day-old cache look fresh.
        age = (datetime.datetime.now() - lastAccess).total_seconds()
        if age >= self.maxSessionTime:
            return False

        try:
            with open(self.sessionFile, "rb") as f:
                # NOTE(security): pickle.load can execute arbitrary code; the
                # cache file must only be writable by trusted users.
                self.session = pickle.load(f)
        except (OSError, EOFError, pickle.UnpicklingError,
                AttributeError, ImportError, IndexError):
            if self.debug:
                print('could not read session cache-file %s' % self.sessionFile)
            return False

        if self.debug:
            print("loaded session from cache (last access %ds ago) " % age)
        return True

    def login(self, forceLogin = False, **kwargs):
        """
        Log in to a session.

        Unless 'forceLogin' is set, first try to restore the last saved
        session from the cache file. If that is not possible (no file, file
        older than the maximum session time, unreadable file) perform a proper
        login by posting the login data; '**kwargs' are forwarded to that post.

        The session is then verified by fetching the login test URL. Only a
        verified session is written to the cache file, so a failed login never
        ends up in the cache.

        Raises Exception if the login test string is not found.
        """
        if self.debug:
            print('loading or generating session...')

        wasReadFromCache = (not forceLogin) and self._loadSessionFromCache()
        if not wasReadFromCache:
            self.session = requests.Session()
            self.session.headers.update({'user-agent' : self.userAgent})
            self.session.post(self.loginUrl, data = self.loginData,
                              proxies = self.proxies, **kwargs)
            if self.debug:
                print('created new session with login' )

        # test login (through the same proxies as every other request)
        res = self.session.get(self.loginTestUrl, proxies = self.proxies)
        if self.loginTestString.lower() not in res.text.lower():
            raise Exception("could not log into provided site '%s'"
                            " (did not find successful login string)"
                            % self.loginUrl)

        self.saveSessionToCache()

    def saveSessionToCache(self):
        """
        Pickle the current session to the cache file.

        Rewriting the file also refreshes its modification time, which is what
        login() uses to decide whether the cached session is still recent.
        """
        # always save (to update timeout)
        with open(self.sessionFile, "wb") as f:
            pickle.dump(self.session, f)
            if self.debug:
                print('updated session cache-file %s' % self.sessionFile)

    def retrieveContent(self, url, method = "get", postData = None, **kwargs):
        """
        Request 'url' through the logged-in session and return the response.

        If 'method' is 'get' (case-insensitive) a get request is sent; any
        other value sends a post request with 'postData' as its data.
        '**kwargs' are forwarded to the request call. The configured proxies
        are always applied.
        """
        if str(method).lower() == 'get':
            res = self.session.get(url , proxies = self.proxies, **kwargs)
        else:
            res = self.session.post(url , data = postData, proxies = self.proxies, **kwargs)

        # the session has been updated on the server, so also update in cache
        self.saveSessionToCache()

        return res

在尝试上述所有答案后,我发现在后续请求中使用“RequestsCookieJar”而不是常规的CookieJar解决了我的问题

import requests
import json

# The login URL
authUrl = 'https://whatever.com/login'
# The subsequent URL
testUrl = 'https://whatever.com/someEndpoint'
# The logout URL
testlogoutUrl = 'https://whatever.com/logout'
# Whatever you are posting
login_data = {'formPosted': '1',
              'login_email': 'me@example.com',
              'password': 'pw'
              }
# The authentication token (or any other data) received from the authentication request.
token = ''
# Post the login request
loginRequest = requests.post(authUrl, login_data)
print("{}".format(loginRequest.text))
# Save the response content to a variable. In this case a field called token is needed.
token = str(json.loads(loginRequest.content)['token'])  # or ['access_token']
print("{}".format(token))
# Verify that the login succeeded
print("{}".format(loginRequest.status_code))
# Create a requests cookie jar for the subsequent requests and add the cookie
jar = requests.cookies.RequestsCookieJar()
jar.set('LWSSO_COOKIE_KEY', token)
# Execute the next request with the cookie jar set
r = requests.get(testUrl, cookies=jar)
print("R.TEXT: {}".format(r.text))
print("R.STCD: {}".format(r.status_code))
# Execute the logout request with the cookie jar set
r = requests.delete(testlogoutUrl, cookies=jar)
print("R.TEXT: {}".format(r.text))  # should show "Request Not Authorized"
print("R.STCD: {}".format(r.status_code))  # should show 401

用于检索json数据的代码段,受密码保护

import requests

# Placeholder credentials and locations; replace with real values.
username = "my_user_name"
password = "my_super_secret"
url = "https://www.my_base_url.com"
the_page_i_want = "/my_json_data_page"

login_url = url + "/login"
session = requests.Session()

# Fetch the login page first; its response carries the 'csrftoken' cookie.
csrf_token = session.get(login_url).cookies['csrftoken']

# Post the credentials together with the CSRF token, sending the login page
# as Referer; 'next' names the page requested after login.
form_fields = {
    'username': username,
    'password': password,
    'csrfmiddlewaretoken': csrf_token,
    'next': the_page_i_want,
}
resp = session.post(login_url, data=form_fields, headers={'Referer': login_url})
print(resp.json())

这将在Python中为您工作

# Call JIRA API with HTTPBasicAuth
import json
import requests
from requests.auth import HTTPBasicAuth

# Placeholder account data; fill in the real e-mail, API token and site.
JIRA_EMAIL = "****"
JIRA_TOKEN = "****"
BASE_URL = "https://****.atlassian.net"

# Full endpoint URL: site base plus the server-info REST path.
API_URL = BASE_URL + "/rest/api/3/serverInfo"

HEADERS = {'Content-Type' : 'application/json;charset=iso-8859-1'}
BASIC_AUTH = HTTPBasicAuth(JIRA_EMAIL, JIRA_TOKEN)

response = requests.get(API_URL, auth=BASIC_AUTH, headers=HEADERS)

# Decode the JSON body, then pretty-print it with sorted keys.
server_info = json.loads(response.text)
print(json.dumps(server_info, sort_keys=True, indent=4, separators=(",", ": ")))

只保存所需的cookie并重新使用它们

import os
import pickle
from urllib.parse import urljoin, urlparse

login = 'my@email.com'
password = 'secret'
# Assuming two cookies are used for persistent login.
# (Find it by tracing the login process)
persistentCookieNames = ['sessionId', 'profileId']
URL = 'http://example.com'
urlData = urlparse(URL)
# The cookie cache file is named after the site's host.
cookieFile = urlData.netloc + '.cookie'
signinUrl = urljoin(URL, "/signin")
with requests.Session() as session:
    try:
        with open(cookieFile, 'rb') as f:
            print("Loading cookies...")
            # NOTE(security): pickle.load can execute arbitrary code; only
            # load a cookie file that this script itself has written.
            session.cookies.update(pickle.load(f))
    except Exception:
        # Deliberately broad: whatever prevents restoring the cookies
        # (missing file, corrupt pickle, ...), fall back to a fresh login.
        print("Login in...")
        post = session.post(
            signinUrl,
            data={
                'email': login,
                'password': password,
            }
        )
        # Keep only the cookies needed for the persistent login.
        jar = requests.cookies.RequestsCookieJar()
        for cookie in session.cookies:
            if cookie.name in persistentCookieNames:
                jar.set_cookie(cookie)
        # Do not cache an empty jar: it would be loaded "successfully" on
        # every later run and the login would never be attempted again.
        if len(jar) > 0:
            try:
                with open(cookieFile, 'wb') as f:
                    pickle.dump(jar, f)
            except Exception:
                # Remove a partially written file, but never let the cleanup
                # hide the original error.
                try:
                    os.remove(cookieFile)
                except OSError:
                    pass
                raise
    MyPage = urljoin(URL, "/mypage")
    page = session.get(MyPage)

我不是给你投反对票的人之一,但我猜测,许多读者可能把OP的最后一句话理解为:“有人可以在下面的代码中填写省略号,或者建议另一种方法(使用 requests 库、但需要对我的代码动更大的手术,而不仅仅是用其他东西填写省略号)。”——但这只是我的猜测。作为OP,我可以说你的回答提供了一个有用的选择。哪怕只是为了证明
requests
为一个原本需要3个库才能实现的问题提供了一个简单而高级的解决方案。有什么方法可以在脚本运行之间保存会话本身吗?可以用 pickle.dump 将会话 cookie 保存到文件中,例如 pickle.dump(session.cookies._cookies, file);加载时类似:cookies = pickle.load(file),cj = requests.cookies.RequestsCookieJar(),cj._cookies = cookies,然后 session.cookies = cj。如果我涉及代理怎么办?对于发送到
localhost
的请求,web服务器返回的登录和其他cookies可能会出现问题,如果它们包含不正确的域属性值。对于
localhost
,web服务器应返回域属性设置为
localhost.local
的cookie,否则cookie将不会应用于会话。在这种情况下,请使用
127.0.0.1
而不是
localhost
@SergeyNudnov非常感谢您的评论我浪费了很多时间试图找出会话无法正确处理Cookie的原因。将域从localhost更改为localhost.local解决了此问题。再次感谢。这是一个很好的答案,搜索此解决方案也异常困难。不应该作为请求模块的一部分实现吗?它确实使用了
requests
模块。作为模块的一部分,您将如何实施它?或者说@user1602是什么意思?回答得很好!如果我可以问一下,很久以前你就知道这一点了。你/某人能解释一下这行代码吗:res.text.lower().find(self.loginTestString.lower())<0?这是否意味着它查找字符串,如果未找到匹配项,则返回0?如果未在字符串中找到搜索字符串,则字符串的find方法将返回
-1
(即,此处:查找
self.loginTestString.lower()
in
res.text.lower()
)谢谢
# Call JIRA API with HTTPBasicAuth
import json
import requests
from requests.auth import HTTPBasicAuth

# Placeholder account data; fill in the real e-mail, API token and site.
JIRA_EMAIL = "****"
JIRA_TOKEN = "****"
BASE_URL = "https://****.atlassian.net"

# Full endpoint URL: site base plus the server-info REST path.
API_URL = BASE_URL + "/rest/api/3/serverInfo"

HEADERS = {'Content-Type' : 'application/json;charset=iso-8859-1'}
BASIC_AUTH = HTTPBasicAuth(JIRA_EMAIL, JIRA_TOKEN)

response = requests.get(API_URL, auth=BASIC_AUTH, headers=HEADERS)

# Decode the JSON body, then pretty-print it with sorted keys.
server_info = json.loads(response.text)
print(json.dumps(server_info, sort_keys=True, indent=4, separators=(",", ": ")))
import os
import pickle
from urllib.parse import urljoin, urlparse

login = 'my@email.com'
password = 'secret'
# Assuming two cookies are used for persistent login.
# (Find it by tracing the login process)
persistentCookieNames = ['sessionId', 'profileId']
URL = 'http://example.com'
urlData = urlparse(URL)
# The cookie cache file is named after the site's host.
cookieFile = urlData.netloc + '.cookie'
signinUrl = urljoin(URL, "/signin")
with requests.Session() as session:
    try:
        with open(cookieFile, 'rb') as f:
            print("Loading cookies...")
            # NOTE(security): pickle.load can execute arbitrary code; only
            # load a cookie file that this script itself has written.
            session.cookies.update(pickle.load(f))
    except Exception:
        # Deliberately broad: whatever prevents restoring the cookies
        # (missing file, corrupt pickle, ...), fall back to a fresh login.
        print("Login in...")
        post = session.post(
            signinUrl,
            data={
                'email': login,
                'password': password,
            }
        )
        # Keep only the cookies needed for the persistent login.
        jar = requests.cookies.RequestsCookieJar()
        for cookie in session.cookies:
            if cookie.name in persistentCookieNames:
                jar.set_cookie(cookie)
        # Do not cache an empty jar: it would be loaded "successfully" on
        # every later run and the login would never be attempted again.
        if len(jar) > 0:
            try:
                with open(cookieFile, 'wb') as f:
                    pickle.dump(jar, f)
            except Exception:
                # Remove a partially written file, but never let the cleanup
                # hide the original error.
                try:
                    os.remove(cookieFile)
                except OSError:
                    pass
                raise
    MyPage = urljoin(URL, "/mypage")
    page = session.get(MyPage)