Python 3.x 赢得Unicode错误'；即使添加了encoding='；utf-8'；命令_Python 3.x_Python Unicode

Python 3.x 赢得Unicode错误'；即使添加了encoding='；utf-8'；命令

python-3.x

Python 3.x 赢得Unicode错误'；即使添加了encoding='；utf-8'；命令,python-3.x,python-unicode,Python 3.x,Python Unicode,我正试图从这个网站上搜刮。但我发现了一个unicode错误。我做了一些搜索，它似乎是一个编码问题？但在添加编码class='utf-8'后，它并没有消失。不确定问题是什么 import bs4 as bs import urllib.request import csv import numpy as np base_url = "https://www.mobygames.com/developer/sheet/view/developerId,"

我正试图从这个网站上搜刮。但我发现了一个unicode错误。我做了一些搜索，它似乎是一个编码问题？但在添加编码class='utf-8'后，它并没有消失。不确定问题是什么

    import bs4 as bs
    import urllib.request
    import csv
    import numpy as np


    base_url = "https://www.mobygames.com/developer/sheet/view/developerId,"
    url_list =[]

    with open('url.csv', 'r') as f:
        reader = csv.reader(f)
        for row in reader:
            url_list.append(row[0])

    def extract(gameurl):
        req = urllib.request.Request(gameurl,headers={'User-Agent': 'Mozilla/5.0'})
        sauce = urllib.request.urlopen(req).read()
        soup = bs.BeautifulSoup(sauce,'lxml')
        infopage = soup.find_all("div", {"class":"col-md-8 col-lg-8"})
        core_list =[]

        for credits in infopage:
            niceHeaderTitle = credits.find_all("h1", {"class":"niceHeaderTitle"})
            name = niceHeaderTitle[0].text

            Titles = credits.find_all("h3", {"class":"clean"})

            Titles = [title.get_text() for title in Titles]

            tr = credits.find_all("tr")

            for i in range(len(tr)):
                row = tr[i].get_text(strip=True)
                if row in Titles:
                    title = row
                elif len(row) > 1:
                    games=[name,title,row]
                    core_list.append(games)

            core_list = np.matrix(core_list)

            return core_list



    def csv_write(url_data):
        with open ('HRdata.csv','a',encoding='utf-8') as file:
            writer=csv.writer(file)
            for row in url_data:
                writer.writerow(row)

    for url in url_list:
        link = base_url + url            
        url_data = extract(link)
        csv_write(url_data)

我认为这是因为当我试图将其写入csv文件时，我添加了encoding='utf-8'，但它不起作用。。。我不知道该怎么解决这个问题

这是错误消息

---------------------------------------------------------------------------
UnicodeEncodeError                        Traceback (most recent call last)
<ipython-input-22-31928933be8c> in <module>()
     52 for url in url_list:
     53     link = base_url + url
---> 54     url_data = extract(link)
     55     csv_write(url_data)
     56 

<ipython-input-22-31928933be8c> in extract(gameurl)
     15 def extract(gameurl):
     16     req = urllib.request.Request(gameurl,headers={'User-Agent': 'Mozilla/5.0'})
---> 17     sauce = urllib.request.urlopen(req).read()
     18     soup = bs.BeautifulSoup(sauce,'lxml')
     19     infopage = soup.find_all("div", {"class":"col-md-8 col-lg-8"})

C:\Anaconda3\lib\urllib\request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    221     else:
    222         opener = _opener
--> 223     return opener.open(url, data, timeout)
    224 
    225 def install_opener(opener):

C:\Anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout)
    524             req = meth(req)
    525 
--> 526         response = self._open(req, data)
    527 
    528         # post-process response

C:\Anaconda3\lib\urllib\request.py in _open(self, req, data)
    542         protocol = req.type
    543         result = self._call_chain(self.handle_open, protocol, protocol +
--> 544                                   '_open', req)
    545         if result:
    546             return result

C:\Anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
    502         for handler in handlers:
    503             func = getattr(handler, meth_name)
--> 504             result = func(*args)
    505             if result is not None:
    506                 return result

C:\Anaconda3\lib\urllib\request.py in https_open(self, req)
   1359         def https_open(self, req):
   1360             return self.do_open(http.client.HTTPSConnection, req,
-> 1361                 context=self._context, check_hostname=self._check_hostname)
   1362 
   1363         https_request = AbstractHTTPHandler.do_request_

C:\Anaconda3\lib\urllib\request.py in do_open(self, http_class, req, **http_conn_args)
   1316             try:
   1317                 h.request(req.get_method(), req.selector, req.data, headers,
-> 1318                           encode_chunked=req.has_header('Transfer-encoding'))
   1319             except OSError as err: # timeout error
   1320                 raise URLError(err)

C:\Anaconda3\lib\http\client.py in request(self, method, url, body, headers, encode_chunked)
   1237                 encode_chunked=False):
   1238         """Send a complete request to the server."""
-> 1239         self._send_request(method, url, body, headers, encode_chunked)
   1240 
   1241     def _send_request(self, method, url, body, headers, encode_chunked):

C:\Anaconda3\lib\http\client.py in _send_request(self, method, url, body, headers, encode_chunked)
   1248             skips['skip_accept_encoding'] = 1
   1249 
-> 1250         self.putrequest(method, url, **skips)
   1251 
   1252         # chunked encoding will happen if HTTP/1.1 is used and either

C:\Anaconda3\lib\http\client.py in putrequest(self, method, url, skip_host, skip_accept_encoding)
   1115 
   1116         # Non-ASCII characters should have been eliminated earlier
-> 1117         self._output(request.encode('ascii'))
   1118 
   1119         if self._http_vsn == 11:

UnicodeEncodeError: 'ascii' codec can't encode characters in position 38-40: ordinal not in range(128)

---------------------------------------------------------------------------
UnicodeEncodeError回溯（最近一次呼叫最后一次）
在（）
52对于url_列表中的url：
53链接=基本url+url
--->54 url_数据=提取（链接）
55 csv_写入（url_数据）
56
在摘录中（gameurl）
15 def摘录（游戏URL）：
16 req=urllib.request.request（gameurl，headers={'User-Agent'：'Mozilla/5.0'}）
--->17 sause=urllib.request.urlopen（req.read）（）
18汤=bs.BeautifulSoup（酱汁，'lxml'）
19 infopage=soup.find_all（“div”，“class”：“col-md-8 col-lg-8”}）
urlopen中的C:\Anaconda3\lib\urllib\request.py（url、数据、超时、cafile、capath、cadefault、上下文）
221其他：
222开瓶器=_开瓶器
-->223返回opener.open（url、数据、超时）
224
225 def安装开启器（开启器）：
C:\Anaconda3\lib\urllib\request.py处于打开状态（self、fullurl、data、timeout）
524要求=方法（要求）
525
-->526响应=自身打开（请求，数据）
527
528#过程后响应
C:\Anaconda3\lib\urllib\request.py处于打开状态（self、req、data）
542协议=请求类型
543结果=self.\u调用\u链（self.handle\u打开，协议，协议+
-->544'_open'，请求）
545如果结果：
546返回结果
C:\Anaconda3\lib\urllib\request.py在调用链中（self、chain、kind、meth\u name、*args）
502对于处理程序中的处理程序：
503 func=getattr（处理程序，方法名称）
-->504结果=函数（*args）
505如果结果不是无：
506返回结果
https\u open（self，req）中的C:\Anaconda3\lib\urllib\request.py
1359 def https_打开（自身，需要）：
1360返回self.do_open（http.client.HTTPSConnection，req，
->1361 context=self.\u context，check\u hostname=self.\u check\u hostname）
1362
1363 https\u request=AbstractHTTPHandler.do\u request_
C:\Anaconda3\lib\urllib\request.py在do_open中（self，http_类，req，**http_conn_参数）
1316尝试：
1317 h.request（请求获取方法（）、请求选择器、请求数据、标题、，
->1318 encode_chunked=req.has_头（'Transfer-encoding'））
1319除OSError as err外：#超时错误
1320错误（err）
请求中的C:\Anaconda3\lib\http\client.py（self、方法、url、正文、标题、编码\u分块）
1237 encode_chunked=False）：
1238“向服务器发送完整的请求。”“”
->1239 self.\u发送\u请求（方法、url、正文、标题、编码\u分块）
1240
1241 def_send_请求（self、method、url、body、header、encode_chunked）：
C:\Anaconda3\lib\http\client.py in\u send\u请求（self、method、url、body、headers、encode\u chunked）
1248跳过['skip_accept_encoding']=1
1249
->1250 self.putrequest（方法、url、**跳过）
1251
1252#如果使用HTTP/1.1并且
putrequest中的C:\Anaconda3\lib\http\client.py（self、方法、url、跳过\u主机、跳过\u接受\u编码）
1115
1116#非ASCII字符应该更早地被删除
->1117自身输出（请求编码（'ascii'））
1118
1119如果self.\u http\u vsn==11：
UnicodeEncodeError:“ascii”编解码器无法对位置38-40中的字符进行编码：序号不在范围内（128）

http\client.py

正在尝试

ascii

对

gameurl

字符串进行编码，但无法进行编码，因为它包含的字符不在

ascii

字符集中

您需要使用

urllib.parse.quote（）

函数删除url，不包括方案（

https://

）。对于循环，您只需更改此

中的第一行：
    for url in url_list:
        link = base_url + urllib.parse.quote(url) # just doing the end is fine in this case
        url_data = extract(link)
        csv_write(url_data)

或者，您可以使用流行的模块，它可以无缝地为您解决这个问题（我强烈推荐！）。
发生此错误时url变量的值是什么？我认为它是“1”，因为url.csv只是一个从1到19的数字列表。结果是python不喜欢这个部分base_url=“，”在我将基本url更改为更简单的形式后，它起了作用？？