Python Geopy错误:GeocoderServiceError:HTTP错误500:使用带str concat的pandas apply函数时出现内部服务器错误

Python Geopy错误:GeocoderServiceError:HTTP错误500:使用带str concat的pandas apply函数时出现内部服务器错误,python,pandas,geopy,Python,Pandas,Geopy,工作功能(请参阅代码)已停止工作。唯一的区别是我给它传递了一个字符串连接 # Get geocode, return LAT and LON def locate(x): geolocator = Nominatim() print("'" + x + "'") location = geolocator.geocode(x) # Get geocode print(location) lat = location.latitude lon =

一个原本可以正常工作的函数(见下面的代码)现在不工作了。唯一的区别是,我现在传给它的是一个由字符串拼接得到的参数。

# Get geocode, return LAT and LON
def locate(x):
    """Geocode one address string and return its latitude and longitude.

    Args:
        x: The address to look up, as a single string.

    Returns:
        pd.Series([lat, lon]). Both values are np.nan when the geocoder
        raises an error or finds no match for the address.
    """
    # Local import keeps this snippet self-contained; GeopyError is the base
    # class of the exceptions geopy raises (including GeocoderServiceError).
    from geopy.exc import GeopyError

    geolocator = Nominatim()
    print("'" + x + "'")
    try:
        # Single, guarded request. The earlier unguarded call made the
        # try/except below it unreachable on failure and doubled the traffic.
        location = geolocator.geocode(x, timeout=8, exactly_one=True)
    except GeopyError as error:
        # Best effort: report the failure and fall through to NaN coordinates.
        print(error)
        location = None
    print(location)

    if location is None:
        # geocode() returned nothing (or failed): no coordinates to report.
        lat = np.nan
        lon = np.nan
        print(lat, lon)
    else:
        lat = location.latitude
        lon = location.longitude
    return pd.Series([lat, lon])
下面这样调用可以正常工作:

In[4] locate('MOSCOW   123098 RUSSIA')
'MOSCOW   123098 RUSSIA'
Москва, Центральный административный округ, Москва, ЦФО, Россия
Out[4]:
0    55.751633
1    37.618704
dtype: float64
但下面这样调用却不行:

# NOTE(review): locate(...) is called right here, once, with the whole
# concatenated Series as x; apply() then receives locate's return value
# instead of the locate function itself. This is the failing example from
# the question and is intentionally left unchanged.
df_addr[['LAT','LON']] =  df_addr['COUNTRY'].apply(locate(df_addr['CITY'] + ' ' + \
                                                          df_addr['PROVINCE'] + ' ' + \
                                                          df_addr['STATE'] + ' ' + \
                                                          df_addr['ZIP_CODE'] + ' ' + \
                                                          df_addr['COUNTRY'])) # Geocode it!
我看到函数回显正确的输入字符串:

0                 'INNSBRUCK    AUSTRIA'
1           'BERN   CH-3001 SWITZERLAND'
2                 'INNSBRUCK    AUSTRIA'
3               'MOSCOW   123098 RUSSIA'
4               'MOSCOW   123098 RUSSIA'
5              'FREDERICK  MD 21702 USA'
删除try/except后,我得到以下异常信息

.
.
99    'GLASGOW LANARK  G20 9NB SCOTLAND'
dtype: object
---------------------------------------------------------------------------
HTTPError                                 Traceback (most recent call last)
C:\Users\gn\Anaconda3\lib\site-packages\geopy\geocoders\base.py in _call_geocoder(self, url, timeout, raw, requester, deserializer, **kwargs)
    131         try:
--> 132             page = requester(url, timeout=(timeout or self.timeout), **kwargs)
    133         except Exception as error: # pylint: disable=W0703

C:\Users\gn\Anaconda3\lib\urllib\request.py in urlopen(url, data, timeout, cafile, capath, cadefault)
    152         opener = _opener
--> 153     return opener.open(url, data, timeout)
    154 

C:\Users\gn\Anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout)
    460             meth = getattr(processor, meth_name)
--> 461             response = meth(req, response)
    462 

C:\Users\gn\Anaconda3\lib\urllib\request.py in http_response(self, request, response)
    570             response = self.parent.error(
--> 571                 'http', request, response, code, msg, hdrs)
    572 

C:\Users\gn\Anaconda3\lib\urllib\request.py in error(self, proto, *args)
    498             args = (dict, 'default', 'http_error_default') + orig_args
--> 499             return self._call_chain(*args)
    500 

C:\Users\gn\Anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
    432             func = getattr(handler, meth_name)
--> 433             result = func(*args)
    434             if result is not None:

C:\Users\gn\Anaconda3\lib\urllib\request.py in http_error_default(self, req, fp, code, msg, hdrs)
    578     def http_error_default(self, req, fp, code, msg, hdrs):
--> 579         raise HTTPError(req.full_url, code, msg, hdrs, fp)
    580 

HTTPError: HTTP Error 500: Internal Server Error

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
C:\Users\gn\Anaconda3\lib\site-packages\geopy\geocoders\base.py in _call_geocoder(self, url, timeout, raw, requester, deserializer, **kwargs)
    146                 try:
--> 147                     raise ERROR_CODE_MAP[code](message)
    148                 except KeyError:

KeyError: 500

During handling of the above exception, another exception occurred:

GeocoderServiceError                      Traceback (most recent call last)
<ipython-input-6-7412c2e27dd8> in <module>()
----> 1 df_addr[['LAT','LON']] =  df_addr['COUNTRY'].apply(locate(df_addr['CITY'] + ' ' +                                                           df_addr['PROVINCE'] + ' ' +                                                           df_addr['STATE'] + ' ' +                                                           df_addr['ZIP_CODE'] + ' ' +                                                           df_addr['COUNTRY'])) # Geocode it!
      2 df_addr.head()

<ipython-input-3-d957ac2e2e2e> in locate(x)
      3     geolocator = Nominatim()
      4     print("'" + x + "'")
----> 5     location = geolocator.geocode(x,timeout=20)  # Get geocode
      6     print(location)
      7     lat = location.latitude

C:\Users\gn\Anaconda3\lib\site-packages\geopy\geocoders\osm.py in geocode(self, query, exactly_one, timeout, addressdetails, language, geometry)
    190         logger.debug("%s.geocode: %s", self.__class__.__name__, url)
    191         return self._parse_json(
--> 192             self._call_geocoder(url, timeout=timeout), exactly_one
    193         )
    194 

C:\Users\gn\Anaconda3\lib\site-packages\geopy\geocoders\base.py in _call_geocoder(self, url, timeout, raw, requester, deserializer, **kwargs)
    147                     raise ERROR_CODE_MAP[code](message)
    148                 except KeyError:
--> 149                     raise GeocoderServiceError(message)
    150             elif isinstance(error, URLError):
    151                 if "timed out" in message:

GeocoderServiceError: HTTP Error 500: Internal Server Error
.
.
99    'GLASGOW LANARK  G20 9NB SCOTLAND'
dtype: object
---------------------------------------------------------------------------
HTTPError                                 Traceback (most recent call last)
C:\Users\gn\Anaconda3\lib\site-packages\geopy\geocoders\base.py in _call_geocoder(self, url, timeout, raw, requester, deserializer, **kwargs)
    131         try:
--> 132             page = requester(url, timeout=(timeout or self.timeout), **kwargs)
    133         except Exception as error: # pylint: disable=W0703
C:\Users\gn\Anaconda3\lib\urllib\request.py in urlopen(url, data, timeout, cafile, capath, cadefault)
    152         opener = _opener
--> 153     return opener.open(url, data, timeout)
    154 
C:\Users\gn\Anaconda3\lib\urllib\request.py in open(self, fullurl, data, timeout)
    460             meth = getattr(processor, meth_name)
--> 461             response = meth(req, response)
    462 
C:\Users\gn\Anaconda3\lib\urllib\request.py in http_response(self, request, response)
    570             response = self.parent.error(
--> 571                 'http', request, response, code, msg, hdrs)
    572 
C:\Users\gn\Anaconda3\lib\urllib\request.py in error(self, proto, *args)
    498             args = (dict, 'default', 'http_error_default') + orig_args
--> 499             return self._call_chain(*args)
    500 
C:\Users\gn\Anaconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
    432             func = getattr(handler, meth_name)
--> 433             result = func(*args)
    434             if result is not None:
C:\Users\gn\Anaconda3\lib\urllib\request.py in http_error_default(self, req, fp, code, msg, hdrs)
    578     def http_error_default(self, req, fp, code, msg, hdrs):
--> 579         raise HTTPError(req.full_url, code, msg, hdrs, fp)
    580 
HTTPError: HTTP Error 500: Internal Server Error
During handling of the above exception, another exception occurred:
KeyError                                  Traceback (most recent call last)
C:\Users\gn\Anaconda3\lib\site-packages\geopy\geocoders\base.py in _call_geocoder(self, url, timeout, raw, requester, deserializer, **kwargs)
    146                 try:
--> 147                     raise ERROR_CODE_MAP[code](message)
    148                 except KeyError:
KeyError: 500
During handling of the above exception, another exception occurred:
GeocoderServiceError                      Traceback (most recent call last)
<ipython-input-6-7412c2e27dd8> in <module>()
----> 1 df_addr[['LAT','LON']] =  df_addr['COUNTRY'].apply(locate(df_addr['CITY'] + ' ' +                                                           df_addr['PROVINCE'] + ' ' +                                                           df_addr['STATE'] + ' ' +                                                           df_addr['ZIP_CODE'] + ' ' +                                                           df_addr['COUNTRY'])) # Geocode it!
      2 df_addr.head()
<ipython-input-3-d957ac2e2e2e> in locate(x)
      3     geolocator = Nominatim()
      4     print("'" + x + "'")
----> 5     location = geolocator.geocode(x,timeout=20)  # Get geocode
      6     print(location)
      7     lat = location.latitude
C:\Users\gn\Anaconda3\lib\site-packages\geopy\geocoders\osm.py in geocode(self, query, exactly_one, timeout, addressdetails, language, geometry)
    190         logger.debug("%s.geocode: %s", self.__class__.__name__, url)
    191         return self._parse_json(
--> 192             self._call_geocoder(url, timeout=timeout), exactly_one
    193         )
    194 
C:\Users\gn\Anaconda3\lib\site-packages\geopy\geocoders\base.py in _call_geocoder(self, url, timeout, raw, requester, deserializer, **kwargs)
    147                     raise ERROR_CODE_MAP[code](message)
    148                 except KeyError:
--> 149                     raise GeocoderServiceError(message)
    150             elif isinstance(error, URLError):
    151                 if "timed out" in message:
GeocoderServiceError: HTTP Error 500: Internal Server Error
我已经束手无策了。我更新了所有的库,但问题依旧。


提前感谢

老实说,你的做法有点不合常规:你在一个 Series 上调用了
apply
,却又试图用多个列拼接出一个字符串作为参数,这种做法是错误的。你可以在 DataFrame 上调用 apply 并传入
axis=1
,这样每次传入的是一整行,你可以在 lambda 函数中访问各列并把结果传给
locate
,或者在
locate
内部提取各列的值;也可以直接把所有列拼接成一个 Series,然后在这个 Series 上调用 apply:

# Build one full address string per row, then geocode each string by
# passing the locate function itself (not a call to it) to apply().
full_address = (
    df_addr['CITY'] + ' '
    + df_addr['PROVINCE'] + ' '
    + df_addr['STATE'] + ' '
    + df_addr['ZIP_CODE'] + ' '
    + df_addr['COUNTRY']
)
df_addr[['LAT','LON']] = full_address.apply(locate)

我相信上述方法应该有效。

于是,基于 Ed Chum 的见解,我写出了下面这段虽然有些绕、但确实能用的代码:

# Geocode every distinct address once, then attach the coordinates to df_addr.

# Create a summary address field on the address dataframe; it doubles as the
# merge key below.
df_addr['BIG_ADDR'] = (df_addr['CITY'] + ' ' + df_addr['PROVINCE'] + ' ' +
                       df_addr['STATE'] + ' ' + df_addr['ZIP_CODE'] + ' ' +
                       df_addr['COUNTRY'])

# Eliminate dups so each address is sent to the geocoder only once.
# drop=True keeps the old row index out of df_geo, so no stray 'index'
# column leaks into df_addr through the merge.
df_geo = df_addr[['BIG_ADDR']].drop_duplicates().reset_index(drop=True)

# Geocode ALL THINGS in GEO frame!
df_geo[['LAT','LON']] = df_geo['BIG_ADDR'].apply(locate)

# Combine the address and geo frames
df_addr = pd.merge(df_addr, df_geo, on=['BIG_ADDR'], how='left')
# Cleanup: if df_addr already had LAT/LON columns, the merge suffixes the new
# ones with '_y'; give them their plain names back. No-op otherwise.
df_addr.rename(columns={'LAT_y': 'LAT', 'LON_y': 'LON'}, inplace=True)

我不明白这段代码怎么可能工作:你传入的是若干个 Series,并试图把它们拼接起来作为参数,而不是传入一个字符串。你必须显式地把每个 Series 作为参数传入,或者传入整行并在函数内部构造字符串。问题会不会是由于数据缺失导致空格太多?你的打印输出似乎说明了这一点。如果你直接传入字符串 'GLASGOW LANARK  G20 9NB SCOTLAND',它还会失败吗?Ed,我明白你的意思——我传入的是一个 Series——但我不确定该如何修复。除了用 apply 遍历整张表,或者传入 5 个参数再遍历整张表之外,我想不到别的办法。我原以为 apply() 已经替我做了这件事——对每一行调用一次函数。调试输出似乎也说明了这一点,因为它显示 x 的类型是 str,而不是 Series。嗯……我相信你说的,我只是还在努力把它想明白。