Python - 由API的limit参数引发的502 Bad Gateway(错误网关)错误
标签:python, api, urllib, ckan
我正在尝试从CKAN API URL检索数据:
import urllib.request
import json
import pandas as pd
# CKAN datastore_search endpoint; limit=1000000 asks for up to one million
# records in a single response.
url = 'https://data.gov.il/api/3/action/datastore_search?resource_id=dcf999c1-d394-4b57-a5e0-9d014a62e046&limit=1000000'

# Read the whole body while the connection is open. urlopen raises
# urllib.error.HTTPError for any status outside 200-299.
with urllib.request.urlopen(url) as response:
    html = response.read()

# The payload is JSON (despite the variable name); the rows live under
# result -> records.
result = json.loads(html)
df = pd.DataFrame(result['result']['records'])
但是得到以下错误:
---------------------------------------------------------------------------
HTTPError Traceback (most recent call last)
<ipython-input-44-8484123eecdc> in <module>
2 import pandas as pd
3 url = 'https://data.gov.il/api/3/action/datastore_search?resource_id=dcf999c1-d394-4b57-a5e0-9d014a62e046&limit=1000000'
----> 4 with urllib.request.urlopen(url) as response:
5 html = response.read()
6 result = json.loads(html)
~\miniconda3\lib\urllib\request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
220 else:
221 opener = _opener
--> 222 return opener.open(url, data, timeout)
223
224 def install_opener(opener):
~\miniconda3\lib\urllib\request.py in open(self, fullurl, data, timeout)
529 for processor in self.process_response.get(protocol, []):
530 meth = getattr(processor, meth_name)
--> 531 response = meth(req, response)
532
533 return response
~\miniconda3\lib\urllib\request.py in http_response(self, request, response)
638 # request was successfully received, understood, and accepted.
639 if not (200 <= code < 300):
--> 640 response = self.parent.error(
641 'http', request, response, code, msg, hdrs)
642
~\miniconda3\lib\urllib\request.py in error(self, proto, *args)
567 if http_err:
568 args = (dict, 'default', 'http_error_default') + orig_args
--> 569 return self._call_chain(*args)
570
571 # XXX probably also want an abstract factory that knows when it makes
~\miniconda3\lib\urllib\request.py in _call_chain(self, chain, kind, meth_name, *args)
500 for handler in handlers:
501 func = getattr(handler, meth_name)
--> 502 result = func(*args)
503 if result is not None:
504 return result
~\miniconda3\lib\urllib\request.py in http_error_default(self, req, fp, code, msg, hdrs)
647 class HTTPDefaultErrorHandler(BaseHandler):
648 def http_error_default(self, req, fp, code, msg, hdrs):
--> 649 raise HTTPError(req.full_url, code, msg, hdrs, fp)
650
651 class HTTPRedirectHandler(BaseHandler):
HTTPError: HTTP Error 502: Bad Gateway
如果使用较小的限制(例如 limit=10000),一切正常。如果我不设置任何限制,则只会返回前100条记录。谁能解释一下为什么会这样?这是服务器端的限制吗?我该如何绕过这个问题,以便无论数据集中有多少条记录都能获取完整的数据集(数据会频繁更新,不断添加新记录)?
另外,这是从CKAN API获取数据的正确方法吗?如果不是,我很乐意了解正确的做法。
CKAN API有一些限制:如果您需要查询超过100条记录,就需要设置偏移量(offset)并根据需要多次查询,类似分页。如果存在上限,API文档应当会注明该限制。我相当确定这里是有上限的。
# Variant of the request URL with a smaller limit (10000); the '...' is an
# abbreviation in the post, presumably for the same datastore_search URL.
url = 'https://...&limit=10000'