Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/json/13.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Python 无法从json响应中解析StreetAddress_Python_Json_Python 3.x_Web Scraping - Fatal编程技术网

Python 无法从json响应中解析StreetAddress

Python 无法从json响应中解析StreetAddress,python,json,python-3.x,web-scraping,Python,Json,Python 3.x,Web Scraping,我已经编写了一个脚本,从json响应中获取街道地址es,但我无法访问该部分。我觉得这个结构有点复杂 包含街道地址的响应块es: [[44189579,25735941,-80305513,"$640K",1,0,0,0,["$640K",4,3.0,1963,false,null,"6,000 sqft lot","","ForSale","For Sale by Owner",0,{"zpid": 44189579,"streetAddress": "6811 SW 38th St","zi

我已经编写了一个脚本,想从 JSON 响应中获取各个街道地址(streetAddress),但我无法访问到那一部分。我觉得这个响应的结构有点复杂。

包含街道地址(streetAddress)的响应片段:

[[44189579,25735941,-80305513,"$640K",1,0,0,0,["$640K",4,3.0,1963,false,null,"6,000 sqft lot","","ForSale","For Sale by Owner",0,{"zpid": 44189579,"streetAddress": "6811 SW 38th St","zipcode": "33155","city": "Miami","state": "FL","latitude": 25.735941,"longitude": -80.305513,"price": 640000.0,"dateSold": 0,"bathrooms": 3.0,"bedrooms": 4.0,"livingArea": 1963.0,"yearBuilt": -1,"lotSize": 6000.0,"homeType": "SINGLE_FAMILY",
到目前为止,我已经尝试过:

import requests

# Search endpoint together with the full query string used for the request.
url = "https://www.zillow.com/search/GetResults.htm?spt=homes&status=100000&lt=111101&ht=100000&pr=,&mp=,&bd=0%2C&ba=0%2C&sf=,&lot=0%2C&yr=,&singlestory=0&hoa=0%2C&pho=0&pets=0&parking=0&laundry=0&income-restricted=0&fr-bldg=0&condo-bldg=0&furnished-apartments=0&cheap-apartments=0&studio-apartments=0&pnd=0&red=0&zso=0&days=any&ds=all&pmf=0&pf=0&sch=100111&zoom=11&rect=-80419407,25712692,-80201741,25759392&p=1&sort=days&search=map&rid=72458&rt=7&listright=true&isMapSearch=1&zoom=11"

# Without a timeout, requests may wait indefinitely on an unresponsive server.
res = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=30)
# Surface HTTP errors directly instead of failing later while decoding a
# non-JSON error page.
res.raise_for_status()
print(res.json()['map']['properties'])
预期成果:

6811 SW 38th St

以此类推……

您需要的是一个递归遍历返回对象的函数,并在它遇到的所有字典中查找您的键

def traverse(source, target_key: str, storage: list):
    """Collect every value stored under *target_key* in a nested structure.

    Walks ``source`` depth-first through nested dicts and lists. Each time a
    dict entry's key equals ``target_key`` its value is appended to
    ``storage``; a matched value is not searched any further. Anything that is
    neither a dict nor a list is ignored.

    Args:
        source: The object to search (typically decoded JSON).
        target_key: The dictionary key to look for.
        storage: List that receives the matching values, in visiting order.

    Returns:
        None. Results are delivered by mutating ``storage``.
    """
    containers = (dict, list)

    if isinstance(source, list):
        for element in source:
            # Scalars cannot contain the key, so only descend into containers.
            if isinstance(element, containers):
                traverse(element, target_key, storage)
        return

    if not isinstance(source, dict):
        return

    for name, value in source.items():
        if name == target_key:
            storage.append(value)
            continue
        if isinstance(value, containers):
            traverse(value, target_key, storage)

# Dictionary key whose values should be collected.
key = "streetAddress"

# Sample of the nested list/dict structure shown in the question.
source = [
    [
        44189579,
        25735941,
        -80305513,
        "$640K",
        1,
        0,
        0,
        0,
        [
            "$640K",
            4,
            3.0,
            1963,
            False,
            None,
            "6,000 sqft lot",
            "",
            "ForSale",
            "For Sale by Owner",
            0,
            {
                "zpid": 44189579,
                "streetAddress": "6811 SW 38th St",
                "zipcode": "33155",
                "city": "Miami",
                "state": "FL",
                "latitude": 25.735941,
                "longitude": -80.305513,
                "price": 640000.0,
                "dateSold": 0,
                "bathrooms": 3.0,
                "bedrooms": 4.0,
                "livingArea": 1963.0,
                "yearBuilt": -1,
                "lotSize": 6000.0,
                "homeType": "SINGLE_FAMILY",
            },
        ],
    ],
]

# traverse() fills this list in place with every value found under `key`.
storage = []
traverse(source, key, storage)
print(storage)
输出:

['6811 SW 38th St']

您需要的是一个递归遍历返回对象的函数,并在它遇到的所有字典中查找您的键

def traverse(source, target_key: str, storage: list):
    """Collect every value stored under *target_key* in a nested structure.

    Walks ``source`` depth-first through nested dicts and lists. Each time a
    dict entry's key equals ``target_key`` its value is appended to
    ``storage``; a matched value is not searched any further. Anything that is
    neither a dict nor a list is ignored.

    Args:
        source: The object to search (typically decoded JSON).
        target_key: The dictionary key to look for.
        storage: List that receives the matching values, in visiting order.

    Returns:
        None. Results are delivered by mutating ``storage``.
    """
    containers = (dict, list)

    if isinstance(source, list):
        for element in source:
            # Scalars cannot contain the key, so only descend into containers.
            if isinstance(element, containers):
                traverse(element, target_key, storage)
        return

    if not isinstance(source, dict):
        return

    for name, value in source.items():
        if name == target_key:
            storage.append(value)
            continue
        if isinstance(value, containers):
            traverse(value, target_key, storage)

# Dictionary key whose values should be collected.
key = "streetAddress"

# Sample of the nested list/dict structure shown in the question.
source = [
    [
        44189579,
        25735941,
        -80305513,
        "$640K",
        1,
        0,
        0,
        0,
        [
            "$640K",
            4,
            3.0,
            1963,
            False,
            None,
            "6,000 sqft lot",
            "",
            "ForSale",
            "For Sale by Owner",
            0,
            {
                "zpid": 44189579,
                "streetAddress": "6811 SW 38th St",
                "zipcode": "33155",
                "city": "Miami",
                "state": "FL",
                "latitude": 25.735941,
                "longitude": -80.305513,
                "price": 640000.0,
                "dateSold": 0,
                "bathrooms": 3.0,
                "bedrooms": 4.0,
                "livingArea": 1963.0,
                "yearBuilt": -1,
                "lotSize": 6000.0,
                "homeType": "SINGLE_FAMILY",
            },
        ],
    ],
]

# traverse() fills this list in place with every value found under `key`.
storage = []
traverse(source, key, storage)
print(storage)
输出:

['6811 SW 38th St']

@ebro42 是正确的,获取数据的最佳方法是递归遍历 JSON 数据对象。我认为他的方案还可以改进:不必依赖调用方传入的结果容器,而是把函数写成一个可以直接迭代的生成器。

from typing import Iterable

def get_by_key(key: str, collection: Iterable):
    """Lazily yield every value stored under *key* in a nested structure.

    Dicts are searched entry by entry: a value whose key equals ``key`` is
    yielded as-is and not searched further; any other value is searched
    recursively when it is a non-string iterable. Other non-string iterables
    are searched element by element. Strings and non-iterables yield nothing.

    Args:
        key: The dictionary key to look for.
        collection: The object to search (typically decoded JSON).

    Yields:
        Each matching value, in depth-first visiting order.
    """
    # Strings are iterable, but descending into them would only walk their
    # characters, so they are treated like scalars.
    if isinstance(collection, str) or not isinstance(collection, Iterable):
        return

    if not isinstance(collection, dict):
        for element in collection:
            yield from get_by_key(key, element)
        return

    for name, value in collection.items():
        if name == key:
            yield value
            continue
        if isinstance(value, str) or not isinstance(value, Iterable):
            continue
        yield from get_by_key(key, value)

# Decode the response body once, then print each street address as the
# generator produces it.
payload = res.json()
for address in get_by_key('streetAddress', payload):
    print(address)

@ebro42 是正确的,获取数据的最佳方法是递归遍历 JSON 数据对象。我认为他的方案还可以改进:不必依赖调用方传入的结果容器,而是把函数写成一个可以直接迭代的生成器。

from typing import Iterable

def get_by_key(key: str, collection: Iterable):
    """Lazily yield every value stored under *key* in a nested structure.

    Dicts are searched entry by entry: a value whose key equals ``key`` is
    yielded as-is and not searched further; any other value is searched
    recursively when it is a non-string iterable. Other non-string iterables
    are searched element by element. Strings and non-iterables yield nothing.

    Args:
        key: The dictionary key to look for.
        collection: The object to search (typically decoded JSON).

    Yields:
        Each matching value, in depth-first visiting order.
    """
    # Strings are iterable, but descending into them would only walk their
    # characters, so they are treated like scalars.
    if isinstance(collection, str) or not isinstance(collection, Iterable):
        return

    if not isinstance(collection, dict):
        for element in collection:
            yield from get_by_key(key, element)
        return

    for name, value in collection.items():
        if name == key:
            yield value
            continue
        if isinstance(value, str) or not isinstance(value, Iterable):
            continue
        yield from get_by_key(key, value)

# Decode the response body once, then print each street address as the
# generator produces it.
payload = res.json()
for address in get_by_key('streetAddress', payload):
    print(address)
这就行了

import requests
from jsonpath_ng.ext import parse
# Search endpoint together with the full query string used for the request.
url = "https://www.zillow.com/search/GetResults.htm?spt=homes&status=100000&lt=111101&ht=100000&pr=,&mp=,&bd=0%2C&ba=0%2C&sf=,&lot=0%2C&yr=,&singlestory=0&hoa=0%2C&pho=0&pets=0&parking=0&laundry=0&income-restricted=0&fr-bldg=0&condo-bldg=0&furnished-apartments=0&cheap-apartments=0&studio-apartments=0&pnd=0&red=0&zso=0&days=any&ds=all&pmf=0&pf=0&sch=100111&zoom=11&rect=-80419407,25712692,-80201741,25759392&p=1&sort=days&search=map&rid=72458&rt=7&listright=true&isMapSearch=1&zoom=11"

# Without a timeout, requests may wait indefinitely on an unresponsive server.
res = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=30)
# Surface HTTP errors directly instead of failing later while decoding a
# non-JSON error page.
res.raise_for_status()
properties = res.json()['map']['properties']

# The expression is the same for every property, so parse it once rather than
# on each loop iteration.
street_address_path = parse("$..streetAddress")

for p in properties:
    found = street_address_path.find(p)
    # A property without a streetAddress yields no matches; skip it instead of
    # raising IndexError on found[0].
    if found:
        print(found[0].value)
这就行了

import requests
from jsonpath_ng.ext import parse
# Search endpoint together with the full query string used for the request.
url = "https://www.zillow.com/search/GetResults.htm?spt=homes&status=100000&lt=111101&ht=100000&pr=,&mp=,&bd=0%2C&ba=0%2C&sf=,&lot=0%2C&yr=,&singlestory=0&hoa=0%2C&pho=0&pets=0&parking=0&laundry=0&income-restricted=0&fr-bldg=0&condo-bldg=0&furnished-apartments=0&cheap-apartments=0&studio-apartments=0&pnd=0&red=0&zso=0&days=any&ds=all&pmf=0&pf=0&sch=100111&zoom=11&rect=-80419407,25712692,-80201741,25759392&p=1&sort=days&search=map&rid=72458&rt=7&listright=true&isMapSearch=1&zoom=11"

# Without a timeout, requests may wait indefinitely on an unresponsive server.
res = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=30)
# Surface HTTP errors directly instead of failing later while decoding a
# non-JSON error page.
res.raise_for_status()
properties = res.json()['map']['properties']

# The expression is the same for every property, so parse it once rather than
# on each loop iteration.
street_address_path = parse("$..streetAddress")

for p in properties:
    found = street_address_path.find(p)
    # A property without a streetAddress yields no matches; skip it instead of
    # raising IndexError on found[0].
    if found:
        print(found[0].value)
可能的重复