Python-无法从dict解析对象
我有两个版本的Python脚本。第一个版本运行良好,但看起来很难看。从我的观点来看,第二种方法应该有效,但实际上不行。在第二个版本中,我总是遇到以下错误: new_object=Media.objects.create(filename=i.get('filename'), AttributeError:“str”对象没有属性“get” 我也试过I.get['filename'],但运气不好,我做错了什么?! 我不明白的一点是,“在clean_result['streams']”中的s可以很好地工作,但是“在clean_result['format']”中的I不可以,但不是相同的,一个我可以拔出键的dict?!我可能有一个列表和一个dict混合在一起吗 第一个工作版本:Python-无法从dict解析对象,python,json,model,Python,Json,Model,我有两个版本的Python脚本。第一个版本运行良好,但看起来很难看。从我的观点来看,第二种方法应该有效,但实际上不行。在第二个版本中,我总是遇到以下错误: new_object=Media.objects.create(filename=i.get('filename'), AttributeError:“str”对象没有属性“get” 我也试过I.get['filename'],但运气不好,我做错了什么?! 我不明白的一点是,“在clean_result['streams']”中的s可以很好地
def import_media(arg):
"""Takes a single media file as argument from disk or http/https (descriptor="/path/to/file.mp4/m4a/mp3...")
and scrapes it for metadata. A utf8 encoded JSON object will be returned"""
def get_plist_names(name_dict):
return [o["string"] for o in (name_dict if isinstance(name_dict, list) else [name_dict])]
def path_leaf(path):
filename = path.split("?")[0]
return filename
descriptor = str("path_to_my_media_file")
scrape_cmd = join(['ffprobe', '-print_format', 'json',
'-show_streams', '-show_format', '-loglevel',
'quiet', '-hide_banner', descriptor])
scrapped_assets = subprocess.getoutput(scrape_cmd)
clean_result = json.loads(scrapped_assets)
metadata = json.dumps(clean_result, indent=4, ensure_ascii=False, sort_keys=True).encode('utf8').strip().decode()
print(metadata)
filename_value = None
bitrate_value = None
duration_value = None
size_value = None
nb_streams_value = None
media_type_value = None
title_value = None
artist_value = None
release_date_value = None
genre_value = None
synopsis_value = None
disc_value = None
total_discs_value = None
track_value = None
total_tracks_value = None
copyright_value = None
quality_lvl_value = None
album_value = None
show_value = None
season_number_value = None
episode_id_value = None
episode_sort_value = None
rating_unit_value = None
rating_value = None
cast_value = None
directors_value = None
producers_value = None
screenwriters_value = None
studio_value = None
# Parse Metadata
if 'filename' in clean_result['format']:
filename_value = path_leaf(str(clean_result['format']['filename']))
if 'bit_rate' in clean_result['format']:
bitrate_value = int(clean_result['format']['bit_rate'])
if 'duration' in clean_result['format']:
duration_value = clean_result['format']['duration']
if 'size' in clean_result['format']:
size_value = int(clean_result['format']['size'])
if 'nb_streams' in clean_result['format']:
nb_streams_value = int(clean_result['format']['nb_streams'])
if 'media_type' in clean_result['format']['tags']:
media_type_value = int(clean_result['format']['tags']['media_type'])
if 'title' in clean_result['format']['tags']:
title_value = clean_result['format']['tags']['title']
if 'artist' in clean_result['format']['tags']:
artist_value = clean_result['format']['tags']['artist']
if 'date' in clean_result['format']['tags']:
release_date_value = str(clean_result['format']['tags']['date'][:10])
if 'genre' in clean_result['format']['tags']:
genre_value = str(clean_result['format']['tags']['genre'])
if 'synopsis' in clean_result['format']['tags']:
synopsis_value = str(clean_result['format']['tags']['synopsis'])
if 'disc' in clean_result['format']['tags']:
try:
disc_value = int(clean_result['format']['tags']['disc'].rsplit('/', 1)[0])
total_discs_value = int(clean_result['format']['tags']['disc'].rsplit('/', 1)[1])
except Exception as e:
pass
if 'track' in clean_result['format']['tags']:
try:
track_value = int(clean_result['format']['tags']['track'].rsplit('/', 1)[0])
total_tracks_value = int(clean_result['format']['tags']['track'].rsplit('/', 1)[1])
except Exception as e:
pass
if 'copyright' in clean_result['format']['tags']:
copyright_value = str(clean_result['format']['tags']['copyright'])
if 'hd_video' in clean_result['format']['tags']:
quality_lvl_value = int(clean_result['format']['tags']['hd_video'])
# TV-Show Specifics Metadata
if 'album' in clean_result['format']['tags']:
album_value = str(clean_result['format']['tags']['album'])
if 'season_number' in clean_result['format']['tags']:
season_number_value = int(clean_result['format']['tags']['season_number'])
if 'show' in clean_result['format']['tags']:
show_value = str(clean_result['format']['tags']['show'])
if 'episode_id' in clean_result['format']['tags']:
episode_id_value = str(clean_result['format']['tags']['episode_id'])
if 'episode_sort' in clean_result['format']['tags']:
episode_sort_value = int(clean_result['format']['tags']['episode_sort'])
# Packet Metadata
if 'iTunEXTC' in clean_result['format']['tags']:
rating_unit_value = str(clean_result['format']['tags']['iTunEXTC'].rsplit('|', 3)[0])
rating_value = str(clean_result['format']['tags']['iTunEXTC'].rsplit('|', 3)[1])
if 'iTunMOVI' in clean_result['format']['tags']:
xml_parse = xmltodict.parse(clean_result['format']['tags']['iTunMOVI']) # Parse given XML data to dict.
dump_json = json.dumps(xml_parse) # dump given dict as JSON
plist_metadata = json.loads(dump_json)
dict = plist_metadata['plist']['dict']
zipped = zip(dict["key"], dict["array"])
result = {"cast": [], "directors": [], "screenwriters": [], "studio": [], "producers": [], } | {
k: get_plist_names(v["dict"]) for k, v in zipped}
cast_value = str(', '.join(result["cast"]))
directors_value = str(', '.join(result["directors"]))
producers_value = str(', '.join(result["producers"]))
screenwriters_value = str(', '.join(result["screenwriters"]))
if 'studio' in plist_metadata['plist']['dict']['key']:
studio_value = plist_metadata['plist']['dict']['string']
if not Media.objects.filter(filename=filename_value).exists():
new_object = Media.objects.create(filename=filename_value, bitrate=bitrate_value, duration=duration_value,
size=size_value, nb_streams=nb_streams_value, media_type=media_type_value,
title=title_value, artist=artist_value, release_date=release_date_value,
genre=genre_value, synopsis=synopsis_value, disc=disc_value,
total_discs=total_discs_value, track=track_value, total_tracks=total_tracks_value,
copyright=copyright_value, quality_lvl=quality_lvl_value, album=album_value,
season_number=season_number_value, show=show_value, episode_id=episode_id_value,
episode_sort=episode_sort_value, rating_unit=rating_unit_value,
rating=rating_value,
cast=cast_value, directors=directors_value, producers=producers_value,
screenwriters=screenwriters_value, studio=studio_value
)
new_object.save()
for s in clean_result['streams']:
stream = MediaStreams.objects.create(
index=s.get('index'),
stream_bitrate=s.get('bit_rate'),
codec_name=s.get('codec_name'),
codec_type=s.get('codec_type'),
width=s.get('width'),
height=s.get('height'),
channel_layout=s.get('channel_layout'),
language=s.get("tags", {'x': None}).get("language"),
media=new_object
)
stream.save()
第二个版本,不工作:
def import_media(arg):
"""Takes a single media file as argument from disk or http/https (descriptor="/path/to/file.mp4")
and scrapes it for metadata. A utf8 encoded JSON object will be returned"""
def get_plist_names(name_dict):
return [o["string"] for o in (name_dict if isinstance(name_dict, list) else [name_dict])]
def path_leaf(path):
filename = path.split("?")[0]
return filename
descriptor = str("path_to_my_media_file")
scrape_cmd = join(['ffprobe', '-print_format', 'json',
'-show_streams', '-show_format', '-loglevel',
'quiet', '-hide_banner', descriptor])
scrapped_assets = subprocess.getoutput(scrape_cmd)
clean_result = json.loads(scrapped_assets)
metadata = json.dumps(clean_result, indent=4, ensure_ascii=False, sort_keys=True).encode('utf8').strip().decode()
print(metadata)
# General Meta
filename_value = path_leaf(str(clean_result['format']['filename']))
cast_value = None
directors_value = None
producers_value = None
screenwriters_value = None
studio_value = None
# Parse Metadata
if 'iTunMOVI' in clean_result['format']['tags']:
xml_parse = xmltodict.parse(clean_result['format']['tags']['iTunMOVI']) # Parse given XML data to dict.
dump_json = json.dumps(xml_parse) # dump given dict as JSON
plist_metadata = json.loads(dump_json)
dict = plist_metadata['plist']['dict']
zipped = zip(dict["key"], dict["array"])
result = {"cast": [], "directors": [], "screenwriters": [], "studio": [], "producers": [], } | {k: get_plist_names(v["dict"]) for k, v in zipped}
cast_value = str(', '.join(result["cast"]))
directors_value = str(', '.join(result["directors"]))
producers_value = str(', '.join(result["producers"]))
screenwriters_value = str(', '.join(result["screenwriters"]))
if 'studio' in plist_metadata['plist']['dict']['key']:
studio_value = plist_metadata['plist']['dict']['string']
if not Media.objects.filter(filename=filename_value).exists():
for i in clean_result['format']:
new_object = Media.objects.create(filename=i.get('filename'),
bitrate=i.get('bit_rate'),
duration=i.get('duration'),
size=i.get('size'),
nb_streams=i.get('nb_streams'),
media_type=i.get("tags", {'x': None}).get(int(("media_type"))),
title=i.get("tags", {'x': None}).get(str(("title"))),
artist=i.get("tags", {'x': None}).get(str(("artist"))),
release_date=i.get("tags", {'x': None}).get(str(("release_date")[:10])),
genre=i.get("tags", {'x': None}).get(str(("genre"))),
synopsis=i.get("tags", {'x': None}).get(str(("synopsis"))),
disc=i.get("tags", {'x': None}).get(int(("disc").rsplit('/', 1)[0])),
total_discs=i.get("tags", {'x': None}).get(int(("disc").rsplit('/', 1)[1])),
track=i.get("tags", {'x': None}).get(int(("track").rsplit('/', 1)[0])),
total_tracks=i.get("tags", {'x': None}).get(int(("total_tracks").rsplit('/', 1)[1])),
copyright=i.get("tags", {'x': None}).get(str(("copyright"))),
quality_lvl=i.get("tags", {'x': None}).get(int(("hd_video"))),
album=i.get("tags", {'x': None}).get(str(("album"))),
season_number=i.get("tags", {'x': None}).get(int(("season_number"))),
show=i.get("tags", {'x': None}).get(str(("show"))),
episode_id=i.get("tags", {'x': None}).get(int(("episode_id"))),
episode_sort=i.get("tags", {'x': None}).get(int(("episode_sort"))),
rating_unit=i.get("tags", {'x': None}).get(str(("iTunEXTC").rsplit('|', 3)[0])),
rating=i.get("tags", {'x': None}).get(str(("iTunEXTC").rsplit('|', 3)[1])),
cast=i.get(str(cast_value)),
directors=i.get(str(directors_value)),
producers=i.get(str(producers_value)),
screenwriters=i.get(str(screenwriters_value)),
studio=i.get(str(studio_value))
)
new_object.save()
for s in clean_result['streams']:
stream = MediaStreams.objects.create(
index=s.get('index'),
stream_bitrate=s.get('bit_rate'),
codec_name=s.get('codec_name'),
codec_type=s.get('codec_type'),
width=s.get('width'),
height=s.get('height'),
channel_layout=s.get('channel_layout'),
language=s.get("tags", {'x': None}).get("language"),
media=new_object
)
stream.save()
您的问题太多了。如果您将问题缩减到重现错误所需的最小行数,这将很有帮助。如果您包含
clean\u result
的内容,这也会很有帮助
您的错误告诉您,i
是一个字符串。因此,它没有方法get
。在我看来,在您的第一个版本中,clean_result['format']中没有与i对应的,因此您应该在第二个版本中查找错误
在下面的几行中,您试图从i
获取字段(filename=i.get('filename')
等等),但是如果clean_result['format']
是一个dict(似乎是您代码中的一个),则i
可能是dict的键。查看在
中使用迭代dict时发生的情况
您在第一个版本中没有遇到此错误,因为您正在逐个访问字段,例如
if 'show' in clean_result['format']['tags']:
show_value = str(clean_result['format']['tags']['show'])
clean_result['format']的数据类型是什么?我猜是dict还是list。里面是int、decimal和stringsEach“I”的混合体,从clean_result[“format”]中提取必须是一个dict,因为.get函数正在应用于此。好吧,问题是为什么这不起作用?我想使用get提取字符串应该不是问题,但是str由于某种原因不支持此位置的get方法。如果我们讨论数据类型I,那么代码中的for s和for I之间的区别在哪里t仅仅可以是输入格式,对吗?使用for s循环可以提取clean_结果['streams'],而使用i循环可以提取clean_结果['format']。从streams和format中提取的数据可以是不同的数据类型,因此您面临的不匹配