Python 使用列表指定要使用的子词典
不知道我想怎么做,但基本上我有一个项目清单Python 使用列表指定要使用的子词典,python,Python,不知道我想怎么做,但基本上我有一个项目清单 section = ['messages','ProcQueueLen'] 或 …取决于我们所在的部门 以及一些属于procqueuelen部分的数据点 我想创建一个动态字典,以便将数据点(作为字典)添加到正确的字典条目中。例如: <setup> logfile = cdm.log loglevel = 0 cpu_usage_includes_wait = yes internal_alarm_message =
section = ['messages','ProcQueueLen']
或
…取决于我们当前所在的节(section)
以及一些属于 ProcQueueLen 节的数据点
我想创建一个动态字典,以便将数据点(作为字典)添加到正确的字典条目中。例如:
<setup>
logfile = cdm.log
loglevel = 0
cpu_usage_includes_wait = yes
internal_alarm_message = InternalAlarm
mem_buffer_used = no
alarm_on_each_sample = no
qos_source_short = yes
trendsubject = cdm
trendpriority = information
paging_in_kilobytes = yes
post_install = 1382462705
allow_qos_source_as_target = no
monitor_iostat = yes
allow_remote_disk_info = yes
</setup>
<messages>
<ProcQueueLen>
text = Average ($value_number samples)
processor queue length is $value$unit, which is >= $value_limit$unit. Last value is $value_last$unit.
level = minor
token = proc_q_len
</ProcQueueLen>
<CpuError>
text = Average ($value_number samples) total cpu is now $value$unit, which is above the error threshold ($value_limit$unit)
level = major
token = cpu_error
i18n_token = as#system.cdm.avrg_total_cpu_above_err_threshold
</CpuError>
</messages>
我正在逐行阅读一个包含这些不同部分的文件,并根据需要通过追加和弹出部分来设置条目进入的部分。但是我不知道如何根据这个节列表指定嵌套字典
这是无效的xml,因为它没有正确的节,并且包含无效字符。我试过beautifulsoup,但速度很慢。通过将数据放入嵌套字典中,我可以更快更容易地导航
目前我仅有的代码如下:
conf = {'messages':{'ProcQueueLen':{'text':'Average ($value_number samples) processor queue length is $value$unit, which is >= $value_limit$unit. Last value is $value_last$unit.','level':'minor','token':'proc_q_len'},'CpuError':{'text':'Average ($value_number samples) total cpu is now $value$unit, which is above the error threshold ($value_limit$unit)','level':'major','token':'cpu_error','i18n_token':'as#system.cdm.avrg_total_cpu_above_err_threshold'}}}
# Asker's attempt at building a nested dict from the sectioned config text.
# NOTE: indentation was lost in this listing, and `out` (the raw file text)
# is defined elsewhere.
conf = {}
# Names of the sections opened so far (never popped in this version).
section = []
# NOTE(review): '\\n' splits on a literal backslash followed by "n";
# presumably a real newline ('\n') was intended -- confirm against the input.
for i, line in enumerate(out.split('\\n')):
l = line.strip()
# Debug aid: echo the first 20 lines.
if i < 20:
print(l)
# Closing tag: only reported; `section` is not popped here.
if l.startswith('</'):
print('skipping')
# Opening tag: the key keeps its angle brackets (e.g. "<messages>") and is
# always created at the top level of `conf`, regardless of nesting.
elif l.startswith('<'):
conf[l] = {}
section.append(l)
print('create dbentry')
else:
# `section` is a list, so `conf[section]` raises
# "TypeError: unhashable type: 'list'"; this is the problem the question asks about.
# Splitting on '=' also cuts values that themselves contain '='.
conf[section][l.split('=')[0].strip()] = l.split('=')[1].strip()
print('add to dbentry')
conf = {}
section = []
for i, line in enumerate(out.split('\\n')):
l = line.strip()
if i < 20:
print(l)
if l.startswith('</'):
(上面的代码片段在此处被截断)
假设没有任意换行符,则可以使用递归和 BeautifulSoup:
from bs4 import BeautifulSoup as soup
import re, collections, functools
def parse(d):
    """Recursively convert a parsed markup node into nested dictionaries.

    Args:
        d: A node exposing ``name`` and ``contents`` (presumably a
            BeautifulSoup tag -- confirm against the caller). Objects
            without ``contents`` yield an empty mapping.

    Returns:
        A ``collections.defaultdict(dict)`` mapping ``d.name`` to a dict
        that holds the ``key = value`` pairs found in the node's text
        children, merged with the mappings produced by its child nodes.
    """
    _d = collections.defaultdict(dict)
    for i in getattr(d, 'contents', []):
        if isinstance(i, str) and i != '\n':
            # Each entry starts on a new line followed by whitespace.
            # Split every entry on the FIRST " = " only, so values that
            # themselves contain " = " stay in one piece.
            _r = [
                re.split(r'\s=\s', c, maxsplit=1)
                for c in re.split(r'\n\s+', i)
            ]
            # Entries without a " = " separator cannot form a (key, value)
            # pair; skip them instead of letting dict() raise ValueError.
            _d[d.name].update(
                (c[0], c[1]) for c in _r if len(c) == 2 and c[0]
            )
        else:
            _d[d.name].update(parse(i))
    return _d
输出:
{
"setup": {
"logfile": "cdm.log",
"loglevel": "0",
"cpu_usage_includes_wait": "yes",
"internal_alarm_message": "InternalAlarm",
"mem_buffer_used": "no",
"alarm_on_each_sample": "no",
"qos_source_short": "yes",
"trendsubject": "cdm",
"trendpriority": "information ",
"paging_in_kilobytes": "yes",
"post_install": "1382462705",
"allow_qos_source_as_target": "no",
"monitor_iostat": "yes",
"allow_remote_disk_info": "yes\n"
},
"messages": {
"procqueuelen": {
"text": "Average ($value_number samples) processor queue length is $value$unit, which is >= $value_limit$unit. Last value is $value_last$unit.",
"level": "minor",
"token": "proc_q_len"
},
"cpuerror": {
"text": "Average ($value_number samples) total cpu is now $value$unit, which is above the error threshold ($value_limit$unit)",
"level": "major",
"token": "cpu_error",
"i18n_token": "as#system.cdm.avrg_total_cpu_above_err_threshold"
}
}
}
可以使用xml.etree
和re
模块解析示例文本,前提是验证了以下假设:
- xml元素可以包含其他xml元素或文本,但不能同时包含两者
- 连续行不缩进
- xml元素中的标记名包含有效字符
代码可以是:
import io
import re
import xml.etree.ElementTree as ET


def process_text(t):
    """Parse XML-like configuration text into nested dictionaries.

    Every element becomes a dict keyed by its tag. An element with child
    elements holds one entry per child; an element without children has
    its text parsed as ``key = value`` lines.

    Args:
        t: The text to parse; it must be well-formed enough for
            ``xml.etree.ElementTree.fromstring``.

    Returns:
        A dict of the form ``{root_tag: {...}}``.

    Raises:
        xml.etree.ElementTree.ParseError: If ``t`` is not well-formed XML.
    """
    blank = re.compile(r'\s+')
    eq = re.compile(r'\s*([^=]*?)\s*=\s*(.*?)\s*$')

    def process_elt(elt, dic):  # process the XML part
        dic[elt.tag] = {}
        dic = dic[elt.tag]
        # Element.getchildren() was removed in Python 3.9; iterating the
        # element yields its direct children.
        children = list(elt)
        if children:
            for child in children:
                process_elt(child, dic)
        else:
            process_txt(elt.text, dic)

    def process_txt(t, dic):  # process the textual part
        old = None  # key of the most recently parsed "key = value" line
        for line in io.StringIO(t):
            # continuation lines are not indented
            if not blank.match(line) and old is not None:
                # The line is appended as-is (trailing newline included).
                dic[old] += ' ' + line
            elif line.strip() != '':  # skip empty line
                m = eq.match(line)
                if m is None:
                    # Report the malformed line and move on rather than
                    # failing on m.group() with m being None.
                    print('ERROR', line)
                    continue
                old = m.group(1)
                dic[old] = m.group(2)

    conf = {}
    root = ET.fromstring(t)
    process_elt(root, conf)
    return conf
通过您的精确输入文本,我获得:
{'messages': {'ProcQueueLen': {'text': 'Average ($value_number samples) processor queue length is $value$unit, which is >= $value_limit$unit. Last value is $value_last$unit.\n', 'level': 'minor', 'token': 'proc_q_len'}, 'CpuError': {'text': 'Average ($value_number samples) total cpu is now $value$unit, which is above the error threshold ($value_limit$unit)', 'level': 'major', 'token': 'cpu_error', 'i18n_token': 'as#system.cdm.avrg_total_cpu_above_err_threshold'}}}
如果可以重新定义输入语法,我建议使用普通的 .ini 文件和 Python 的 configparser 模块
我喜欢Ajax和Serge Ballista的答案,但如果您想修改现有代码以使其正常工作,请尝试以下方法:
import pprint

# Build a nested dict (`conf`) from sectioned "key = value" text.
# `out` holds the raw configuration text and must be defined before this runs.
conf = {}
section = []     # names of the currently open sections, outermost first
last_key = None  # most recent key in the current section, for continuation lines
for line in out.split('\n'):
    l = line.strip()
    if not l:
        # skip empty lines (including the one after the final newline)
        continue
    if l.startswith('</'):
        # we need to remove this from the list of current sections;
        # an unmatched closing tag is ignored instead of raising IndexError
        if section:
            section.pop()
        last_key = None
        print('skipping')
        continue
    # Walk down to the dict of the innermost open section. This replaces the
    # eval() on a string built from section names, which executed text taken
    # from the input and broke on names containing quotes.
    correct = conf
    for name in section:
        correct = correct[name]
    if l.startswith('<'):
        sec_name = l.strip("<>")  # what you wanted was conf["messages"], not conf["<messages>"]
        secstr = "".join(f"['{x}']" for x in section)  # only used for the log message below
        correct[sec_name] = {}  # set the new section to an empty dictionary
        section.append(sec_name)  # add the new section to the dictionary route
        last_key = None
        print(f"create dbentry: {secstr}['{sec_name}']")
        continue
    # Values may contain '=' (and even ' = '), so split on the FIRST ' = ' only.
    key, sep, value = l.partition(' = ')
    if sep:
        last_key = key.strip()
        correct[last_key] = value.strip()
    elif l.endswith(' ='):
        # "key =" with an empty value (the trailing space was stripped above)
        last_key = l[:-2].strip()
        correct[last_key] = ''
    elif last_key is not None:
        # No separator: treat the line as a continuation of the previous value.
        correct[last_key] += ' ' + l
    else:
        print(f"ignoring line without key: {l}")
        continue
    print(f"add to dbentry: {correct[last_key]}")
pprint.pprint(conf)
你能给我们展示一下你的代码吗?如果你可以依赖文本的缩进,那么就用它来解决嵌套问题。问题是如何序列化dict?你可以编写有效的json或xml。然后你就可以将其作为json或xml来读。这不是[节]的工作方式在这种情况下,需要一个节的列表,我不知道怎么做。你是说CpuError
或proqueuelen
可以有多个条目,比如:conf={'messages':{'proqueuelen':[{'text':'bla','level':'minor','token':'t'},{'text':'bla2','level':'minor','token':'t2'},{'text':'bla3','level':'minor','token':'t3'}]}}
?获取以下错误:“ValueError: dictionary update sequence element #0 has length 15; 2 is required”,出错的行是 `_d[d.name].update(dict([c for c in _r if c[0]]))`。我将用问题文本的更详细部分更新问题。输入非常大,因此很难获取所有重要内容。@user1601716 请发布引发该错误的输入。我修改了我认为导致该错误的示例输入,仍在同一步失败,请参阅主节中的更新。这对我来说是有效的,输入非常不规范,所以其他解决方案不能正常工作,谢谢!
import io
import re
import xml.etree.ElementTree as ET


def process_text(t):
    """Parse XML-like configuration text into nested dictionaries.

    Every element becomes a dict keyed by its tag. An element with child
    elements holds one entry per child; an element without children has
    its text parsed as ``key = value`` lines.

    Args:
        t: The text to parse; it must be well-formed enough for
            ``xml.etree.ElementTree.fromstring``.

    Returns:
        A dict of the form ``{root_tag: {...}}``.

    Raises:
        xml.etree.ElementTree.ParseError: If ``t`` is not well-formed XML.
    """
    blank = re.compile(r'\s+')
    eq = re.compile(r'\s*([^=]*?)\s*=\s*(.*?)\s*$')

    def process_elt(elt, dic):  # process the XML part
        dic[elt.tag] = {}
        dic = dic[elt.tag]
        # Element.getchildren() was removed in Python 3.9; iterating the
        # element yields its direct children.
        children = list(elt)
        if children:
            for child in children:
                process_elt(child, dic)
        else:
            process_txt(elt.text, dic)

    def process_txt(t, dic):  # process the textual part
        old = None  # key of the most recently parsed "key = value" line
        for line in io.StringIO(t):
            # continuation lines are not indented
            if not blank.match(line) and old is not None:
                # The line is appended as-is (trailing newline included).
                dic[old] += ' ' + line
            elif line.strip() != '':  # skip empty line
                m = eq.match(line)
                if m is None:
                    # Report the malformed line and move on rather than
                    # failing on m.group() with m being None.
                    print('ERROR', line)
                    continue
                old = m.group(1)
                dic[old] = m.group(2)

    conf = {}
    root = ET.fromstring(t)
    process_elt(root, conf)
    return conf
{'messages': {'ProcQueueLen': {'text': 'Average ($value_number samples) processor queue length is $value$unit, which is >= $value_limit$unit. Last value is $value_last$unit.\n', 'level': 'minor', 'token': 'proc_q_len'}, 'CpuError': {'text': 'Average ($value_number samples) total cpu is now $value$unit, which is above the error threshold ($value_limit$unit)', 'level': 'major', 'token': 'cpu_error', 'i18n_token': 'as#system.cdm.avrg_total_cpu_above_err_threshold'}}}
import pprint

# Build a nested dict (`conf`) from sectioned "key = value" text.
# `out` holds the raw configuration text and must be defined before this runs.
conf = {}
section = []     # names of the currently open sections, outermost first
last_key = None  # most recent key in the current section, for continuation lines
for line in out.split('\n'):
    l = line.strip()
    if not l:
        # skip empty lines (including the one after the final newline)
        continue
    if l.startswith('</'):
        # we need to remove this from the list of current sections;
        # an unmatched closing tag is ignored instead of raising IndexError
        if section:
            section.pop()
        last_key = None
        print('skipping')
        continue
    # Walk down to the dict of the innermost open section. This replaces the
    # eval() on a string built from section names, which executed text taken
    # from the input and broke on names containing quotes.
    correct = conf
    for name in section:
        correct = correct[name]
    if l.startswith('<'):
        sec_name = l.strip("<>")  # what you wanted was conf["messages"], not conf["<messages>"]
        secstr = "".join(f"['{x}']" for x in section)  # only used for the log message below
        correct[sec_name] = {}  # set the new section to an empty dictionary
        section.append(sec_name)  # add the new section to the dictionary route
        last_key = None
        print(f"create dbentry: {secstr}['{sec_name}']")
        continue
    # Values may contain '=' (and even ' = '), so split on the FIRST ' = ' only.
    key, sep, value = l.partition(' = ')
    if sep:
        last_key = key.strip()
        correct[last_key] = value.strip()
    elif l.endswith(' ='):
        # "key =" with an empty value (the trailing space was stripped above)
        last_key = l[:-2].strip()
        correct[last_key] = ''
    elif last_key is not None:
        # No separator: treat the line as a continuation of the previous value.
        correct[last_key] += ' ' + l
    else:
        print(f"ignoring line without key: {l}")
        continue
    print(f"add to dbentry: {correct[last_key]}")
pprint.pprint(conf)
{'messages': {'CpuError': {'i18n_token': 'as#system.cdm.avrg_total_cpu_above_err_threshold',
'level': 'major',
'text': 'Average ($value_number samples) total cpu '
'is now $value$unit, which is above the '
'error threshold ($value_limit$unit)',
'token': 'cpu_error'},
'ProcQueueLen': {'level': 'minor',
'text': 'Average ($value_number samples) '
'processor queue length is $value$unit, '
'which is >= $value_limit$unit. Last '
'value is $value_last$unit.',
'token': 'proc_q_len'}}}