Python 2.7 加载yaml文件时编码错误_Python 2.7_Ruamel.yaml

Python 2.7 加载yaml文件时编码错误

python-2.7

Python 2.7 加载yaml文件时编码错误,python-2.7,ruamel.yaml,Python 2.7,Ruamel.yaml,我正在使用python 2.7.13中的ruaml.yaml版本0.15.74。由于外来的限制，我不得不使用这种版本我的ulitmate目标是读取yaml文件并选择其中的某些部分，将其保存在pandas数据框中，最后将其写入csv文件。为此，我有以下自定义的“DoubleMergeKeyEnablerobject” import pandas as pd import ruamel.yaml import json import os yaml = ruamel.yaml.YAML() ya

我正在 Python 2.7.13 中使用 ruamel.yaml 0.15.74 版。由于外部限制，我只能使用这个版本。

我的最终目标是读取 yaml 文件并选取其中的某些部分，将其保存到 pandas 数据框中，最后写入 csv 文件。为此，我使用了下面这个自定义的 DoubleMergeKeyEnabler 对象：

import pandas as pd
import ruamel.yaml
import json
import os

# Shared ruamel.yaml handler used by the rest of the script.
yaml = ruamel.yaml.YAML()
# Very large width -- presumably so long lines are never wrapped on output
# (TODO confirm against the ruamel.yaml documentation).
yaml.width = 100000
# Presumably what lets quoted values be told apart by quote style later on
# (TODO confirm against the ruamel.yaml documentation).
yaml.preserve_quotes = True
yaml.indent(mapping=2, sequence=4, offset=2)

class DoubleMergeKeyEnabler(object):
    """Swap YAML merge-key text for numbered placeholder keys and back.

    ``convert`` turns each ``'<<: '`` in a document string into
    ``'[<<, N]: '`` with a running number N; ``revert`` undoes those
    substitutions.  The number of the last placeholder handed out is kept
    on the instance in ``pat_nr`` (-1 when none is outstanding).
    """

    def __init__(self):
        # -1 means no placeholder has been handed out yet
        self.pat_nr = -1
        # could be at the root level mapping, so no leading space
        self.pat = '<<: '
        # probably not using sequences as keys
        self.r_pat = '[<<, {}]: '

    def convert(self, doc):
        """Return ``doc`` with every ``pat`` replaced by a numbered ``r_pat``.

        Occurrences are handled one at a time, first to last, and each one
        increments ``pat_nr`` before the number is used.
        """
        while True:
            if self.pat not in doc:
                return doc
            self.pat_nr += 1
            numbered = self.r_pat.format(self.pat_nr)
            doc = doc.replace(self.pat, numbered, 1)

    def revert(self, doc):
        """Return ``doc`` with the numbered placeholders turned back into ``pat``.

        Placeholders are restored from the highest number down to 0, and
        ``pat_nr`` ends up at -1.
        """
        for nr in range(self.pat_nr, -1, -1):
            numbered = self.r_pat.format(nr)
            doc = doc.replace(numbered, self.pat, 1)
            self.pat_nr = nr - 1
        return doc


dmke = DoubleMergeKeyEnabler()

然后，我选取 yaml 文件的某一部分，并尝试为它定义一个用于跟踪的 id（该 id 将作为 pandas 数据框中对应条目的行名），再把它存入 pandas 数据框：

# Store the 'representation' text of each matching entry under 'items' as a
# new row of ``df``.  ``data`` and ``df`` are defined outside this fragment.
# This is the code the question reports as failing, so it is kept verbatim.
_item = data.get('items')
for i in range(0, len(_item)):
    # Only entries that have a 'representation' key whose value is a
    # DoubleQuotedScalarString are processed.
    if 'representation' in _item[i].keys() and isinstance(_item[i].get('representation'), ruamel.yaml.scalarstring.DoubleQuotedScalarString):
        # Row label: 'test_items_' followed by the text with spaces turned
        # into underscores.
        # NOTE(review): str() on the loaded value is presumably what raises
        # the reported UnicodeEncodeError for non-ASCII text under
        # Python 2 -- confirm against the traceback.
        _id = 'test' + '_' + 'items' + '_' + str(_item[i].get('representation')).replace(" ","_")
        _txt_to_trans = _item[i].get('representation')
        # Two values per row: the text and an empty string.
        df.loc[_id] = [_txt_to_trans, '']

yaml 文件的内容如下，我同样无法更改它：

groups:
  - &group-dp
    title: "Abschätzungen"
    reference: "group-dp"
    required: true
    description: >
    help_text: |


items:
  - type: "Group"
    <<: *group-dp
    visible: true
    multiple: false
    representation: "Abschätzungen"

我收到以下错误消息

---------------------------------------------------------------------------
UnicodeEncodeError                        Traceback (most recent call last)
<ipython-input-18-1fa5952ce8cf> in <module>()
----> 1 import codecs, os;__pyfile = codecs.open('''/tmp/py7455hqj''', encoding='''utf-8''');__code = __pyfile.read().encode('''utf-8''');__pyfile.close();os.remove('''/tmp/py7455hqj''');exec(compile(__code, '''/home/nicolas/Desktop/test.py''', 'exec'));

/home/nicolas/Desktop/test.py in <module>()
     39 _item = data.get('items')
     40 for i in range(0, len(_item)):
---> 41     if 'representation' in _item[i].keys() and isinstance(_item[i].get('representation'), ruamel.yaml.scalarstring.DoubleQuotedScalarString):
     42         _id = 'test' + '_' + 'items' + '_' + str(_item[i].get('representation')).replace(" ","_")
     43         _txt_to_trans = _item[i].get('representation')

UnicodeEncodeError: 'ascii' codec can't encode character u'\xe4' in position 5: ordinal not in range(128)

In [19]:

我尝试过进行编码（encode），但没有用。我该如何解决这个问题？完整的测试代码如下：

import pandas as pd
import ruamel.yaml
import json
import os

# Shared ruamel.yaml handler used by the rest of the script.
yaml = ruamel.yaml.YAML()
# Very large width -- presumably so long lines are never wrapped on output
# (TODO confirm against the ruamel.yaml documentation).
yaml.width = 100000
# Presumably what lets quoted values be told apart by quote style later on
# (TODO confirm against the ruamel.yaml documentation).
yaml.preserve_quotes = True
yaml.indent(mapping=2, sequence=4, offset=2)

class DoubleMergeKeyEnabler(object):
    """Swap YAML merge-key text for numbered placeholder keys and back.

    ``convert`` turns each ``'<<: '`` in a document string into
    ``'[<<, N]: '`` with a running number N; ``revert`` undoes those
    substitutions.  The number of the last placeholder handed out is kept
    on the instance in ``pat_nr`` (-1 when none is outstanding).
    """

    def __init__(self):
        # -1 means no placeholder has been handed out yet
        self.pat_nr = -1
        # could be at the root level mapping, so no leading space
        self.pat = '<<: '
        # probably not using sequences as keys
        self.r_pat = '[<<, {}]: '

    def convert(self, doc):
        """Return ``doc`` with every ``pat`` replaced by a numbered ``r_pat``.

        Occurrences are handled one at a time, first to last, and each one
        increments ``pat_nr`` before the number is used.
        """
        while True:
            if self.pat not in doc:
                return doc
            self.pat_nr += 1
            numbered = self.r_pat.format(self.pat_nr)
            doc = doc.replace(self.pat, numbered, 1)

    def revert(self, doc):
        """Return ``doc`` with the numbered placeholders turned back into ``pat``.

        Placeholders are restored from the highest number down to 0, and
        ``pat_nr`` ends up at -1.
        """
        for nr in range(self.pat_nr, -1, -1):
            numbered = self.r_pat.format(nr)
            doc = doc.replace(numbered, self.pat, 1)
            self.pat_nr = nr - 1
        return doc


dmke = DoubleMergeKeyEnabler()

# Tail of the complete reproduction script from the question; it is kept
# verbatim because it is the code being reported as failing.

# Empty result table with two columns.
df = pd.DataFrame(columns=['text1', 'text2'])

# Load the YAML document from disk using the handler configured above.
with open ('/home/nicolas/Desktop/test.yaml') as f:
    data = yaml.load(f)

# Store the 'representation' text of each matching entry under 'items' as a
# new row of ``df``.
_item = data.get('items')
for i in range(0, len(_item)):
    # Only entries that have a 'representation' key whose value is a
    # DoubleQuotedScalarString are processed.
    if 'representation' in _item[i].keys() and isinstance(_item[i].get('representation'), ruamel.yaml.scalarstring.DoubleQuotedScalarString):
        # Row label: 'test_items_' followed by the text with spaces turned
        # into underscores.
        # NOTE(review): str() on the loaded value is presumably what raises
        # the reported UnicodeEncodeError for non-ASCII text under
        # Python 2 -- confirm against the traceback.
        _id = 'test' + '_' + 'items' + '_' + str(_item[i].get('representation')).replace(" ","_")
        _txt_to_trans = _item[i].get('representation')
        # Row values: the text in 'text1', an empty string in 'text2'.
        df.loc[_id] = [_txt_to_trans, '']

根据你的追踪，问题是

if 'representation' in _item[i].keys() and isinstance(_item[i].get('representation'), ruamel.yaml.scalarstring.DoubleQuotedScalarString)

这一行是用字节字符串（str）而不是 Unicode 去检查键，因此 Python 会尝试把键转换为 ASCII 字符串，遇到变音符（umlaut）时就会失败。您应该改为检查 Unicode 字符串是否在键中：

if u'representation' in _item[i].keys() and isinstance(_item[i].get(u'representation'), ruamel.yaml.scalarstring.DoubleQuotedScalarString)

同时，在随后的几行中不要再用 str() 对 Unicode 结果做强制转换。

以下代码在 Python 2.7 中可以正常运行：

# encoding: utf-8

import ruamel.yaml
import json
import os

# Shared ruamel.yaml handler used by the rest of the script.
yaml = ruamel.yaml.YAML()
# Very large width -- presumably so long lines are never wrapped on output
# (TODO confirm against the ruamel.yaml documentation).
yaml.width = 100000
# Presumably what lets quoted values be told apart by quote style later on
# (TODO confirm against the ruamel.yaml documentation).
yaml.preserve_quotes = True
yaml.indent(mapping=2, sequence=4, offset=2)

class DoubleMergeKeyEnabler(object):
    """Swap YAML merge-key text for numbered placeholder keys and back.

    ``convert`` turns each ``'<<: '`` in a document string into
    ``'[<<, N]: '`` with a running number N; ``revert`` undoes those
    substitutions.  The number of the last placeholder handed out is kept
    on the instance in ``pat_nr`` (-1 when none is outstanding).
    """

    def __init__(self):
        # -1 means no placeholder has been handed out yet
        self.pat_nr = -1
        # could be at the root level mapping, so no leading space
        self.pat = '<<: '
        # probably not using sequences as keys
        self.r_pat = '[<<, {}]: '

    def convert(self, doc):
        """Return ``doc`` with every ``pat`` replaced by a numbered ``r_pat``.

        Occurrences are handled one at a time, first to last, and each one
        increments ``pat_nr`` before the number is used.
        """
        while True:
            if self.pat not in doc:
                return doc
            self.pat_nr += 1
            numbered = self.r_pat.format(self.pat_nr)
            doc = doc.replace(self.pat, numbered, 1)

    def revert(self, doc):
        """Return ``doc`` with the numbered placeholders turned back into ``pat``.

        Placeholders are restored from the highest number down to 0, and
        ``pat_nr`` ends up at -1.
        """
        for nr in range(self.pat_nr, -1, -1):
            numbered = self.r_pat.format(nr)
            doc = doc.replace(numbered, self.pat, 1)
            self.pat_nr = nr - 1
        return doc


dmke = DoubleMergeKeyEnabler()

# Parse the sample document from an inline string instead of a file.
data = yaml.load("""\
groups:
  - &group-dp
    title: "Abschätzungen"
    reference: "group-dp"
    required: true
    description: >
    help_text: |


items:
  - type: "Group"
    <<: *group-dp
    visible: true
    multiple: false
    representation: "Abschätzungen"
""")

# Build a unicode id for every entry whose 'representation' is a
# double-quoted scalar.  Only unicode literals and unicode() are used, so no
# implicit ASCII encoding takes place.
_item = data.get('items')
for i in range(len(_item)):
    if u'representation' not in _item[i].keys():
        continue
    if not isinstance(_item[i].get(u'representation'), ruamel.yaml.scalarstring.DoubleQuotedScalarString):
        continue
    _txt_to_trans = _item[i].get(u'representation')
    # 'test_items_' followed by the text with spaces turned into underscores
    _id = u'_'.join([u'test', u'items', unicode(_txt_to_trans).replace(u" ", u"_")])

因此，for 循环需要在几处做一些调整，使其始终基于 Unicode 处理。与 pandas 相关的代码需要您自行重新加回去。

谢谢您的帮助！成功了。我会清理上面的回溯信息（traceback）。

# encoding: utf-8

import ruamel.yaml
import json
import os

# Shared ruamel.yaml handler used by the rest of the script.
yaml = ruamel.yaml.YAML()
# Very large width -- presumably so long lines are never wrapped on output
# (TODO confirm against the ruamel.yaml documentation).
yaml.width = 100000
# Presumably what lets quoted values be told apart by quote style later on
# (TODO confirm against the ruamel.yaml documentation).
yaml.preserve_quotes = True
yaml.indent(mapping=2, sequence=4, offset=2)

class DoubleMergeKeyEnabler(object):
    """Swap YAML merge-key text for numbered placeholder keys and back.

    ``convert`` turns each ``'<<: '`` in a document string into
    ``'[<<, N]: '`` with a running number N; ``revert`` undoes those
    substitutions.  The number of the last placeholder handed out is kept
    on the instance in ``pat_nr`` (-1 when none is outstanding).
    """

    def __init__(self):
        # -1 means no placeholder has been handed out yet
        self.pat_nr = -1
        # could be at the root level mapping, so no leading space
        self.pat = '<<: '
        # probably not using sequences as keys
        self.r_pat = '[<<, {}]: '

    def convert(self, doc):
        """Return ``doc`` with every ``pat`` replaced by a numbered ``r_pat``.

        Occurrences are handled one at a time, first to last, and each one
        increments ``pat_nr`` before the number is used.
        """
        while True:
            if self.pat not in doc:
                return doc
            self.pat_nr += 1
            numbered = self.r_pat.format(self.pat_nr)
            doc = doc.replace(self.pat, numbered, 1)

    def revert(self, doc):
        """Return ``doc`` with the numbered placeholders turned back into ``pat``.

        Placeholders are restored from the highest number down to 0, and
        ``pat_nr`` ends up at -1.
        """
        for nr in range(self.pat_nr, -1, -1):
            numbered = self.r_pat.format(nr)
            doc = doc.replace(numbered, self.pat, 1)
            self.pat_nr = nr - 1
        return doc


dmke = DoubleMergeKeyEnabler()

# Parse the sample document from an inline string instead of a file.
data = yaml.load("""\
groups:
  - &group-dp
    title: "Abschätzungen"
    reference: "group-dp"
    required: true
    description: >
    help_text: |


items:
  - type: "Group"
    <<: *group-dp
    visible: true
    multiple: false
    representation: "Abschätzungen"
""")

# Build a unicode id for every entry whose 'representation' is a
# double-quoted scalar.  Only unicode literals and unicode() are used, so no
# implicit ASCII encoding takes place.
_item = data.get('items')
for i in range(len(_item)):
    if u'representation' not in _item[i].keys():
        continue
    if not isinstance(_item[i].get(u'representation'), ruamel.yaml.scalarstring.DoubleQuotedScalarString):
        continue
    _txt_to_trans = _item[i].get(u'representation')
    # 'test_items_' followed by the text with spaces turned into underscores
    _id = u'_'.join([u'test', u'items', unicode(_txt_to_trans).replace(u" ", u"_")])