Python:对数据进行自然排序时引发(raises)错误
我有一个 pandas 数据对象,其索引如下。Python:对数据进行自然排序时引发(raises)错误,python,python-2.7,pandas,natsort,Python,Python 2.7,Pandas,Natsort,我有一个 pandas 数据对象,其索引如下: print(df.index) MultiIndex(levels=[[u'Day 3', u'Day 4', u'Day 5', u'Day 7', u'Day 9'], [u'D1', u'D10', u'D11', u'D12', u'D2', u'D3', u'D4', u'D5', u'D6', u'D7', u'D8', u'D9'], [1.0, 2.0, 3.0]], labels=[[1, 1, 1, 1, 1, 1, 1, 1, 1
print(df.index)
MultiIndex(levels=[[u'Day 3', u'Day 4', u'Day 5', u'Day 7', u'Day 9'], [u'D1', u'D10', u'D11', u'D12', u'D2', u'D3', u'D4', u'D5', u'D6', u'D7', u'D8', u'D9'], [1.0, 2.0, 3.0]],
labels=[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4], [0, 0, 0, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 1, 1, 1, 2, 2, 2, 3, 3, 3, 0, 0, 0, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 1, 1, 1, 2, 2, 2, 3, 3, 3, 0, 0, 0, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 1, 1, 1, 2, 2, 2, 3, 3, 3, 0, 0, 0, 4, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 1, 1, 1, 2, 2, 2, 3, 3, 3, 0, 0, 0, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 1, 1, 1, 2, 2, 2, 3, 3, 3], [1, 2, 0, 1, 2, 0, 0, 2, 1, 0, 1, 2, 2, 0, 1, 0, 2, 1, 1, 2, 0, 1, 0, 2, 2, 0, 1, 0, 1, 2, 2, 1, 0, 1, 2, 0, 0, 2, 1, 0, 2, 1, 2, 0, 1, 0, 2, 1, 1, 0, 2, 0, 1, 2, 0, 2, 1, 2, 0, 1, 0, 2, 1, 0, 2, 1, 2, 0, 1, 0, 2, 1, 2, 1, 0, 0, 2, 1, 1, 2, 0, 0, 2, 1, 0, 1, 2, 0, 1, 2, 2, 1, 0, 1, 0, 2, 1, 0, 2, 0, 1, 2, 2, 0, 1, 1, 0, 2, 1, 2, 0, 1, 1, 2, 0, 2, 1, 0, 1, 2, 0, 0, 1, 2, 0, 1, 2, 2, 1, 0, 1, 0, 2, 2, 0, 1, 0, 1, 2, 0, 2, 1, 2, 0, 1, 1, 2, 0, 0, 2, 1, 0, 2, 1, 0, 2, 1, 2, 1, 0, 0, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 0, 1, 2, 0, 2, 1, 0, 1, 2, 0]],
names=[u'Interval', u'Device', u'Well'])
我用下面的方法对它进行排序:
# Reorder df by passing the natsorted() ordering of its index entries to reindex().
# NOTE(review): for this particular df the call fails with
# "cannot handle a non-unique multi-index!" (see the error quoted below).
df = df.reindex(index=natsorted(df.index))
但是,对于这个特定的 df,它会抛出以下错误:
raise Exception("cannot handle a non-unique multi-index!")
Exception: cannot handle a non-unique multi-index!
任何帮助都将不胜感激。
我写了一个简单的例子,可以重现您的错误。出现这个错误似乎是因为在 arrays 中,元组 (u'Day 3', u'D1', 1.0) 出现了两次,导致 MultiIndex 不唯一。如果移除其中一个,就能正常工作。
import pandas as pd
import numpy as np
from natsort import natsorted
# Minimal reproduction of the "non-unique multi-index" error.
# The first two columns of `arrays` both describe the entry
# (u'Day 3', u'D1', 1.0), so the resulting MultiIndex contains a duplicate.
arrays = [
    [u'Day 3', u'Day 3', u'Day 4', u'Day 5', u'Day 7', u'Day 9', u'Day 3', u'Day 4', u'Day 5', u'Day 7', u'Day 9'],
    [u'D1', u'D1', u'D10', u'D11', u'D12', u'D2', u'D3', u'D4', u'D5', u'D6', u'D7'],
    [1.0, 1.0, 2.0, 3.0, 1.0, 2.0, 1.0, 2.0, 3.0, 1.0, 2.0],
]
# Transpose the three level lists into one (Interval, Device, Well) tuple per row.
tuples = list(zip(*arrays))
index = pd.MultiIndex.from_tuples(tuples, names=[u'Interval', u'Device', u'Well'])
# One random value per index entry; the values themselves do not matter here.
df = pd.Series(np.random.randn(len(arrays[0])), index=index)
# Parenthesised form so the script parses on Python 3 as well as Python 2
# (the bare `print df.index` statement is a SyntaxError on Python 3).
print(df.index)
# This is the step that reproduces the reported error,
# "cannot handle a non-unique multi-index!", because of the duplicated entry.
# Removing one of the duplicate entries from `arrays` makes it succeed.
df = df.reindex(index=natsorted(df.index))
正如您提到的,您使用了多个 Excel 文件,这可能会有所帮助:您能提供一个最小、完整且可验证的示例吗?也就是说,我可以直接复制/粘贴脚本并在本地运行。这很难做到,因为数据是从一系列 Excel 文件读入 pandas 的。我希望我只是遗漏了一些明显的东西。请查看我的示例和注释,看它是否适用于您的输入数据。我想这与 natsort
无关。如果把最后一行换成 df = df.reindex(index=df.index)
,是否还会出现同样的错误?或者换成 df = df.reindex(index=sorted(df.index))
呢?如果是这样的话,你的标题就有误导性,可能应该将其重命名为“重新索引 pandas DataFrame 时出错”或类似的名称。