Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/python/324.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Python 两个熊猫多索引帧将每行与每行相乘_Python_Python 3.x_Pandas_Numpy_Multi Index - Fatal编程技术网

Python 将两个 pandas 多重索引(MultiIndex)DataFrame 的每一行与每一行相乘

Python 两个熊猫多索引帧将每行与每行相乘,python,python-3.x,pandas,numpy,multi-index,Python,Python 3.x,Pandas,Numpy,Multi Index,我需要将具有相同最高级别索引的两个多索引帧(例如,df1,df2)相乘,这样对于每个最高级别索引,df1的每一行都被元素相乘到df2的每一行。我已经实现了下面的示例,实现了我想要的功能,但是它看起来非常丑陋: a = ['alpha', 'beta'] b = ['A', 'B', 'C'] c = ['foo', 'bar'] df1 = pd.DataFrame(np.random.randn(6, 4), index=pd.MultiIndex.fro

我需要将具有相同最高级别索引的两个多索引帧(例如,
df1,df2
)相乘,这样对于每个最高级别索引,
df1
的每一行都被元素相乘到
df2
的每一行。我已经实现了下面的示例,实现了我想要的功能,但是它看起来非常丑陋:

# Example inputs: df1 has one row per (greek, latin) pair, df2 one row per
# (greek, foobar) pair; both share the value columns C1..C4.
a = ['alpha', 'beta']
b = ['A', 'B', 'C']
c = ['foo', 'bar']
df1 = pd.DataFrame(np.random.randn(6, 4),
                   index=pd.MultiIndex.from_product(
                       [a, b],
                       names=['greek', 'latin']),
                   columns=['C1', 'C2', 'C3', 'C4'])
df2 = pd.DataFrame(
    np.array([[1, 0, 1, 0], [1, 1, 1, 1], [0, 0, 0, 0], [0, 2, 0, 4]]),
    index=pd.MultiIndex.from_product([a, c], names=['greek', 'foobar']),
    columns=['C1', 'C2', 'C3', 'C4'])

# Collect one record per (greek, latin, foobar) combination and build the
# frame in a single step.  DataFrame.append / Series.append were removed in
# pandas 2.0, and appending row by row copies the whole frame every time.
records = []
for i in df1.index.get_level_values('greek').unique():
    for j in df1.loc[i].index.get_level_values('latin').unique():
        for k in df2.loc[i].index.get_level_values('foobar').unique():
            record = {'greek': i, 'latin': j, 'foobar': k}
            # element-wise product of the df1 row and the df2 row
            record.update((df1.loc[i, j] * df2.loc[i, k]).to_dict())
            records.append(record)

# Passing the column list keeps the layout even if no record was produced.
df3 = pd.DataFrame(
    records,
    columns=['greek', 'latin', 'foobar', 'C1', 'C2', 'C3', 'C4'])
df3.set_index(['greek', 'latin', 'foobar'], inplace=True)
正如您所看到的,代码非常手动,多次手动定义列等,并在最后设置索引。这是输入和输出。它们是正确的,正是我想要的:

df1:

                   C1        C2        C3        C4
 greek latin                                        
 alpha A      0.208380  0.856373 -1.041598  1.219707
       B      1.547903 -0.001023  0.918973  1.153554
       C      0.195868  2.772840  0.060960  0.311247
 beta  A      0.690405 -1.258012  0.118000 -0.346677
       B      0.488327 -1.206428  0.967658  1.198287
       C      0.420098 -0.165721  0.626893 -0.377909,
                C1  C2  C3  C4
greek foobar                
 alpha foo      1   0   1   0
       bar      1   1   1   1
 beta  foo      0   0   0   0
       bar      0   2   0   4
                           C1        C2        C3        C4
 greek latin foobar                                        
 alpha A     foo     0.208380  0.000000 -1.041598  0.000000
             bar     0.208380  0.856373 -1.041598  1.219707
       B     foo     1.547903 -0.000000  0.918973  0.000000
             bar     1.547903 -0.001023  0.918973  1.153554
       C     foo     0.195868  0.000000  0.060960  0.000000
             bar     0.195868  2.772840  0.060960  0.311247
 beta  A     foo     0.000000 -0.000000  0.000000 -0.000000
             bar     0.000000 -2.516025  0.000000 -1.386708
       B     foo     0.000000 -0.000000  0.000000  0.000000
             bar     0.000000 -2.412855  0.000000  4.793149
       C     foo     0.000000 -0.000000  0.000000 -0.000000
             bar     0.000000 -0.331443  0.000000 -1.511638
# Collect the unique values of each index level: the first two level names
# are looked up on df1, the third on df2.
idx = [df1.index.get_level_values(col).unique() for col in columns[:2]
       ] + [df2.index.get_level_values(columns[2]).unique()]
size = [len(x) for x in idx]
# Result index: every combination of the three levels' unique values.
index = pd.MultiIndex.from_product(idx, names=columns[:3])

# Positional row numbers to take from df1 and df2 for every output row.
# idx_a[0] repeats each of the size[0] * size[1] row numbers size[2] times,
# idx_a[1] cycles through 0..size[2]-1, and idx_b[0] is the position of the
# first-level value for each output row.
idx_a = np.indices((size[0] * size[1], size[2])).reshape(2, -1)
idx_b = np.indices((size[0], size[1] * size[2])).reshape(2, -1)
idx_1 = idx_a[0]
# Offset inside the group plus the start of that group's block of size[2] rows.
# NOTE(review): this positional scheme presumably requires both frames to
# hold every level combination in from_product order -- confirm for real data.
idx_2 = idx_a[1] + idx_b[0] * size[2]

# Expand both frames so that each has exactly one row per output row.
y1 = df1.values[idx_1, :]
y2 = df2.values[idx_2, :]

# Element-wise product of the two expanded arrays.
df = pd.DataFrame(y1 * y2, index=index, columns=columns[3:])
import pandas as pd
import numpy as np

# Values of the three index levels ('greek', 'latin', 'foobar').
a = ['alpha', 'beta']
b = ['A', 'B', 'C']
c = ['foo', 'bar']
# Names of the value columns, then the index names followed by the value
# columns in one list.
data_columns = ['C1', 'C2', 'C3', 'C4']
columns = ['greek', 'latin', 'foobar', *data_columns]

# df1: random normal values, one row per (greek, latin) combination.
df1 = pd.DataFrame(
    np.random.randn(len(a) * len(b), len(data_columns)),
    index=pd.MultiIndex.from_product([a, b], names=columns[:2]),
    columns=data_columns,
)
# df2: fixed integer factors, one row per (greek, foobar) combination.
df2 = pd.DataFrame(
    np.array([
        [1, 0, 1, 0],
        [1, 1, 1, 1],
        [0, 0, 0, 0],
        [0, 2, 0, 4],
    ]),
    index=pd.MultiIndex.from_product(
        [a, c], names=[columns[0], columns[2]]),
    columns=data_columns,
)
def method1():
    """Multiply every df1 row with every df2 row sharing its 'greek' key.

    Reads the module-level ``df1``, ``df2`` and ``columns``.  For each
    'greek' value, each 'latin' row of ``df1`` is multiplied element-wise
    with each 'foobar' row of ``df2``.

    Returns:
        DataFrame indexed by ``columns[:3]`` with one product row per
        (greek, latin, foobar) combination, in loop order.
    """
    index_names = columns[:3]
    # Gather plain records and build the frame once: DataFrame.append and
    # Series.append were removed in pandas 2.0, and appending row by row
    # copies the whole frame on every iteration.
    records = []
    for i in df1.index.get_level_values('greek').unique():
        for j in df1.loc[i].index.get_level_values('latin').unique():
            for k in df2.loc[i].index.get_level_values('foobar').unique():
                record = dict(zip(index_names, (i, j, k)))
                record.update((df1.loc[i, j] * df2.loc[i, k]).to_dict())
                records.append(record)
    # Passing ``columns`` keeps the layout even if no record was produced.
    df3 = pd.DataFrame(records, columns=columns)
    df3.set_index(index_names, inplace=True)
    return df3

def method2():
    """Vectorised product of df1 rows with df2 rows of the same first level.

    Reads the module-level ``df1``, ``df2`` and ``columns``.  Both frames
    are expanded by positional row number to one row per output row and the
    resulting arrays are multiplied element-wise.

    NOTE(review): rows are picked purely by position, so this presumably
    requires both frames to hold every level combination in from_product
    order -- confirm before using on other data.

    Returns:
        DataFrame indexed by the product of the unique values of the three
        index levels, with the value columns ``columns[3:]``.
    """
    # unique values of the two df1 levels, then of the df2-only level
    levels = [df1.index.get_level_values(name).unique()
              for name in columns[:2]]
    levels.append(df2.index.get_level_values(columns[2]).unique())
    n_outer, n_mid, n_inner = (len(level) for level in levels)
    index = pd.MultiIndex.from_product(levels, names=columns[:3])

    # df1: every row position repeated once per value of the third level
    rows_1 = np.repeat(np.arange(n_outer * n_mid), n_inner)
    # df2: offset inside the group, shifted to the start of that group
    inner_offset = np.tile(np.arange(n_inner), n_outer * n_mid)
    outer_position = np.repeat(np.arange(n_outer), n_mid * n_inner)
    rows_2 = inner_offset + outer_position * n_inner

    # expand both frames to the output shape and multiply them
    left = df1.values[rows_1, :]
    right = df2.values[rows_2, :]
    return pd.DataFrame(left * right, index=index, columns=columns[3:])

# Benchmark: print the total time in seconds for 50 calls of each method.
from timeit import timeit
print(timeit(method1, number=50))
print(timeit(method2, number=50))
7.96668368373
0.149504332128
# Put 'foobar' on the outer level so one sub-frame per foobar value can be
# selected with .loc.
dft = df2.swaplevel()
# sort_index replaces sortlevel, which was deprecated in pandas 0.20 and
# removed in pandas 1.0.
dft.sort_index(level=0, inplace=True)
# Multiply df1 with each foobar sub-frame (aligned on the shared 'greek'
# level) and stack the results under a new outer 'foobar' level.
df5 = pd.concat(
    [df1 * dft.loc[i, :]
     for i in dft.index.get_level_values('foobar').unique()],
    keys=dft.index.get_level_values('foobar').unique().tolist(),
    names=['foobar'])
df5 = df5.reorder_levels(['greek', 'latin', 'foobar'], axis=0)
df5.sort_index(level=0, inplace=True)
import pandas as pd
import numpy as np

# Values of the three index levels ('greek', 'latin', 'foobar').
a = ['alpha', 'beta']
b = ['A', 'B', 'C']
c = ['foo', 'bar']
# Names of the value columns, then the index names followed by the value
# columns in one list.
data_columns = ['C1', 'C2', 'C3', 'C4']
columns = ['greek', 'latin', 'foobar', *data_columns]

# df1: random normal values, one row per (greek, latin) combination.
df1 = pd.DataFrame(
    np.random.randn(len(a) * len(b), len(data_columns)),
    index=pd.MultiIndex.from_product([a, b], names=columns[:2]),
    columns=data_columns,
)
# df2: fixed integer factors, one row per (greek, foobar) combination.
df2 = pd.DataFrame(
    np.array([
        [1, 0, 1, 0],
        [1, 1, 1, 1],
        [0, 0, 0, 0],
        [0, 2, 0, 4],
    ]),
    index=pd.MultiIndex.from_product(
        [a, c], names=[columns[0], columns[2]]),
    columns=data_columns,
)
def method1():
    """Multiply every df1 row with every df2 row sharing its 'greek' key.

    Reads the module-level ``df1``, ``df2`` and ``columns``.  For each
    'greek' value, each 'latin' row of ``df1`` is multiplied element-wise
    with each 'foobar' row of ``df2``.

    Returns:
        DataFrame indexed by ``columns[:3]`` with one product row per
        (greek, latin, foobar) combination, in loop order.
    """
    index_names = columns[:3]
    # Gather plain records and build the frame once: DataFrame.append and
    # Series.append were removed in pandas 2.0, and appending row by row
    # copies the whole frame on every iteration.
    records = []
    for i in df1.index.get_level_values('greek').unique():
        for j in df1.loc[i].index.get_level_values('latin').unique():
            for k in df2.loc[i].index.get_level_values('foobar').unique():
                record = dict(zip(index_names, (i, j, k)))
                record.update((df1.loc[i, j] * df2.loc[i, k]).to_dict())
                records.append(record)
    # Passing ``columns`` keeps the layout even if no record was produced.
    df3 = pd.DataFrame(records, columns=columns)
    df3.set_index(index_names, inplace=True)
    return df3

def method2():
    """Vectorised product of df1 rows with df2 rows of the same first level.

    Reads the module-level ``df1``, ``df2`` and ``columns``.  Both frames
    are expanded by positional row number to one row per output row and the
    resulting arrays are multiplied element-wise.

    NOTE(review): rows are picked purely by position, so this presumably
    requires both frames to hold every level combination in from_product
    order -- confirm before using on other data.

    Returns:
        DataFrame indexed by the product of the unique values of the three
        index levels, with the value columns ``columns[3:]``.
    """
    # unique values of the two df1 levels, then of the df2-only level
    levels = [df1.index.get_level_values(name).unique()
              for name in columns[:2]]
    levels.append(df2.index.get_level_values(columns[2]).unique())
    n_outer, n_mid, n_inner = (len(level) for level in levels)
    index = pd.MultiIndex.from_product(levels, names=columns[:3])

    # df1: every row position repeated once per value of the third level
    rows_1 = np.repeat(np.arange(n_outer * n_mid), n_inner)
    # df2: offset inside the group, shifted to the start of that group
    inner_offset = np.tile(np.arange(n_inner), n_outer * n_mid)
    outer_position = np.repeat(np.arange(n_outer), n_mid * n_inner)
    rows_2 = inner_offset + outer_position * n_inner

    # expand both frames to the output shape and multiply them
    left = df1.values[rows_1, :]
    right = df2.values[rows_2, :]
    return pd.DataFrame(left * right, index=index, columns=columns[3:])


def method3():
    """Multiply df1 with each 'foobar' slice of df2 and stack the results.

    Reads the module-level ``df1`` and ``df2``.  ``df2`` is re-indexed with
    'foobar' as the outer level; for every foobar value the matching
    sub-frame (indexed by 'greek') is multiplied with ``df1`` and the
    products are concatenated under a new 'foobar' level.

    Returns:
        DataFrame indexed by (greek, latin, foobar), sorted by index.
    """
    # sort_index replaces sortlevel, which was deprecated in pandas 0.20
    # and removed in pandas 1.0.
    dft = df2.swaplevel().sort_index(level=0)
    foobar_values = dft.index.get_level_values('foobar').unique()
    df5 = pd.concat(
        [df1 * dft.loc[value, :] for value in foobar_values],
        keys=foobar_values.tolist(),
        names=['foobar'],
    )
    df5 = df5.reorder_levels(['greek', 'latin', 'foobar'], axis=0)
    return df5.sort_index(level=0)


# Benchmark: print the total time in seconds for 50 calls of each method.
from timeit import timeit
print(timeit(method1, number=50))
print(timeit(method2, number=50))
print(timeit(method3, number=50))
4.089807642158121
0.12291539693251252
0.33667341712862253
df2:

                   C1        C2        C3        C4
 greek latin                                        
 alpha A      0.208380  0.856373 -1.041598  1.219707
       B      1.547903 -0.001023  0.918973  1.153554
       C      0.195868  2.772840  0.060960  0.311247
 beta  A      0.690405 -1.258012  0.118000 -0.346677
       B      0.488327 -1.206428  0.967658  1.198287
       C      0.420098 -0.165721  0.626893 -0.377909,
                C1  C2  C3  C4
greek foobar                
 alpha foo      1   0   1   0
       bar      1   1   1   1
 beta  foo      0   0   0   0
       bar      0   2   0   4
                           C1        C2        C3        C4
 greek latin foobar                                        
 alpha A     foo     0.208380  0.000000 -1.041598  0.000000
             bar     0.208380  0.856373 -1.041598  1.219707
       B     foo     1.547903 -0.000000  0.918973  0.000000
             bar     1.547903 -0.001023  0.918973  1.153554
       C     foo     0.195868  0.000000  0.060960  0.000000
             bar     0.195868  2.772840  0.060960  0.311247
 beta  A     foo     0.000000 -0.000000  0.000000 -0.000000
             bar     0.000000 -2.516025  0.000000 -1.386708
       B     foo     0.000000 -0.000000  0.000000  0.000000
             bar     0.000000 -2.412855  0.000000  4.793149
       C     foo     0.000000 -0.000000  0.000000 -0.000000
             bar     0.000000 -0.331443  0.000000 -1.511638
# Collect the unique values of each index level: the first two level names
# are looked up on df1, the third on df2.
idx = [df1.index.get_level_values(col).unique() for col in columns[:2]
       ] + [df2.index.get_level_values(columns[2]).unique()]
size = [len(x) for x in idx]
# Result index: every combination of the three levels' unique values.
index = pd.MultiIndex.from_product(idx, names=columns[:3])

# Positional row numbers to take from df1 and df2 for every output row.
# idx_a[0] repeats each of the size[0] * size[1] row numbers size[2] times,
# idx_a[1] cycles through 0..size[2]-1, and idx_b[0] is the position of the
# first-level value for each output row.
idx_a = np.indices((size[0] * size[1], size[2])).reshape(2, -1)
idx_b = np.indices((size[0], size[1] * size[2])).reshape(2, -1)
idx_1 = idx_a[0]
# Offset inside the group plus the start of that group's block of size[2] rows.
# NOTE(review): this positional scheme presumably requires both frames to
# hold every level combination in from_product order -- confirm for real data.
idx_2 = idx_a[1] + idx_b[0] * size[2]

# Expand both frames so that each has exactly one row per output row.
y1 = df1.values[idx_1, :]
y2 = df2.values[idx_2, :]

# Element-wise product of the two expanded arrays.
df = pd.DataFrame(y1 * y2, index=index, columns=columns[3:])
import pandas as pd
import numpy as np

# Values of the three index levels ('greek', 'latin', 'foobar').
a = ['alpha', 'beta']
b = ['A', 'B', 'C']
c = ['foo', 'bar']
# Names of the value columns, then the index names followed by the value
# columns in one list.
data_columns = ['C1', 'C2', 'C3', 'C4']
columns = ['greek', 'latin', 'foobar', *data_columns]

# df1: random normal values, one row per (greek, latin) combination.
df1 = pd.DataFrame(
    np.random.randn(len(a) * len(b), len(data_columns)),
    index=pd.MultiIndex.from_product([a, b], names=columns[:2]),
    columns=data_columns,
)
# df2: fixed integer factors, one row per (greek, foobar) combination.
df2 = pd.DataFrame(
    np.array([
        [1, 0, 1, 0],
        [1, 1, 1, 1],
        [0, 0, 0, 0],
        [0, 2, 0, 4],
    ]),
    index=pd.MultiIndex.from_product(
        [a, c], names=[columns[0], columns[2]]),
    columns=data_columns,
)
def method1():
    """Multiply every df1 row with every df2 row sharing its 'greek' key.

    Reads the module-level ``df1``, ``df2`` and ``columns``.  For each
    'greek' value, each 'latin' row of ``df1`` is multiplied element-wise
    with each 'foobar' row of ``df2``.

    Returns:
        DataFrame indexed by ``columns[:3]`` with one product row per
        (greek, latin, foobar) combination, in loop order.
    """
    index_names = columns[:3]
    # Gather plain records and build the frame once: DataFrame.append and
    # Series.append were removed in pandas 2.0, and appending row by row
    # copies the whole frame on every iteration.
    records = []
    for i in df1.index.get_level_values('greek').unique():
        for j in df1.loc[i].index.get_level_values('latin').unique():
            for k in df2.loc[i].index.get_level_values('foobar').unique():
                record = dict(zip(index_names, (i, j, k)))
                record.update((df1.loc[i, j] * df2.loc[i, k]).to_dict())
                records.append(record)
    # Passing ``columns`` keeps the layout even if no record was produced.
    df3 = pd.DataFrame(records, columns=columns)
    df3.set_index(index_names, inplace=True)
    return df3

def method2():
    """Vectorised product of df1 rows with df2 rows of the same first level.

    Reads the module-level ``df1``, ``df2`` and ``columns``.  Both frames
    are expanded by positional row number to one row per output row and the
    resulting arrays are multiplied element-wise.

    NOTE(review): rows are picked purely by position, so this presumably
    requires both frames to hold every level combination in from_product
    order -- confirm before using on other data.

    Returns:
        DataFrame indexed by the product of the unique values of the three
        index levels, with the value columns ``columns[3:]``.
    """
    # unique values of the two df1 levels, then of the df2-only level
    levels = [df1.index.get_level_values(name).unique()
              for name in columns[:2]]
    levels.append(df2.index.get_level_values(columns[2]).unique())
    n_outer, n_mid, n_inner = (len(level) for level in levels)
    index = pd.MultiIndex.from_product(levels, names=columns[:3])

    # df1: every row position repeated once per value of the third level
    rows_1 = np.repeat(np.arange(n_outer * n_mid), n_inner)
    # df2: offset inside the group, shifted to the start of that group
    inner_offset = np.tile(np.arange(n_inner), n_outer * n_mid)
    outer_position = np.repeat(np.arange(n_outer), n_mid * n_inner)
    rows_2 = inner_offset + outer_position * n_inner

    # expand both frames to the output shape and multiply them
    left = df1.values[rows_1, :]
    right = df2.values[rows_2, :]
    return pd.DataFrame(left * right, index=index, columns=columns[3:])

# Benchmark: print the total time in seconds for 50 calls of each method.
from timeit import timeit
print(timeit(method1, number=50))
print(timeit(method2, number=50))
7.96668368373
0.149504332128
# Put 'foobar' on the outer level so one sub-frame per foobar value can be
# selected with .loc.
dft = df2.swaplevel()
# sort_index replaces sortlevel, which was deprecated in pandas 0.20 and
# removed in pandas 1.0.
dft.sort_index(level=0, inplace=True)
# Multiply df1 with each foobar sub-frame (aligned on the shared 'greek'
# level) and stack the results under a new outer 'foobar' level.
df5 = pd.concat(
    [df1 * dft.loc[i, :]
     for i in dft.index.get_level_values('foobar').unique()],
    keys=dft.index.get_level_values('foobar').unique().tolist(),
    names=['foobar'])
df5 = df5.reorder_levels(['greek', 'latin', 'foobar'], axis=0)
df5.sort_index(level=0, inplace=True)
import pandas as pd
import numpy as np

# Values of the three index levels ('greek', 'latin', 'foobar').
a = ['alpha', 'beta']
b = ['A', 'B', 'C']
c = ['foo', 'bar']
# Names of the value columns, then the index names followed by the value
# columns in one list.
data_columns = ['C1', 'C2', 'C3', 'C4']
columns = ['greek', 'latin', 'foobar', *data_columns]

# df1: random normal values, one row per (greek, latin) combination.
df1 = pd.DataFrame(
    np.random.randn(len(a) * len(b), len(data_columns)),
    index=pd.MultiIndex.from_product([a, b], names=columns[:2]),
    columns=data_columns,
)
# df2: fixed integer factors, one row per (greek, foobar) combination.
df2 = pd.DataFrame(
    np.array([
        [1, 0, 1, 0],
        [1, 1, 1, 1],
        [0, 0, 0, 0],
        [0, 2, 0, 4],
    ]),
    index=pd.MultiIndex.from_product(
        [a, c], names=[columns[0], columns[2]]),
    columns=data_columns,
)
def method1():
    """Multiply every df1 row with every df2 row sharing its 'greek' key.

    Reads the module-level ``df1``, ``df2`` and ``columns``.  For each
    'greek' value, each 'latin' row of ``df1`` is multiplied element-wise
    with each 'foobar' row of ``df2``.

    Returns:
        DataFrame indexed by ``columns[:3]`` with one product row per
        (greek, latin, foobar) combination, in loop order.
    """
    index_names = columns[:3]
    # Gather plain records and build the frame once: DataFrame.append and
    # Series.append were removed in pandas 2.0, and appending row by row
    # copies the whole frame on every iteration.
    records = []
    for i in df1.index.get_level_values('greek').unique():
        for j in df1.loc[i].index.get_level_values('latin').unique():
            for k in df2.loc[i].index.get_level_values('foobar').unique():
                record = dict(zip(index_names, (i, j, k)))
                record.update((df1.loc[i, j] * df2.loc[i, k]).to_dict())
                records.append(record)
    # Passing ``columns`` keeps the layout even if no record was produced.
    df3 = pd.DataFrame(records, columns=columns)
    df3.set_index(index_names, inplace=True)
    return df3

def method2():
    """Vectorised product of df1 rows with df2 rows of the same first level.

    Reads the module-level ``df1``, ``df2`` and ``columns``.  Both frames
    are expanded by positional row number to one row per output row and the
    resulting arrays are multiplied element-wise.

    NOTE(review): rows are picked purely by position, so this presumably
    requires both frames to hold every level combination in from_product
    order -- confirm before using on other data.

    Returns:
        DataFrame indexed by the product of the unique values of the three
        index levels, with the value columns ``columns[3:]``.
    """
    # unique values of the two df1 levels, then of the df2-only level
    levels = [df1.index.get_level_values(name).unique()
              for name in columns[:2]]
    levels.append(df2.index.get_level_values(columns[2]).unique())
    n_outer, n_mid, n_inner = (len(level) for level in levels)
    index = pd.MultiIndex.from_product(levels, names=columns[:3])

    # df1: every row position repeated once per value of the third level
    rows_1 = np.repeat(np.arange(n_outer * n_mid), n_inner)
    # df2: offset inside the group, shifted to the start of that group
    inner_offset = np.tile(np.arange(n_inner), n_outer * n_mid)
    outer_position = np.repeat(np.arange(n_outer), n_mid * n_inner)
    rows_2 = inner_offset + outer_position * n_inner

    # expand both frames to the output shape and multiply them
    left = df1.values[rows_1, :]
    right = df2.values[rows_2, :]
    return pd.DataFrame(left * right, index=index, columns=columns[3:])


def method3():
    """Multiply df1 with each 'foobar' slice of df2 and stack the results.

    Reads the module-level ``df1`` and ``df2``.  ``df2`` is re-indexed with
    'foobar' as the outer level; for every foobar value the matching
    sub-frame (indexed by 'greek') is multiplied with ``df1`` and the
    products are concatenated under a new 'foobar' level.

    Returns:
        DataFrame indexed by (greek, latin, foobar), sorted by index.
    """
    # sort_index replaces sortlevel, which was deprecated in pandas 0.20
    # and removed in pandas 1.0.
    dft = df2.swaplevel().sort_index(level=0)
    foobar_values = dft.index.get_level_values('foobar').unique()
    df5 = pd.concat(
        [df1 * dft.loc[value, :] for value in foobar_values],
        keys=foobar_values.tolist(),
        names=['foobar'],
    )
    df5 = df5.reorder_levels(['greek', 'latin', 'foobar'], axis=0)
    return df5.sort_index(level=0)


# Benchmark: print the total time in seconds for 50 calls of each method.
from timeit import timeit
print(timeit(method1, number=50))
print(timeit(method2, number=50))
print(timeit(method3, number=50))
4.089807642158121
0.12291539693251252
0.33667341712862253
结果:

                   C1        C2        C3        C4
 greek latin                                        
 alpha A      0.208380  0.856373 -1.041598  1.219707
       B      1.547903 -0.001023  0.918973  1.153554
       C      0.195868  2.772840  0.060960  0.311247
 beta  A      0.690405 -1.258012  0.118000 -0.346677
       B      0.488327 -1.206428  0.967658  1.198287
       C      0.420098 -0.165721  0.626893 -0.377909,
                C1  C2  C3  C4
greek foobar                
 alpha foo      1   0   1   0
       bar      1   1   1   1
 beta  foo      0   0   0   0
       bar      0   2   0   4
                           C1        C2        C3        C4
 greek latin foobar                                        
 alpha A     foo     0.208380  0.000000 -1.041598  0.000000
             bar     0.208380  0.856373 -1.041598  1.219707
       B     foo     1.547903 -0.000000  0.918973  0.000000
             bar     1.547903 -0.001023  0.918973  1.153554
       C     foo     0.195868  0.000000  0.060960  0.000000
             bar     0.195868  2.772840  0.060960  0.311247
 beta  A     foo     0.000000 -0.000000  0.000000 -0.000000
             bar     0.000000 -2.516025  0.000000 -1.386708
       B     foo     0.000000 -0.000000  0.000000  0.000000
             bar     0.000000 -2.412855  0.000000  4.793149
       C     foo     0.000000 -0.000000  0.000000 -0.000000
             bar     0.000000 -0.331443  0.000000 -1.511638
# Collect the unique values of each index level: the first two level names
# are looked up on df1, the third on df2.
idx = [df1.index.get_level_values(col).unique() for col in columns[:2]
       ] + [df2.index.get_level_values(columns[2]).unique()]
size = [len(x) for x in idx]
# Result index: every combination of the three levels' unique values.
index = pd.MultiIndex.from_product(idx, names=columns[:3])

# Positional row numbers to take from df1 and df2 for every output row.
# idx_a[0] repeats each of the size[0] * size[1] row numbers size[2] times,
# idx_a[1] cycles through 0..size[2]-1, and idx_b[0] is the position of the
# first-level value for each output row.
idx_a = np.indices((size[0] * size[1], size[2])).reshape(2, -1)
idx_b = np.indices((size[0], size[1] * size[2])).reshape(2, -1)
idx_1 = idx_a[0]
# Offset inside the group plus the start of that group's block of size[2] rows.
# NOTE(review): this positional scheme presumably requires both frames to
# hold every level combination in from_product order -- confirm for real data.
idx_2 = idx_a[1] + idx_b[0] * size[2]

# Expand both frames so that each has exactly one row per output row.
y1 = df1.values[idx_1, :]
y2 = df2.values[idx_2, :]

# Element-wise product of the two expanded arrays.
df = pd.DataFrame(y1 * y2, index=index, columns=columns[3:])
import pandas as pd
import numpy as np

# Values of the three index levels ('greek', 'latin', 'foobar').
a = ['alpha', 'beta']
b = ['A', 'B', 'C']
c = ['foo', 'bar']
# Names of the value columns, then the index names followed by the value
# columns in one list.
data_columns = ['C1', 'C2', 'C3', 'C4']
columns = ['greek', 'latin', 'foobar', *data_columns]

# df1: random normal values, one row per (greek, latin) combination.
df1 = pd.DataFrame(
    np.random.randn(len(a) * len(b), len(data_columns)),
    index=pd.MultiIndex.from_product([a, b], names=columns[:2]),
    columns=data_columns,
)
# df2: fixed integer factors, one row per (greek, foobar) combination.
df2 = pd.DataFrame(
    np.array([
        [1, 0, 1, 0],
        [1, 1, 1, 1],
        [0, 0, 0, 0],
        [0, 2, 0, 4],
    ]),
    index=pd.MultiIndex.from_product(
        [a, c], names=[columns[0], columns[2]]),
    columns=data_columns,
)
def method1():
    """Multiply every df1 row with every df2 row sharing its 'greek' key.

    Reads the module-level ``df1``, ``df2`` and ``columns``.  For each
    'greek' value, each 'latin' row of ``df1`` is multiplied element-wise
    with each 'foobar' row of ``df2``.

    Returns:
        DataFrame indexed by ``columns[:3]`` with one product row per
        (greek, latin, foobar) combination, in loop order.
    """
    index_names = columns[:3]
    # Gather plain records and build the frame once: DataFrame.append and
    # Series.append were removed in pandas 2.0, and appending row by row
    # copies the whole frame on every iteration.
    records = []
    for i in df1.index.get_level_values('greek').unique():
        for j in df1.loc[i].index.get_level_values('latin').unique():
            for k in df2.loc[i].index.get_level_values('foobar').unique():
                record = dict(zip(index_names, (i, j, k)))
                record.update((df1.loc[i, j] * df2.loc[i, k]).to_dict())
                records.append(record)
    # Passing ``columns`` keeps the layout even if no record was produced.
    df3 = pd.DataFrame(records, columns=columns)
    df3.set_index(index_names, inplace=True)
    return df3

def method2():
    """Vectorised product of df1 rows with df2 rows of the same first level.

    Reads the module-level ``df1``, ``df2`` and ``columns``.  Both frames
    are expanded by positional row number to one row per output row and the
    resulting arrays are multiplied element-wise.

    NOTE(review): rows are picked purely by position, so this presumably
    requires both frames to hold every level combination in from_product
    order -- confirm before using on other data.

    Returns:
        DataFrame indexed by the product of the unique values of the three
        index levels, with the value columns ``columns[3:]``.
    """
    # unique values of the two df1 levels, then of the df2-only level
    levels = [df1.index.get_level_values(name).unique()
              for name in columns[:2]]
    levels.append(df2.index.get_level_values(columns[2]).unique())
    n_outer, n_mid, n_inner = (len(level) for level in levels)
    index = pd.MultiIndex.from_product(levels, names=columns[:3])

    # df1: every row position repeated once per value of the third level
    rows_1 = np.repeat(np.arange(n_outer * n_mid), n_inner)
    # df2: offset inside the group, shifted to the start of that group
    inner_offset = np.tile(np.arange(n_inner), n_outer * n_mid)
    outer_position = np.repeat(np.arange(n_outer), n_mid * n_inner)
    rows_2 = inner_offset + outer_position * n_inner

    # expand both frames to the output shape and multiply them
    left = df1.values[rows_1, :]
    right = df2.values[rows_2, :]
    return pd.DataFrame(left * right, index=index, columns=columns[3:])

# Benchmark: print the total time in seconds for 50 calls of each method.
from timeit import timeit
print(timeit(method1, number=50))
print(timeit(method2, number=50))
7.96668368373
0.149504332128
# Put 'foobar' on the outer level so one sub-frame per foobar value can be
# selected with .loc.
dft = df2.swaplevel()
# sort_index replaces sortlevel, which was deprecated in pandas 0.20 and
# removed in pandas 1.0.
dft.sort_index(level=0, inplace=True)
# Multiply df1 with each foobar sub-frame (aligned on the shared 'greek'
# level) and stack the results under a new outer 'foobar' level.
df5 = pd.concat(
    [df1 * dft.loc[i, :]
     for i in dft.index.get_level_values('foobar').unique()],
    keys=dft.index.get_level_values('foobar').unique().tolist(),
    names=['foobar'])
df5 = df5.reorder_levels(['greek', 'latin', 'foobar'], axis=0)
df5.sort_index(level=0, inplace=True)
import pandas as pd
import numpy as np

# Values of the three index levels ('greek', 'latin', 'foobar').
a = ['alpha', 'beta']
b = ['A', 'B', 'C']
c = ['foo', 'bar']
# Names of the value columns, then the index names followed by the value
# columns in one list.
data_columns = ['C1', 'C2', 'C3', 'C4']
columns = ['greek', 'latin', 'foobar', *data_columns]

# df1: random normal values, one row per (greek, latin) combination.
df1 = pd.DataFrame(
    np.random.randn(len(a) * len(b), len(data_columns)),
    index=pd.MultiIndex.from_product([a, b], names=columns[:2]),
    columns=data_columns,
)
# df2: fixed integer factors, one row per (greek, foobar) combination.
df2 = pd.DataFrame(
    np.array([
        [1, 0, 1, 0],
        [1, 1, 1, 1],
        [0, 0, 0, 0],
        [0, 2, 0, 4],
    ]),
    index=pd.MultiIndex.from_product(
        [a, c], names=[columns[0], columns[2]]),
    columns=data_columns,
)
def method1():
    """Multiply every df1 row with every df2 row sharing its 'greek' key.

    Reads the module-level ``df1``, ``df2`` and ``columns``.  For each
    'greek' value, each 'latin' row of ``df1`` is multiplied element-wise
    with each 'foobar' row of ``df2``.

    Returns:
        DataFrame indexed by ``columns[:3]`` with one product row per
        (greek, latin, foobar) combination, in loop order.
    """
    index_names = columns[:3]
    # Gather plain records and build the frame once: DataFrame.append and
    # Series.append were removed in pandas 2.0, and appending row by row
    # copies the whole frame on every iteration.
    records = []
    for i in df1.index.get_level_values('greek').unique():
        for j in df1.loc[i].index.get_level_values('latin').unique():
            for k in df2.loc[i].index.get_level_values('foobar').unique():
                record = dict(zip(index_names, (i, j, k)))
                record.update((df1.loc[i, j] * df2.loc[i, k]).to_dict())
                records.append(record)
    # Passing ``columns`` keeps the layout even if no record was produced.
    df3 = pd.DataFrame(records, columns=columns)
    df3.set_index(index_names, inplace=True)
    return df3

def method2():
    """Vectorised product of df1 rows with df2 rows of the same first level.

    Reads the module-level ``df1``, ``df2`` and ``columns``.  Both frames
    are expanded by positional row number to one row per output row and the
    resulting arrays are multiplied element-wise.

    NOTE(review): rows are picked purely by position, so this presumably
    requires both frames to hold every level combination in from_product
    order -- confirm before using on other data.

    Returns:
        DataFrame indexed by the product of the unique values of the three
        index levels, with the value columns ``columns[3:]``.
    """
    # unique values of the two df1 levels, then of the df2-only level
    levels = [df1.index.get_level_values(name).unique()
              for name in columns[:2]]
    levels.append(df2.index.get_level_values(columns[2]).unique())
    n_outer, n_mid, n_inner = (len(level) for level in levels)
    index = pd.MultiIndex.from_product(levels, names=columns[:3])

    # df1: every row position repeated once per value of the third level
    rows_1 = np.repeat(np.arange(n_outer * n_mid), n_inner)
    # df2: offset inside the group, shifted to the start of that group
    inner_offset = np.tile(np.arange(n_inner), n_outer * n_mid)
    outer_position = np.repeat(np.arange(n_outer), n_mid * n_inner)
    rows_2 = inner_offset + outer_position * n_inner

    # expand both frames to the output shape and multiply them
    left = df1.values[rows_1, :]
    right = df2.values[rows_2, :]
    return pd.DataFrame(left * right, index=index, columns=columns[3:])


def method3():
    """Multiply df1 with each 'foobar' slice of df2 and stack the results.

    Reads the module-level ``df1`` and ``df2``.  ``df2`` is re-indexed with
    'foobar' as the outer level; for every foobar value the matching
    sub-frame (indexed by 'greek') is multiplied with ``df1`` and the
    products are concatenated under a new 'foobar' level.

    Returns:
        DataFrame indexed by (greek, latin, foobar), sorted by index.
    """
    # sort_index replaces sortlevel, which was deprecated in pandas 0.20
    # and removed in pandas 1.0.
    dft = df2.swaplevel().sort_index(level=0)
    foobar_values = dft.index.get_level_values('foobar').unique()
    df5 = pd.concat(
        [df1 * dft.loc[value, :] for value in foobar_values],
        keys=foobar_values.tolist(),
        names=['foobar'],
    )
    df5 = df5.reorder_levels(['greek', 'latin', 'foobar'], axis=0)
    return df5.sort_index(level=0)


# Benchmark: print the total time in seconds for 50 calls of each method.
from timeit import timeit
print(timeit(method1, number=50))
print(timeit(method2, number=50))
print(timeit(method3, number=50))
4.089807642158121
0.12291539693251252
0.33667341712862253

提前谢谢

这是没有 for 循环的代码。基本思想是将两个矩阵展开,使它们大小相同、可以逐元素相乘,然后再将它们相乘。

代码:

                   C1        C2        C3        C4
 greek latin                                        
 alpha A      0.208380  0.856373 -1.041598  1.219707
       B      1.547903 -0.001023  0.918973  1.153554
       C      0.195868  2.772840  0.060960  0.311247
 beta  A      0.690405 -1.258012  0.118000 -0.346677
       B      0.488327 -1.206428  0.967658  1.198287
       C      0.420098 -0.165721  0.626893 -0.377909,
                C1  C2  C3  C4
greek foobar                
 alpha foo      1   0   1   0
       bar      1   1   1   1
 beta  foo      0   0   0   0
       bar      0   2   0   4
                           C1        C2        C3        C4
 greek latin foobar                                        
 alpha A     foo     0.208380  0.000000 -1.041598  0.000000
             bar     0.208380  0.856373 -1.041598  1.219707
       B     foo     1.547903 -0.000000  0.918973  0.000000
             bar     1.547903 -0.001023  0.918973  1.153554
       C     foo     0.195868  0.000000  0.060960  0.000000
             bar     0.195868  2.772840  0.060960  0.311247
 beta  A     foo     0.000000 -0.000000  0.000000 -0.000000
             bar     0.000000 -2.516025  0.000000 -1.386708
       B     foo     0.000000 -0.000000  0.000000  0.000000
             bar     0.000000 -2.412855  0.000000  4.793149
       C     foo     0.000000 -0.000000  0.000000 -0.000000
             bar     0.000000 -0.331443  0.000000 -1.511638
# Collect the unique values of each index level: the first two level names
# are looked up on df1, the third on df2.
idx = [df1.index.get_level_values(col).unique() for col in columns[:2]
       ] + [df2.index.get_level_values(columns[2]).unique()]
size = [len(x) for x in idx]
# Result index: every combination of the three levels' unique values.
index = pd.MultiIndex.from_product(idx, names=columns[:3])

# Positional row numbers to take from df1 and df2 for every output row.
# idx_a[0] repeats each of the size[0] * size[1] row numbers size[2] times,
# idx_a[1] cycles through 0..size[2]-1, and idx_b[0] is the position of the
# first-level value for each output row.
idx_a = np.indices((size[0] * size[1], size[2])).reshape(2, -1)
idx_b = np.indices((size[0], size[1] * size[2])).reshape(2, -1)
idx_1 = idx_a[0]
# Offset inside the group plus the start of that group's block of size[2] rows.
# NOTE(review): this positional scheme presumably requires both frames to
# hold every level combination in from_product order -- confirm for real data.
idx_2 = idx_a[1] + idx_b[0] * size[2]

# Expand both frames so that each has exactly one row per output row.
y1 = df1.values[idx_1, :]
y2 = df2.values[idx_2, :]

# Element-wise product of the two expanded arrays.
df = pd.DataFrame(y1 * y2, index=index, columns=columns[3:])
import pandas as pd
import numpy as np

# Values of the three index levels ('greek', 'latin', 'foobar').
a = ['alpha', 'beta']
b = ['A', 'B', 'C']
c = ['foo', 'bar']
# Names of the value columns, then the index names followed by the value
# columns in one list.
data_columns = ['C1', 'C2', 'C3', 'C4']
columns = ['greek', 'latin', 'foobar', *data_columns]

# df1: random normal values, one row per (greek, latin) combination.
df1 = pd.DataFrame(
    np.random.randn(len(a) * len(b), len(data_columns)),
    index=pd.MultiIndex.from_product([a, b], names=columns[:2]),
    columns=data_columns,
)
# df2: fixed integer factors, one row per (greek, foobar) combination.
df2 = pd.DataFrame(
    np.array([
        [1, 0, 1, 0],
        [1, 1, 1, 1],
        [0, 0, 0, 0],
        [0, 2, 0, 4],
    ]),
    index=pd.MultiIndex.from_product(
        [a, c], names=[columns[0], columns[2]]),
    columns=data_columns,
)
def method1():
    """Multiply every df1 row with every df2 row sharing its 'greek' key.

    Reads the module-level ``df1``, ``df2`` and ``columns``.  For each
    'greek' value, each 'latin' row of ``df1`` is multiplied element-wise
    with each 'foobar' row of ``df2``.

    Returns:
        DataFrame indexed by ``columns[:3]`` with one product row per
        (greek, latin, foobar) combination, in loop order.
    """
    index_names = columns[:3]
    # Gather plain records and build the frame once: DataFrame.append and
    # Series.append were removed in pandas 2.0, and appending row by row
    # copies the whole frame on every iteration.
    records = []
    for i in df1.index.get_level_values('greek').unique():
        for j in df1.loc[i].index.get_level_values('latin').unique():
            for k in df2.loc[i].index.get_level_values('foobar').unique():
                record = dict(zip(index_names, (i, j, k)))
                record.update((df1.loc[i, j] * df2.loc[i, k]).to_dict())
                records.append(record)
    # Passing ``columns`` keeps the layout even if no record was produced.
    df3 = pd.DataFrame(records, columns=columns)
    df3.set_index(index_names, inplace=True)
    return df3

def method2():
    """Vectorised product of df1 rows with df2 rows of the same first level.

    Reads the module-level ``df1``, ``df2`` and ``columns``.  Both frames
    are expanded by positional row number to one row per output row and the
    resulting arrays are multiplied element-wise.

    NOTE(review): rows are picked purely by position, so this presumably
    requires both frames to hold every level combination in from_product
    order -- confirm before using on other data.

    Returns:
        DataFrame indexed by the product of the unique values of the three
        index levels, with the value columns ``columns[3:]``.
    """
    # unique values of the two df1 levels, then of the df2-only level
    levels = [df1.index.get_level_values(name).unique()
              for name in columns[:2]]
    levels.append(df2.index.get_level_values(columns[2]).unique())
    n_outer, n_mid, n_inner = (len(level) for level in levels)
    index = pd.MultiIndex.from_product(levels, names=columns[:3])

    # df1: every row position repeated once per value of the third level
    rows_1 = np.repeat(np.arange(n_outer * n_mid), n_inner)
    # df2: offset inside the group, shifted to the start of that group
    inner_offset = np.tile(np.arange(n_inner), n_outer * n_mid)
    outer_position = np.repeat(np.arange(n_outer), n_mid * n_inner)
    rows_2 = inner_offset + outer_position * n_inner

    # expand both frames to the output shape and multiply them
    left = df1.values[rows_1, :]
    right = df2.values[rows_2, :]
    return pd.DataFrame(left * right, index=index, columns=columns[3:])

# Benchmark: print the total time in seconds for 50 calls of each method.
from timeit import timeit
print(timeit(method1, number=50))
print(timeit(method2, number=50))
7.96668368373
0.149504332128
# Put 'foobar' on the outer level so one sub-frame per foobar value can be
# selected with .loc.
dft = df2.swaplevel()
# sort_index replaces sortlevel, which was deprecated in pandas 0.20 and
# removed in pandas 1.0.
dft.sort_index(level=0, inplace=True)
# Multiply df1 with each foobar sub-frame (aligned on the shared 'greek'
# level) and stack the results under a new outer 'foobar' level.
df5 = pd.concat(
    [df1 * dft.loc[i, :]
     for i in dft.index.get_level_values('foobar').unique()],
    keys=dft.index.get_level_values('foobar').unique().tolist(),
    names=['foobar'])
df5 = df5.reorder_levels(['greek', 'latin', 'foobar'], axis=0)
df5.sort_index(level=0, inplace=True)
import pandas as pd
import numpy as np

# Values of the three index levels ('greek', 'latin', 'foobar').
a = ['alpha', 'beta']
b = ['A', 'B', 'C']
c = ['foo', 'bar']
# Names of the value columns, then the index names followed by the value
# columns in one list.
data_columns = ['C1', 'C2', 'C3', 'C4']
columns = ['greek', 'latin', 'foobar', *data_columns]

# df1: random normal values, one row per (greek, latin) combination.
df1 = pd.DataFrame(
    np.random.randn(len(a) * len(b), len(data_columns)),
    index=pd.MultiIndex.from_product([a, b], names=columns[:2]),
    columns=data_columns,
)
# df2: fixed integer factors, one row per (greek, foobar) combination.
df2 = pd.DataFrame(
    np.array([
        [1, 0, 1, 0],
        [1, 1, 1, 1],
        [0, 0, 0, 0],
        [0, 2, 0, 4],
    ]),
    index=pd.MultiIndex.from_product(
        [a, c], names=[columns[0], columns[2]]),
    columns=data_columns,
)
def method1():
    """Multiply every df1 row with every df2 row sharing its 'greek' key.

    Reads the module-level ``df1``, ``df2`` and ``columns``.  For each
    'greek' value, each 'latin' row of ``df1`` is multiplied element-wise
    with each 'foobar' row of ``df2``.

    Returns:
        DataFrame indexed by ``columns[:3]`` with one product row per
        (greek, latin, foobar) combination, in loop order.
    """
    index_names = columns[:3]
    # Gather plain records and build the frame once: DataFrame.append and
    # Series.append were removed in pandas 2.0, and appending row by row
    # copies the whole frame on every iteration.
    records = []
    for i in df1.index.get_level_values('greek').unique():
        for j in df1.loc[i].index.get_level_values('latin').unique():
            for k in df2.loc[i].index.get_level_values('foobar').unique():
                record = dict(zip(index_names, (i, j, k)))
                record.update((df1.loc[i, j] * df2.loc[i, k]).to_dict())
                records.append(record)
    # Passing ``columns`` keeps the layout even if no record was produced.
    df3 = pd.DataFrame(records, columns=columns)
    df3.set_index(index_names, inplace=True)
    return df3

def method2():
    """Vectorised product of df1 rows with df2 rows of the same first level.

    Reads the module-level ``df1``, ``df2`` and ``columns``.  Both frames
    are expanded by positional row number to one row per output row and the
    resulting arrays are multiplied element-wise.

    NOTE(review): rows are picked purely by position, so this presumably
    requires both frames to hold every level combination in from_product
    order -- confirm before using on other data.

    Returns:
        DataFrame indexed by the product of the unique values of the three
        index levels, with the value columns ``columns[3:]``.
    """
    # unique values of the two df1 levels, then of the df2-only level
    levels = [df1.index.get_level_values(name).unique()
              for name in columns[:2]]
    levels.append(df2.index.get_level_values(columns[2]).unique())
    n_outer, n_mid, n_inner = (len(level) for level in levels)
    index = pd.MultiIndex.from_product(levels, names=columns[:3])

    # df1: every row position repeated once per value of the third level
    rows_1 = np.repeat(np.arange(n_outer * n_mid), n_inner)
    # df2: offset inside the group, shifted to the start of that group
    inner_offset = np.tile(np.arange(n_inner), n_outer * n_mid)
    outer_position = np.repeat(np.arange(n_outer), n_mid * n_inner)
    rows_2 = inner_offset + outer_position * n_inner

    # expand both frames to the output shape and multiply them
    left = df1.values[rows_1, :]
    right = df2.values[rows_2, :]
    return pd.DataFrame(left * right, index=index, columns=columns[3:])


def method3():
    """Multiply df1 with each 'foobar' slice of df2 and stack the results.

    Reads the module-level ``df1`` and ``df2``.  ``df2`` is re-indexed with
    'foobar' as the outer level; for every foobar value the matching
    sub-frame (indexed by 'greek') is multiplied with ``df1`` and the
    products are concatenated under a new 'foobar' level.

    Returns:
        DataFrame indexed by (greek, latin, foobar), sorted by index.
    """
    # sort_index replaces sortlevel, which was deprecated in pandas 0.20
    # and removed in pandas 1.0.
    dft = df2.swaplevel().sort_index(level=0)
    foobar_values = dft.index.get_level_values('foobar').unique()
    df5 = pd.concat(
        [df1 * dft.loc[value, :] for value in foobar_values],
        keys=foobar_values.tolist(),
        names=['foobar'],
    )
    df5 = df5.reorder_levels(['greek', 'latin', 'foobar'], axis=0)
    return df5.sort_index(level=0)


from timeit import timeit
# Print the total time in seconds for 50 calls of each implementation.
print(timeit(method1, number=50))
print(timeit(method2, number=50))
print(timeit(method3, number=50))
4.089807642158121
0.12291539693251252
0.33667341712862253
测试数据:

                   C1        C2        C3        C4
 greek latin                                        
 alpha A      0.208380  0.856373 -1.041598  1.219707
       B      1.547903 -0.001023  0.918973  1.153554
       C      0.195868  2.772840  0.060960  0.311247
 beta  A      0.690405 -1.258012  0.118000 -0.346677
       B      0.488327 -1.206428  0.967658  1.198287
       C      0.420098 -0.165721  0.626893 -0.377909,
                C1  C2  C3  C4
greek foobar                
 alpha foo      1   0   1   0
       bar      1   1   1   1
 beta  foo      0   0   0   0
       bar      0   2   0   4
                           C1        C2        C3        C4
 greek latin foobar                                        
 alpha A     foo     0.208380  0.000000 -1.041598  0.000000
             bar     0.208380  0.856373 -1.041598  1.219707
       B     foo     1.547903 -0.000000  0.918973  0.000000
             bar     1.547903 -0.001023  0.918973  1.153554
       C     foo     0.195868  0.000000  0.060960  0.000000
             bar     0.195868  2.772840  0.060960  0.311247
 beta  A     foo     0.000000 -0.000000  0.000000 -0.000000
             bar     0.000000 -2.516025  0.000000 -1.386708
       B     foo     0.000000 -0.000000  0.000000  0.000000
             bar     0.000000 -2.412855  0.000000  4.793149
       C     foo     0.000000 -0.000000  0.000000 -0.000000
             bar     0.000000 -0.331443  0.000000 -1.511638
# Vectorised product of df1 and df2 rows that share the same 'greek' key.
# NOTE(review): rows are selected by position, so this presumably requires both
# frames to be ordered as the full product of their index levels - confirm.
# build an index from the three index columns
idx = [df1.index.get_level_values(col).unique() for col in columns[:2]
       ] + [df2.index.get_level_values(columns[2]).unique()]
size = [len(x) for x in idx]
index = pd.MultiIndex.from_product(idx, names=columns[:3])

# get the indices needed for df1 and df2
# idx_a: coordinates of a (df1 rows x foobar values) grid, flattened row-major
idx_a = np.indices((size[0] * size[1], size[2])).reshape(2, -1)
# idx_b: coordinates of a (greek values x latin*foobar) grid, same flat length
idx_b = np.indices((size[0], size[1] * size[2])).reshape(2, -1)
idx_1 = idx_a[0]
# df2 row = position within foobar + greek position * number of foobar values
idx_2 = idx_a[1] + idx_b[0] * size[2]

# map the two frames into a multiply-able form
y1 = df1.values[idx_1, :]
y2 = df2.values[idx_2, :]

# multiply the two frames
df = pd.DataFrame(y1 * y2, index=index, columns=columns[3:])
import pandas as pd
import numpy as np

# Level values used to build the example indices.
a = ['alpha', 'beta']
b = ['A', 'B', 'C']
c = ['foo', 'bar']
# Value columns shared by both frames; `columns` lists the three index level
# names followed by the value columns.
data_columns = ['C1', 'C2', 'C3', 'C4']
columns = ['greek', 'latin', 'foobar'] + data_columns

# df1: random values, one row per (greek, latin) pair.
df1 = pd.DataFrame(
    np.random.randn(len(a) * len(b), len(data_columns)),
    index=pd.MultiIndex.from_product([a, b], names=columns[:2]),
    columns=data_columns,
)
# df2: fixed integer multipliers, one row per (greek, foobar) pair.
df2 = pd.DataFrame(
    np.array([
        [1, 0, 1, 0],
        [1, 1, 1, 1],
        [0, 0, 0, 0],
        [0, 2, 0, 4],
    ]),
    index=pd.MultiIndex.from_product(
        [a, c], names=[columns[0], columns[2]]),
    columns=data_columns,
)
def method1():
    """Loop baseline: multiply each df1 row with each df2 row sharing its 'greek' key.

    Reads the module-level ``df1`` (indexed by greek/latin), ``df2`` (indexed
    by greek/foobar) and ``columns`` (three index names followed by the value
    column names).

    Returns:
        DataFrame indexed by ``columns[:3]`` with one element-wise product row
        per (greek, latin, foobar) combination.
    """
    key_names = columns[:3]
    rows = []
    for i in df1.index.get_level_values('greek').unique():
        for j in df1.loc[i].index.get_level_values('latin').unique():
            for k in df2.loc[i].index.get_level_values('foobar').unique():
                # The explicit ", :" keeps the key tuple from being read as a
                # (row, column) pair.
                product = df1.loc[(i, j), :] * df2.loc[(i, k), :]
                row = dict(zip(key_names, (i, j, k)))
                row.update(product.to_dict())
                rows.append(row)
    # DataFrame.append / Series.append were removed in pandas 2.0; building the
    # frame once from the collected rows also avoids re-copying it per row.
    df3 = pd.DataFrame(rows, columns=columns)
    df3.set_index(key_names, inplace=True)
    return df3

def method2():
    """Vectorised product of df1 and df2 rows that share the same 'greek' key.

    Reads the module-level ``df1``, ``df2`` and ``columns``. Rows are picked by
    position, so both frames must be ordered as the full Cartesian product of
    their index levels.

    Returns:
        DataFrame indexed by ``columns[:3]`` with columns ``columns[3:]``.
    """
    # Unique values of the three output index levels, in order of appearance.
    levels = [
        df1.index.get_level_values(columns[0]).unique(),
        df1.index.get_level_values(columns[1]).unique(),
        df2.index.get_level_values(columns[2]).unique(),
    ]
    n_greek, n_latin, n_foobar = (len(level) for level in levels)
    out_index = pd.MultiIndex.from_product(levels, names=columns[:3])

    # For every output row: the df1 row position and the position inside the
    # foobar level (grid of df1 rows x foobar values, flattened row-major).
    df1_rows, foobar_pos = np.indices(
        (n_greek * n_latin, n_foobar)).reshape(2, -1)
    # For every output row: the position of its greek value.
    greek_pos = np.indices((n_greek, n_latin * n_foobar)).reshape(2, -1)[0]
    # df2 holds n_foobar consecutive rows per greek value.
    df2_rows = foobar_pos + greek_pos * n_foobar

    # Gather the paired rows and multiply them element-wise.
    left = df1.values[df1_rows, :]
    right = df2.values[df2_rows, :]
    return pd.DataFrame(left * right, index=out_index, columns=columns[3:])

from timeit import timeit
# Print the total time in seconds for 50 calls of each implementation.
print(timeit(method1, number=50))
print(timeit(method2, number=50))
7.96668368373
0.149504332128
# Broadcast-based variant: move 'foobar' to the outer level so each of its
# values selects a greek-indexed slice of df2.
dft = df2.swaplevel()
# DataFrame.sortlevel no longer exists in current pandas; sort_index(level=...)
# is its replacement.
dft.sort_index(level=0, inplace=True)
# df1 * slice aligns on the shared 'greek' level; the per-foobar products are
# stacked under a new outer 'foobar' level.
df5 = pd.concat(
    [df1 * dft.loc[i, :] for i in dft.index.get_level_values('foobar').unique()],
    keys=dft.index.get_level_values('foobar').unique().tolist(),
    names=['foobar'],
)
df5 = df5.reorder_levels(['greek', 'latin', 'foobar'], axis=0)
df5.sort_index(level=0, inplace=True)
import pandas as pd
import numpy as np

# Level values used to build the example indices.
a = ['alpha', 'beta']
b = ['A', 'B', 'C']
c = ['foo', 'bar']
# Value columns shared by both frames; `columns` lists the three index level
# names followed by the value columns.
data_columns = ['C1', 'C2', 'C3', 'C4']
columns = ['greek', 'latin', 'foobar'] + data_columns

# df1: random values, one row per (greek, latin) pair.
df1 = pd.DataFrame(
    np.random.randn(len(a) * len(b), len(data_columns)),
    index=pd.MultiIndex.from_product([a, b], names=columns[:2]),
    columns=data_columns,
)
# df2: fixed integer multipliers, one row per (greek, foobar) pair.
df2 = pd.DataFrame(
    np.array([
        [1, 0, 1, 0],
        [1, 1, 1, 1],
        [0, 0, 0, 0],
        [0, 2, 0, 4],
    ]),
    index=pd.MultiIndex.from_product(
        [a, c], names=[columns[0], columns[2]]),
    columns=data_columns,
)
def method1():
    """Loop baseline: multiply each df1 row with each df2 row sharing its 'greek' key.

    Reads the module-level ``df1`` (indexed by greek/latin), ``df2`` (indexed
    by greek/foobar) and ``columns`` (three index names followed by the value
    column names).

    Returns:
        DataFrame indexed by ``columns[:3]`` with one element-wise product row
        per (greek, latin, foobar) combination.
    """
    key_names = columns[:3]
    rows = []
    for i in df1.index.get_level_values('greek').unique():
        for j in df1.loc[i].index.get_level_values('latin').unique():
            for k in df2.loc[i].index.get_level_values('foobar').unique():
                # The explicit ", :" keeps the key tuple from being read as a
                # (row, column) pair.
                product = df1.loc[(i, j), :] * df2.loc[(i, k), :]
                row = dict(zip(key_names, (i, j, k)))
                row.update(product.to_dict())
                rows.append(row)
    # DataFrame.append / Series.append were removed in pandas 2.0; building the
    # frame once from the collected rows also avoids re-copying it per row.
    df3 = pd.DataFrame(rows, columns=columns)
    df3.set_index(key_names, inplace=True)
    return df3

def method2():
    """Vectorised product of df1 and df2 rows that share the same 'greek' key.

    Reads the module-level ``df1``, ``df2`` and ``columns``. Rows are picked by
    position, so both frames must be ordered as the full Cartesian product of
    their index levels.

    Returns:
        DataFrame indexed by ``columns[:3]`` with columns ``columns[3:]``.
    """
    # Unique values of the three output index levels, in order of appearance.
    levels = [
        df1.index.get_level_values(columns[0]).unique(),
        df1.index.get_level_values(columns[1]).unique(),
        df2.index.get_level_values(columns[2]).unique(),
    ]
    n_greek, n_latin, n_foobar = (len(level) for level in levels)
    out_index = pd.MultiIndex.from_product(levels, names=columns[:3])

    # For every output row: the df1 row position and the position inside the
    # foobar level (grid of df1 rows x foobar values, flattened row-major).
    df1_rows, foobar_pos = np.indices(
        (n_greek * n_latin, n_foobar)).reshape(2, -1)
    # For every output row: the position of its greek value.
    greek_pos = np.indices((n_greek, n_latin * n_foobar)).reshape(2, -1)[0]
    # df2 holds n_foobar consecutive rows per greek value.
    df2_rows = foobar_pos + greek_pos * n_foobar

    # Gather the paired rows and multiply them element-wise.
    left = df1.values[df1_rows, :]
    right = df2.values[df2_rows, :]
    return pd.DataFrame(left * right, index=out_index, columns=columns[3:])


def method3():
    """Broadcast variant: multiply df1 by each 'foobar' slice of df2 and stack the results.

    Reads the module-level ``df1`` (indexed by greek/latin) and ``df2``
    (indexed by greek/foobar).

    Returns:
        DataFrame indexed by (greek, latin, foobar), sorted by index.
    """
    # Put 'foobar' outermost so each of its values selects a greek-indexed
    # slice. DataFrame.sortlevel no longer exists in current pandas;
    # sort_index(level=...) is its replacement.
    dft = df2.swaplevel().sort_index(level=0)
    foobar_keys = dft.index.get_level_values('foobar').unique()
    # df1 * slice aligns on the shared 'greek' level; the per-foobar products
    # are stacked under a new outer 'foobar' level.
    df5 = pd.concat(
        [df1 * dft.loc[key, :] for key in foobar_keys],
        keys=foobar_keys.tolist(),
        names=['foobar'],
    )
    df5 = df5.reorder_levels(['greek', 'latin', 'foobar'], axis=0)
    return df5.sort_index(level=0)


from timeit import timeit
# Print the total time in seconds for 50 calls of each implementation.
print(timeit(method1, number=50))
print(timeit(method2, number=50))
print(timeit(method3, number=50))
4.089807642158121
0.12291539693251252
0.33667341712862253
计时代码:

                   C1        C2        C3        C4
 greek latin                                        
 alpha A      0.208380  0.856373 -1.041598  1.219707
       B      1.547903 -0.001023  0.918973  1.153554
       C      0.195868  2.772840  0.060960  0.311247
 beta  A      0.690405 -1.258012  0.118000 -0.346677
       B      0.488327 -1.206428  0.967658  1.198287
       C      0.420098 -0.165721  0.626893 -0.377909,
                C1  C2  C3  C4
greek foobar                
 alpha foo      1   0   1   0
       bar      1   1   1   1
 beta  foo      0   0   0   0
       bar      0   2   0   4
                           C1        C2        C3        C4
 greek latin foobar                                        
 alpha A     foo     0.208380  0.000000 -1.041598  0.000000
             bar     0.208380  0.856373 -1.041598  1.219707
       B     foo     1.547903 -0.000000  0.918973  0.000000
             bar     1.547903 -0.001023  0.918973  1.153554
       C     foo     0.195868  0.000000  0.060960  0.000000
             bar     0.195868  2.772840  0.060960  0.311247
 beta  A     foo     0.000000 -0.000000  0.000000 -0.000000
             bar     0.000000 -2.516025  0.000000 -1.386708
       B     foo     0.000000 -0.000000  0.000000  0.000000
             bar     0.000000 -2.412855  0.000000  4.793149
       C     foo     0.000000 -0.000000  0.000000 -0.000000
             bar     0.000000 -0.331443  0.000000 -1.511638
# Vectorised product of df1 and df2 rows that share the same 'greek' key.
# NOTE(review): rows are selected by position, so this presumably requires both
# frames to be ordered as the full product of their index levels - confirm.
# build an index from the three index columns
idx = [df1.index.get_level_values(col).unique() for col in columns[:2]
       ] + [df2.index.get_level_values(columns[2]).unique()]
size = [len(x) for x in idx]
index = pd.MultiIndex.from_product(idx, names=columns[:3])

# get the indices needed for df1 and df2
# idx_a: coordinates of a (df1 rows x foobar values) grid, flattened row-major
idx_a = np.indices((size[0] * size[1], size[2])).reshape(2, -1)
# idx_b: coordinates of a (greek values x latin*foobar) grid, same flat length
idx_b = np.indices((size[0], size[1] * size[2])).reshape(2, -1)
idx_1 = idx_a[0]
# df2 row = position within foobar + greek position * number of foobar values
idx_2 = idx_a[1] + idx_b[0] * size[2]

# map the two frames into a multiply-able form
y1 = df1.values[idx_1, :]
y2 = df2.values[idx_2, :]

# multiply the two frames
df = pd.DataFrame(y1 * y2, index=index, columns=columns[3:])
import pandas as pd
import numpy as np

# Level values used to build the example indices.
a = ['alpha', 'beta']
b = ['A', 'B', 'C']
c = ['foo', 'bar']
# Value columns shared by both frames; `columns` lists the three index level
# names followed by the value columns.
data_columns = ['C1', 'C2', 'C3', 'C4']
columns = ['greek', 'latin', 'foobar'] + data_columns

# df1: random values, one row per (greek, latin) pair.
df1 = pd.DataFrame(
    np.random.randn(len(a) * len(b), len(data_columns)),
    index=pd.MultiIndex.from_product([a, b], names=columns[:2]),
    columns=data_columns,
)
# df2: fixed integer multipliers, one row per (greek, foobar) pair.
df2 = pd.DataFrame(
    np.array([
        [1, 0, 1, 0],
        [1, 1, 1, 1],
        [0, 0, 0, 0],
        [0, 2, 0, 4],
    ]),
    index=pd.MultiIndex.from_product(
        [a, c], names=[columns[0], columns[2]]),
    columns=data_columns,
)
def method1():
    """Loop baseline: multiply each df1 row with each df2 row sharing its 'greek' key.

    Reads the module-level ``df1`` (indexed by greek/latin), ``df2`` (indexed
    by greek/foobar) and ``columns`` (three index names followed by the value
    column names).

    Returns:
        DataFrame indexed by ``columns[:3]`` with one element-wise product row
        per (greek, latin, foobar) combination.
    """
    key_names = columns[:3]
    rows = []
    for i in df1.index.get_level_values('greek').unique():
        for j in df1.loc[i].index.get_level_values('latin').unique():
            for k in df2.loc[i].index.get_level_values('foobar').unique():
                # The explicit ", :" keeps the key tuple from being read as a
                # (row, column) pair.
                product = df1.loc[(i, j), :] * df2.loc[(i, k), :]
                row = dict(zip(key_names, (i, j, k)))
                row.update(product.to_dict())
                rows.append(row)
    # DataFrame.append / Series.append were removed in pandas 2.0; building the
    # frame once from the collected rows also avoids re-copying it per row.
    df3 = pd.DataFrame(rows, columns=columns)
    df3.set_index(key_names, inplace=True)
    return df3

def method2():
    """Vectorised product of df1 and df2 rows that share the same 'greek' key.

    Reads the module-level ``df1``, ``df2`` and ``columns``. Rows are picked by
    position, so both frames must be ordered as the full Cartesian product of
    their index levels.

    Returns:
        DataFrame indexed by ``columns[:3]`` with columns ``columns[3:]``.
    """
    # Unique values of the three output index levels, in order of appearance.
    levels = [
        df1.index.get_level_values(columns[0]).unique(),
        df1.index.get_level_values(columns[1]).unique(),
        df2.index.get_level_values(columns[2]).unique(),
    ]
    n_greek, n_latin, n_foobar = (len(level) for level in levels)
    out_index = pd.MultiIndex.from_product(levels, names=columns[:3])

    # For every output row: the df1 row position and the position inside the
    # foobar level (grid of df1 rows x foobar values, flattened row-major).
    df1_rows, foobar_pos = np.indices(
        (n_greek * n_latin, n_foobar)).reshape(2, -1)
    # For every output row: the position of its greek value.
    greek_pos = np.indices((n_greek, n_latin * n_foobar)).reshape(2, -1)[0]
    # df2 holds n_foobar consecutive rows per greek value.
    df2_rows = foobar_pos + greek_pos * n_foobar

    # Gather the paired rows and multiply them element-wise.
    left = df1.values[df1_rows, :]
    right = df2.values[df2_rows, :]
    return pd.DataFrame(left * right, index=out_index, columns=columns[3:])

from timeit import timeit
# Print the total time in seconds for 50 calls of each implementation.
print(timeit(method1, number=50))
print(timeit(method2, number=50))
7.96668368373
0.149504332128
# Broadcast-based variant: move 'foobar' to the outer level so each of its
# values selects a greek-indexed slice of df2.
dft = df2.swaplevel()
# DataFrame.sortlevel no longer exists in current pandas; sort_index(level=...)
# is its replacement.
dft.sort_index(level=0, inplace=True)
# df1 * slice aligns on the shared 'greek' level; the per-foobar products are
# stacked under a new outer 'foobar' level.
df5 = pd.concat(
    [df1 * dft.loc[i, :] for i in dft.index.get_level_values('foobar').unique()],
    keys=dft.index.get_level_values('foobar').unique().tolist(),
    names=['foobar'],
)
df5 = df5.reorder_levels(['greek', 'latin', 'foobar'], axis=0)
df5.sort_index(level=0, inplace=True)
import pandas as pd
import numpy as np

# Level values used to build the example indices.
a = ['alpha', 'beta']
b = ['A', 'B', 'C']
c = ['foo', 'bar']
# Value columns shared by both frames; `columns` lists the three index level
# names followed by the value columns.
data_columns = ['C1', 'C2', 'C3', 'C4']
columns = ['greek', 'latin', 'foobar'] + data_columns

# df1: random values, one row per (greek, latin) pair.
df1 = pd.DataFrame(
    np.random.randn(len(a) * len(b), len(data_columns)),
    index=pd.MultiIndex.from_product([a, b], names=columns[:2]),
    columns=data_columns,
)
# df2: fixed integer multipliers, one row per (greek, foobar) pair.
df2 = pd.DataFrame(
    np.array([
        [1, 0, 1, 0],
        [1, 1, 1, 1],
        [0, 0, 0, 0],
        [0, 2, 0, 4],
    ]),
    index=pd.MultiIndex.from_product(
        [a, c], names=[columns[0], columns[2]]),
    columns=data_columns,
)
def method1():
    """Loop baseline: multiply each df1 row with each df2 row sharing its 'greek' key.

    Reads the module-level ``df1`` (indexed by greek/latin), ``df2`` (indexed
    by greek/foobar) and ``columns`` (three index names followed by the value
    column names).

    Returns:
        DataFrame indexed by ``columns[:3]`` with one element-wise product row
        per (greek, latin, foobar) combination.
    """
    key_names = columns[:3]
    rows = []
    for i in df1.index.get_level_values('greek').unique():
        for j in df1.loc[i].index.get_level_values('latin').unique():
            for k in df2.loc[i].index.get_level_values('foobar').unique():
                # The explicit ", :" keeps the key tuple from being read as a
                # (row, column) pair.
                product = df1.loc[(i, j), :] * df2.loc[(i, k), :]
                row = dict(zip(key_names, (i, j, k)))
                row.update(product.to_dict())
                rows.append(row)
    # DataFrame.append / Series.append were removed in pandas 2.0; building the
    # frame once from the collected rows also avoids re-copying it per row.
    df3 = pd.DataFrame(rows, columns=columns)
    df3.set_index(key_names, inplace=True)
    return df3

def method2():
    """Vectorised product of df1 and df2 rows that share the same 'greek' key.

    Reads the module-level ``df1``, ``df2`` and ``columns``. Rows are picked by
    position, so both frames must be ordered as the full Cartesian product of
    their index levels.

    Returns:
        DataFrame indexed by ``columns[:3]`` with columns ``columns[3:]``.
    """
    # Unique values of the three output index levels, in order of appearance.
    levels = [
        df1.index.get_level_values(columns[0]).unique(),
        df1.index.get_level_values(columns[1]).unique(),
        df2.index.get_level_values(columns[2]).unique(),
    ]
    n_greek, n_latin, n_foobar = (len(level) for level in levels)
    out_index = pd.MultiIndex.from_product(levels, names=columns[:3])

    # For every output row: the df1 row position and the position inside the
    # foobar level (grid of df1 rows x foobar values, flattened row-major).
    df1_rows, foobar_pos = np.indices(
        (n_greek * n_latin, n_foobar)).reshape(2, -1)
    # For every output row: the position of its greek value.
    greek_pos = np.indices((n_greek, n_latin * n_foobar)).reshape(2, -1)[0]
    # df2 holds n_foobar consecutive rows per greek value.
    df2_rows = foobar_pos + greek_pos * n_foobar

    # Gather the paired rows and multiply them element-wise.
    left = df1.values[df1_rows, :]
    right = df2.values[df2_rows, :]
    return pd.DataFrame(left * right, index=out_index, columns=columns[3:])


def method3():
    """Broadcast variant: multiply df1 by each 'foobar' slice of df2 and stack the results.

    Reads the module-level ``df1`` (indexed by greek/latin) and ``df2``
    (indexed by greek/foobar).

    Returns:
        DataFrame indexed by (greek, latin, foobar), sorted by index.
    """
    # Put 'foobar' outermost so each of its values selects a greek-indexed
    # slice. DataFrame.sortlevel no longer exists in current pandas;
    # sort_index(level=...) is its replacement.
    dft = df2.swaplevel().sort_index(level=0)
    foobar_keys = dft.index.get_level_values('foobar').unique()
    # df1 * slice aligns on the shared 'greek' level; the per-foobar products
    # are stacked under a new outer 'foobar' level.
    df5 = pd.concat(
        [df1 * dft.loc[key, :] for key in foobar_keys],
        keys=foobar_keys.tolist(),
        names=['foobar'],
    )
    df5 = df5.reorder_levels(['greek', 'latin', 'foobar'], axis=0)
    return df5.sort_index(level=0)


from timeit import timeit
# Print the total time in seconds for 50 calls of each implementation.
print(timeit(method1, number=50))
print(timeit(method2, number=50))
print(timeit(method3, number=50))
4.089807642158121
0.12291539693251252
0.33667341712862253
结果:

                   C1        C2        C3        C4
 greek latin                                        
 alpha A      0.208380  0.856373 -1.041598  1.219707
       B      1.547903 -0.001023  0.918973  1.153554
       C      0.195868  2.772840  0.060960  0.311247
 beta  A      0.690405 -1.258012  0.118000 -0.346677
       B      0.488327 -1.206428  0.967658  1.198287
       C      0.420098 -0.165721  0.626893 -0.377909,
                C1  C2  C3  C4
greek foobar                
 alpha foo      1   0   1   0
       bar      1   1   1   1
 beta  foo      0   0   0   0
       bar      0   2   0   4
                           C1        C2        C3        C4
 greek latin foobar                                        
 alpha A     foo     0.208380  0.000000 -1.041598  0.000000
             bar     0.208380  0.856373 -1.041598  1.219707
       B     foo     1.547903 -0.000000  0.918973  0.000000
             bar     1.547903 -0.001023  0.918973  1.153554
       C     foo     0.195868  0.000000  0.060960  0.000000
             bar     0.195868  2.772840  0.060960  0.311247
 beta  A     foo     0.000000 -0.000000  0.000000 -0.000000
             bar     0.000000 -2.516025  0.000000 -1.386708
       B     foo     0.000000 -0.000000  0.000000  0.000000
             bar     0.000000 -2.412855  0.000000  4.793149
       C     foo     0.000000 -0.000000  0.000000 -0.000000
             bar     0.000000 -0.331443  0.000000 -1.511638
# Vectorised product of df1 and df2 rows that share the same 'greek' key.
# NOTE(review): rows are selected by position, so this presumably requires both
# frames to be ordered as the full product of their index levels - confirm.
# build an index from the three index columns
idx = [df1.index.get_level_values(col).unique() for col in columns[:2]
       ] + [df2.index.get_level_values(columns[2]).unique()]
size = [len(x) for x in idx]
index = pd.MultiIndex.from_product(idx, names=columns[:3])

# get the indices needed for df1 and df2
# idx_a: coordinates of a (df1 rows x foobar values) grid, flattened row-major
idx_a = np.indices((size[0] * size[1], size[2])).reshape(2, -1)
# idx_b: coordinates of a (greek values x latin*foobar) grid, same flat length
idx_b = np.indices((size[0], size[1] * size[2])).reshape(2, -1)
idx_1 = idx_a[0]
# df2 row = position within foobar + greek position * number of foobar values
idx_2 = idx_a[1] + idx_b[0] * size[2]

# map the two frames into a multiply-able form
y1 = df1.values[idx_1, :]
y2 = df2.values[idx_2, :]

# multiply the two frames
df = pd.DataFrame(y1 * y2, index=index, columns=columns[3:])
import pandas as pd
import numpy as np

# Level values used to build the example indices.
a = ['alpha', 'beta']
b = ['A', 'B', 'C']
c = ['foo', 'bar']
# Value columns shared by both frames; `columns` lists the three index level
# names followed by the value columns.
data_columns = ['C1', 'C2', 'C3', 'C4']
columns = ['greek', 'latin', 'foobar'] + data_columns

# df1: random values, one row per (greek, latin) pair.
df1 = pd.DataFrame(
    np.random.randn(len(a) * len(b), len(data_columns)),
    index=pd.MultiIndex.from_product([a, b], names=columns[:2]),
    columns=data_columns,
)
# df2: fixed integer multipliers, one row per (greek, foobar) pair.
df2 = pd.DataFrame(
    np.array([
        [1, 0, 1, 0],
        [1, 1, 1, 1],
        [0, 0, 0, 0],
        [0, 2, 0, 4],
    ]),
    index=pd.MultiIndex.from_product(
        [a, c], names=[columns[0], columns[2]]),
    columns=data_columns,
)
def method1():
    """Loop baseline: multiply each df1 row with each df2 row sharing its 'greek' key.

    Reads the module-level ``df1`` (indexed by greek/latin), ``df2`` (indexed
    by greek/foobar) and ``columns`` (three index names followed by the value
    column names).

    Returns:
        DataFrame indexed by ``columns[:3]`` with one element-wise product row
        per (greek, latin, foobar) combination.
    """
    key_names = columns[:3]
    rows = []
    for i in df1.index.get_level_values('greek').unique():
        for j in df1.loc[i].index.get_level_values('latin').unique():
            for k in df2.loc[i].index.get_level_values('foobar').unique():
                # The explicit ", :" keeps the key tuple from being read as a
                # (row, column) pair.
                product = df1.loc[(i, j), :] * df2.loc[(i, k), :]
                row = dict(zip(key_names, (i, j, k)))
                row.update(product.to_dict())
                rows.append(row)
    # DataFrame.append / Series.append were removed in pandas 2.0; building the
    # frame once from the collected rows also avoids re-copying it per row.
    df3 = pd.DataFrame(rows, columns=columns)
    df3.set_index(key_names, inplace=True)
    return df3

def method2():
    """Vectorised product of df1 and df2 rows that share the same 'greek' key.

    Reads the module-level ``df1``, ``df2`` and ``columns``. Rows are picked by
    position, so both frames must be ordered as the full Cartesian product of
    their index levels.

    Returns:
        DataFrame indexed by ``columns[:3]`` with columns ``columns[3:]``.
    """
    # Unique values of the three output index levels, in order of appearance.
    levels = [
        df1.index.get_level_values(columns[0]).unique(),
        df1.index.get_level_values(columns[1]).unique(),
        df2.index.get_level_values(columns[2]).unique(),
    ]
    n_greek, n_latin, n_foobar = (len(level) for level in levels)
    out_index = pd.MultiIndex.from_product(levels, names=columns[:3])

    # For every output row: the df1 row position and the position inside the
    # foobar level (grid of df1 rows x foobar values, flattened row-major).
    df1_rows, foobar_pos = np.indices(
        (n_greek * n_latin, n_foobar)).reshape(2, -1)
    # For every output row: the position of its greek value.
    greek_pos = np.indices((n_greek, n_latin * n_foobar)).reshape(2, -1)[0]
    # df2 holds n_foobar consecutive rows per greek value.
    df2_rows = foobar_pos + greek_pos * n_foobar

    # Gather the paired rows and multiply them element-wise.
    left = df1.values[df1_rows, :]
    right = df2.values[df2_rows, :]
    return pd.DataFrame(left * right, index=out_index, columns=columns[3:])

from timeit import timeit
# Print the total time in seconds for 50 calls of each implementation.
print(timeit(method1, number=50))
print(timeit(method2, number=50))
7.96668368373
0.149504332128
# Broadcast-based variant: move 'foobar' to the outer level so each of its
# values selects a greek-indexed slice of df2.
dft = df2.swaplevel()
# DataFrame.sortlevel no longer exists in current pandas; sort_index(level=...)
# is its replacement.
dft.sort_index(level=0, inplace=True)
# df1 * slice aligns on the shared 'greek' level; the per-foobar products are
# stacked under a new outer 'foobar' level.
df5 = pd.concat(
    [df1 * dft.loc[i, :] for i in dft.index.get_level_values('foobar').unique()],
    keys=dft.index.get_level_values('foobar').unique().tolist(),
    names=['foobar'],
)
df5 = df5.reorder_levels(['greek', 'latin', 'foobar'], axis=0)
df5.sort_index(level=0, inplace=True)
import pandas as pd
import numpy as np

# Level values used to build the example indices.
a = ['alpha', 'beta']
b = ['A', 'B', 'C']
c = ['foo', 'bar']
# Value columns shared by both frames; `columns` lists the three index level
# names followed by the value columns.
data_columns = ['C1', 'C2', 'C3', 'C4']
columns = ['greek', 'latin', 'foobar'] + data_columns

# df1: random values, one row per (greek, latin) pair.
df1 = pd.DataFrame(
    np.random.randn(len(a) * len(b), len(data_columns)),
    index=pd.MultiIndex.from_product([a, b], names=columns[:2]),
    columns=data_columns,
)
# df2: fixed integer multipliers, one row per (greek, foobar) pair.
df2 = pd.DataFrame(
    np.array([
        [1, 0, 1, 0],
        [1, 1, 1, 1],
        [0, 0, 0, 0],
        [0, 2, 0, 4],
    ]),
    index=pd.MultiIndex.from_product(
        [a, c], names=[columns[0], columns[2]]),
    columns=data_columns,
)
def method1():
    """Loop baseline: multiply each df1 row with each df2 row sharing its 'greek' key.

    Reads the module-level ``df1`` (indexed by greek/latin), ``df2`` (indexed
    by greek/foobar) and ``columns`` (three index names followed by the value
    column names).

    Returns:
        DataFrame indexed by ``columns[:3]`` with one element-wise product row
        per (greek, latin, foobar) combination.
    """
    key_names = columns[:3]
    rows = []
    for i in df1.index.get_level_values('greek').unique():
        for j in df1.loc[i].index.get_level_values('latin').unique():
            for k in df2.loc[i].index.get_level_values('foobar').unique():
                # The explicit ", :" keeps the key tuple from being read as a
                # (row, column) pair.
                product = df1.loc[(i, j), :] * df2.loc[(i, k), :]
                row = dict(zip(key_names, (i, j, k)))
                row.update(product.to_dict())
                rows.append(row)
    # DataFrame.append / Series.append were removed in pandas 2.0; building the
    # frame once from the collected rows also avoids re-copying it per row.
    df3 = pd.DataFrame(rows, columns=columns)
    df3.set_index(key_names, inplace=True)
    return df3

def method2():
    """Vectorised product of df1 and df2 rows that share the same 'greek' key.

    Reads the module-level ``df1``, ``df2`` and ``columns``. Rows are picked by
    position, so both frames must be ordered as the full Cartesian product of
    their index levels.

    Returns:
        DataFrame indexed by ``columns[:3]`` with columns ``columns[3:]``.
    """
    # Unique values of the three output index levels, in order of appearance.
    levels = [
        df1.index.get_level_values(columns[0]).unique(),
        df1.index.get_level_values(columns[1]).unique(),
        df2.index.get_level_values(columns[2]).unique(),
    ]
    n_greek, n_latin, n_foobar = (len(level) for level in levels)
    out_index = pd.MultiIndex.from_product(levels, names=columns[:3])

    # For every output row: the df1 row position and the position inside the
    # foobar level (grid of df1 rows x foobar values, flattened row-major).
    df1_rows, foobar_pos = np.indices(
        (n_greek * n_latin, n_foobar)).reshape(2, -1)
    # For every output row: the position of its greek value.
    greek_pos = np.indices((n_greek, n_latin * n_foobar)).reshape(2, -1)[0]
    # df2 holds n_foobar consecutive rows per greek value.
    df2_rows = foobar_pos + greek_pos * n_foobar

    # Gather the paired rows and multiply them element-wise.
    left = df1.values[df1_rows, :]
    right = df2.values[df2_rows, :]
    return pd.DataFrame(left * right, index=out_index, columns=columns[3:])


def method3():
    """Broadcast variant: multiply df1 by each 'foobar' slice of df2 and stack the results.

    Reads the module-level ``df1`` (indexed by greek/latin) and ``df2``
    (indexed by greek/foobar).

    Returns:
        DataFrame indexed by (greek, latin, foobar), sorted by index.
    """
    # Put 'foobar' outermost so each of its values selects a greek-indexed
    # slice. DataFrame.sortlevel no longer exists in current pandas;
    # sort_index(level=...) is its replacement.
    dft = df2.swaplevel().sort_index(level=0)
    foobar_keys = dft.index.get_level_values('foobar').unique()
    # df1 * slice aligns on the shared 'greek' level; the per-foobar products
    # are stacked under a new outer 'foobar' level.
    df5 = pd.concat(
        [df1 * dft.loc[key, :] for key in foobar_keys],
        keys=foobar_keys.tolist(),
        names=['foobar'],
    )
    df5 = df5.reorder_levels(['greek', 'latin', 'foobar'], axis=0)
    return df5.sort_index(level=0)


from timeit import timeit
# Print the total time in seconds for 50 calls of each implementation.
print(timeit(method1, number=50))
print(timeit(method2, number=50))
print(timeit(method3, number=50))
4.089807642158121
0.12291539693251252
0.33667341712862253

我创建了下面的解决方案,它似乎可以正常工作并给出正确的结果。虽然 Stephen 的答案仍然是最快的,但这个方案的速度已经足够接近,并且有一个很大的优势:它适用于任意的多级索引(MultiIndex)数据帧,而不仅限于索引是若干列表的笛卡尔积的数据帧。这正是我需要解决的问题,尽管我提供的示例没有体现这一点。感谢 Stephen 针对该示例情形提供了出色且快速的解决方案——我确实从那段代码中学到了一些东西。

代码:

                   C1        C2        C3        C4
 greek latin                                        
 alpha A      0.208380  0.856373 -1.041598  1.219707
       B      1.547903 -0.001023  0.918973  1.153554
       C      0.195868  2.772840  0.060960  0.311247
 beta  A      0.690405 -1.258012  0.118000 -0.346677
       B      0.488327 -1.206428  0.967658  1.198287
       C      0.420098 -0.165721  0.626893 -0.377909,
                C1  C2  C3  C4
greek foobar                
 alpha foo      1   0   1   0
       bar      1   1   1   1
 beta  foo      0   0   0   0
       bar      0   2   0   4
                           C1        C2        C3        C4
 greek latin foobar                                        
 alpha A     foo     0.208380  0.000000 -1.041598  0.000000
             bar     0.208380  0.856373 -1.041598  1.219707
       B     foo     1.547903 -0.000000  0.918973  0.000000
             bar     1.547903 -0.001023  0.918973  1.153554
       C     foo     0.195868  0.000000  0.060960  0.000000
             bar     0.195868  2.772840  0.060960  0.311247
 beta  A     foo     0.000000 -0.000000  0.000000 -0.000000
             bar     0.000000 -2.516025  0.000000 -1.386708
       B     foo     0.000000 -0.000000  0.000000  0.000000
             bar     0.000000 -2.412855  0.000000  4.793149
       C     foo     0.000000 -0.000000  0.000000 -0.000000
             bar     0.000000 -0.331443  0.000000 -1.511638
# Vectorised product of df1 and df2 rows that share the same 'greek' key.
# NOTE(review): rows are selected by position, so this presumably requires both
# frames to be ordered as the full product of their index levels - confirm.
# build an index from the three index columns
idx = [df1.index.get_level_values(col).unique() for col in columns[:2]
       ] + [df2.index.get_level_values(columns[2]).unique()]
size = [len(x) for x in idx]
index = pd.MultiIndex.from_product(idx, names=columns[:3])

# get the indices needed for df1 and df2
# idx_a: coordinates of a (df1 rows x foobar values) grid, flattened row-major
idx_a = np.indices((size[0] * size[1], size[2])).reshape(2, -1)
# idx_b: coordinates of a (greek values x latin*foobar) grid, same flat length
idx_b = np.indices((size[0], size[1] * size[2])).reshape(2, -1)
idx_1 = idx_a[0]
# df2 row = position within foobar + greek position * number of foobar values
idx_2 = idx_a[1] + idx_b[0] * size[2]

# map the two frames into a multiply-able form
y1 = df1.values[idx_1, :]
y2 = df2.values[idx_2, :]

# multiply the two frames
df = pd.DataFrame(y1 * y2, index=index, columns=columns[3:])
import pandas as pd
import numpy as np

# Level values used to build the example indices.
a = ['alpha', 'beta']
b = ['A', 'B', 'C']
c = ['foo', 'bar']
# Value columns shared by both frames; `columns` lists the three index level
# names followed by the value columns.
data_columns = ['C1', 'C2', 'C3', 'C4']
columns = ['greek', 'latin', 'foobar'] + data_columns

# df1: random values, one row per (greek, latin) pair.
df1 = pd.DataFrame(
    np.random.randn(len(a) * len(b), len(data_columns)),
    index=pd.MultiIndex.from_product([a, b], names=columns[:2]),
    columns=data_columns,
)
# df2: fixed integer multipliers, one row per (greek, foobar) pair.
df2 = pd.DataFrame(
    np.array([
        [1, 0, 1, 0],
        [1, 1, 1, 1],
        [0, 0, 0, 0],
        [0, 2, 0, 4],
    ]),
    index=pd.MultiIndex.from_product(
        [a, c], names=[columns[0], columns[2]]),
    columns=data_columns,
)
def method1():
    """Loop baseline: multiply each df1 row with each df2 row sharing its 'greek' key.

    Reads the module-level ``df1`` (indexed by greek/latin), ``df2`` (indexed
    by greek/foobar) and ``columns`` (three index names followed by the value
    column names).

    Returns:
        DataFrame indexed by ``columns[:3]`` with one element-wise product row
        per (greek, latin, foobar) combination.
    """
    key_names = columns[:3]
    rows = []
    for i in df1.index.get_level_values('greek').unique():
        for j in df1.loc[i].index.get_level_values('latin').unique():
            for k in df2.loc[i].index.get_level_values('foobar').unique():
                # The explicit ", :" keeps the key tuple from being read as a
                # (row, column) pair.
                product = df1.loc[(i, j), :] * df2.loc[(i, k), :]
                row = dict(zip(key_names, (i, j, k)))
                row.update(product.to_dict())
                rows.append(row)
    # DataFrame.append / Series.append were removed in pandas 2.0; building the
    # frame once from the collected rows also avoids re-copying it per row.
    df3 = pd.DataFrame(rows, columns=columns)
    df3.set_index(key_names, inplace=True)
    return df3

def method2():
    """Vectorised product of df1 and df2 rows that share the same 'greek' key.

    Reads the module-level ``df1``, ``df2`` and ``columns``. Rows are picked by
    position, so both frames must be ordered as the full Cartesian product of
    their index levels.

    Returns:
        DataFrame indexed by ``columns[:3]`` with columns ``columns[3:]``.
    """
    # Unique values of the three output index levels, in order of appearance.
    levels = [
        df1.index.get_level_values(columns[0]).unique(),
        df1.index.get_level_values(columns[1]).unique(),
        df2.index.get_level_values(columns[2]).unique(),
    ]
    n_greek, n_latin, n_foobar = (len(level) for level in levels)
    out_index = pd.MultiIndex.from_product(levels, names=columns[:3])

    # For every output row: the df1 row position and the position inside the
    # foobar level (grid of df1 rows x foobar values, flattened row-major).
    df1_rows, foobar_pos = np.indices(
        (n_greek * n_latin, n_foobar)).reshape(2, -1)
    # For every output row: the position of its greek value.
    greek_pos = np.indices((n_greek, n_latin * n_foobar)).reshape(2, -1)[0]
    # df2 holds n_foobar consecutive rows per greek value.
    df2_rows = foobar_pos + greek_pos * n_foobar

    # Gather the paired rows and multiply them element-wise.
    left = df1.values[df1_rows, :]
    right = df2.values[df2_rows, :]
    return pd.DataFrame(left * right, index=out_index, columns=columns[3:])

from timeit import timeit
# Print the total time in seconds for 50 calls of each implementation.
print(timeit(method1, number=50))
print(timeit(method2, number=50))
7.96668368373
0.149504332128
# Broadcast-based variant: move 'foobar' to the outer level so each of its
# values selects a greek-indexed slice of df2.
dft = df2.swaplevel()
# DataFrame.sortlevel no longer exists in current pandas; sort_index(level=...)
# is its replacement.
dft.sort_index(level=0, inplace=True)
# df1 * slice aligns on the shared 'greek' level; the per-foobar products are
# stacked under a new outer 'foobar' level.
df5 = pd.concat(
    [df1 * dft.loc[i, :] for i in dft.index.get_level_values('foobar').unique()],
    keys=dft.index.get_level_values('foobar').unique().tolist(),
    names=['foobar'],
)
df5 = df5.reorder_levels(['greek', 'latin', 'foobar'], axis=0)
df5.sort_index(level=0, inplace=True)
import pandas as pd
import numpy as np

# Level values used to build the example indices.
a = ['alpha', 'beta']
b = ['A', 'B', 'C']
c = ['foo', 'bar']
# Value columns shared by both frames; `columns` lists the three index level
# names followed by the value columns.
data_columns = ['C1', 'C2', 'C3', 'C4']
columns = ['greek', 'latin', 'foobar'] + data_columns

# df1: random values, one row per (greek, latin) pair.
df1 = pd.DataFrame(
    np.random.randn(len(a) * len(b), len(data_columns)),
    index=pd.MultiIndex.from_product([a, b], names=columns[:2]),
    columns=data_columns,
)
# df2: fixed integer multipliers, one row per (greek, foobar) pair.
df2 = pd.DataFrame(
    np.array([
        [1, 0, 1, 0],
        [1, 1, 1, 1],
        [0, 0, 0, 0],
        [0, 2, 0, 4],
    ]),
    index=pd.MultiIndex.from_product(
        [a, c], names=[columns[0], columns[2]]),
    columns=data_columns,
)
def method1():
    """Loop baseline: multiply each df1 row with each df2 row sharing its 'greek' key.

    Reads the module-level ``df1`` (indexed by greek/latin), ``df2`` (indexed
    by greek/foobar) and ``columns`` (three index names followed by the value
    column names).

    Returns:
        DataFrame indexed by ``columns[:3]`` with one element-wise product row
        per (greek, latin, foobar) combination.
    """
    key_names = columns[:3]
    rows = []
    for i in df1.index.get_level_values('greek').unique():
        for j in df1.loc[i].index.get_level_values('latin').unique():
            for k in df2.loc[i].index.get_level_values('foobar').unique():
                # The explicit ", :" keeps the key tuple from being read as a
                # (row, column) pair.
                product = df1.loc[(i, j), :] * df2.loc[(i, k), :]
                row = dict(zip(key_names, (i, j, k)))
                row.update(product.to_dict())
                rows.append(row)
    # DataFrame.append / Series.append were removed in pandas 2.0; building the
    # frame once from the collected rows also avoids re-copying it per row.
    df3 = pd.DataFrame(rows, columns=columns)
    df3.set_index(key_names, inplace=True)
    return df3

def method2():
    """Vectorised product of df1 and df2 rows that share the same 'greek' key.

    Reads the module-level ``df1``, ``df2`` and ``columns``. Rows are picked by
    position, so both frames must be ordered as the full Cartesian product of
    their index levels.

    Returns:
        DataFrame indexed by ``columns[:3]`` with columns ``columns[3:]``.
    """
    # Unique values of the three output index levels, in order of appearance.
    levels = [
        df1.index.get_level_values(columns[0]).unique(),
        df1.index.get_level_values(columns[1]).unique(),
        df2.index.get_level_values(columns[2]).unique(),
    ]
    n_greek, n_latin, n_foobar = (len(level) for level in levels)
    out_index = pd.MultiIndex.from_product(levels, names=columns[:3])

    # For every output row: the df1 row position and the position inside the
    # foobar level (grid of df1 rows x foobar values, flattened row-major).
    df1_rows, foobar_pos = np.indices(
        (n_greek * n_latin, n_foobar)).reshape(2, -1)
    # For every output row: the position of its greek value.
    greek_pos = np.indices((n_greek, n_latin * n_foobar)).reshape(2, -1)[0]
    # df2 holds n_foobar consecutive rows per greek value.
    df2_rows = foobar_pos + greek_pos * n_foobar

    # Gather the paired rows and multiply them element-wise.
    left = df1.values[df1_rows, :]
    right = df2.values[df2_rows, :]
    return pd.DataFrame(left * right, index=out_index, columns=columns[3:])


def method3():
    """Broadcast variant: multiply df1 by each 'foobar' slice of df2 and stack the results.

    Reads the module-level ``df1`` (indexed by greek/latin) and ``df2``
    (indexed by greek/foobar).

    Returns:
        DataFrame indexed by (greek, latin, foobar), sorted by index.
    """
    # Put 'foobar' outermost so each of its values selects a greek-indexed
    # slice. DataFrame.sortlevel no longer exists in current pandas;
    # sort_index(level=...) is its replacement.
    dft = df2.swaplevel().sort_index(level=0)
    foobar_keys = dft.index.get_level_values('foobar').unique()
    # df1 * slice aligns on the shared 'greek' level; the per-foobar products
    # are stacked under a new outer 'foobar' level.
    df5 = pd.concat(
        [df1 * dft.loc[key, :] for key in foobar_keys],
        keys=foobar_keys.tolist(),
        names=['foobar'],
    )
    df5 = df5.reorder_levels(['greek', 'latin', 'foobar'], axis=0)
    return df5.sort_index(level=0)


from timeit import timeit
# Print the total time in seconds for 50 calls of each implementation.
print(timeit(method1, number=50))
print(timeit(method2, number=50))
print(timeit(method3, number=50))
4.089807642158121
0.12291539693251252
0.33667341712862253
测试数据:

                   C1        C2        C3        C4
 greek latin                                        
 alpha A      0.208380  0.856373 -1.041598  1.219707
       B      1.547903 -0.001023  0.918973  1.153554
       C      0.195868  2.772840  0.060960  0.311247
 beta  A      0.690405 -1.258012  0.118000 -0.346677
       B      0.488327 -1.206428  0.967658  1.198287
       C      0.420098 -0.165721  0.626893 -0.377909,
                C1  C2  C3  C4
greek foobar                
 alpha foo      1   0   1   0
       bar      1   1   1   1
 beta  foo      0   0   0   0
       bar      0   2   0   4
                           C1        C2        C3        C4
 greek latin foobar                                        
 alpha A     foo     0.208380  0.000000 -1.041598  0.000000
             bar     0.208380  0.856373 -1.041598  1.219707
       B     foo     1.547903 -0.000000  0.918973  0.000000
             bar     1.547903 -0.001023  0.918973  1.153554
       C     foo     0.195868  0.000000  0.060960  0.000000
             bar     0.195868  2.772840  0.060960  0.311247
 beta  A     foo     0.000000 -0.000000  0.000000 -0.000000
             bar     0.000000 -2.516025  0.000000 -1.386708
       B     foo     0.000000 -0.000000  0.000000  0.000000
             bar     0.000000 -2.412855  0.000000  4.793149
       C     foo     0.000000 -0.000000  0.000000 -0.000000
             bar     0.000000 -0.331443  0.000000 -1.511638
# Vectorised product of df1 and df2 rows that share the same 'greek' key.
# NOTE(review): rows are selected by position, so this presumably requires both
# frames to be ordered as the full product of their index levels - confirm.
# build an index from the three index columns
idx = [df1.index.get_level_values(col).unique() for col in columns[:2]
       ] + [df2.index.get_level_values(columns[2]).unique()]
size = [len(x) for x in idx]
index = pd.MultiIndex.from_product(idx, names=columns[:3])

# get the indices needed for df1 and df2
# idx_a: coordinates of a (df1 rows x foobar values) grid, flattened row-major
idx_a = np.indices((size[0] * size[1], size[2])).reshape(2, -1)
# idx_b: coordinates of a (greek values x latin*foobar) grid, same flat length
idx_b = np.indices((size[0], size[1] * size[2])).reshape(2, -1)
idx_1 = idx_a[0]
# df2 row = position within foobar + greek position * number of foobar values
idx_2 = idx_a[1] + idx_b[0] * size[2]

# map the two frames into a multiply-able form
y1 = df1.values[idx_1, :]
y2 = df2.values[idx_2, :]

# multiply the two frames
df = pd.DataFrame(y1 * y2, index=index, columns=columns[3:])
import pandas as pd
import numpy as np

# Labels for the three index levels and the shared data columns.
a = ['alpha', 'beta']
b = ['A', 'B', 'C']
c = ['foo', 'bar']
data_columns = ['C1', 'C2', 'C3', 'C4']
columns = ['greek', 'latin', 'foobar'] + data_columns

# df1: one row of standard-normal draws per (greek, latin) pair.
df1 = pd.DataFrame(
    np.random.randn(len(a) * len(b), len(data_columns)),
    index=pd.MultiIndex.from_product([a, b], names=columns[:2]),
    columns=data_columns,
)

# df2: fixed integer multipliers, one row per (greek, foobar) pair.
df2 = pd.DataFrame(
    np.array([
        [1, 0, 1, 0],
        [1, 1, 1, 1],
        [0, 0, 0, 0],
        [0, 2, 0, 4],
    ]),
    index=pd.MultiIndex.from_product(
        [a, c], names=[columns[0], columns[2]]),
    columns=data_columns,
)
def method1():
    """Multiply every ``df1`` row with every ``df2`` row of the same 'greek' label.

    Reads the module-level ``df1`` (indexed by 'greek', 'latin'), ``df2``
    (indexed by 'greek', 'foobar') and ``columns``.  This is the plain
    label-based loop implementation.

    The rows are collected in a list and the frame is built once, because
    ``DataFrame.append`` / ``Series.append`` no longer exist in pandas 2.x
    and growing a frame row by row re-copies it on every iteration.

    Returns:
        A DataFrame indexed by ``columns[:3]`` with the data columns
        ``columns[3:]``; one row per (greek, latin, foobar) combination.
    """
    keys = []
    rows = []
    for greek in df1.index.get_level_values('greek').unique():
        latin_labels = df1.loc[greek].index.get_level_values('latin').unique()
        foobar_labels = df2.loc[greek].index.get_level_values('foobar').unique()
        for latin in latin_labels:
            for foobar in foobar_labels:
                keys.append((greek, latin, foobar))
                rows.append(
                    df1.loc[(greek, latin), :] * df2.loc[(greek, foobar), :])

    if not rows:
        # Nothing to combine: return an empty frame with the expected layout
        # (MultiIndex.from_tuples cannot infer levels from an empty list).
        return pd.DataFrame(columns=columns).set_index(columns[:3])

    index = pd.MultiIndex.from_tuples(keys, names=columns[:3])
    return pd.DataFrame(rows, index=index, columns=columns[3:])

def method2():
    """Vectorised product of ``df1`` and ``df2`` rows that share a first level.

    Reads the module-level ``df1``, ``df2`` and ``columns``.  The first two
    output index levels are taken from ``df1``, the third from ``df2``; rows
    are looked up by position, so both frames are expected to hold one row
    per label combination in product order.

    Returns:
        A DataFrame indexed by ``columns[:3]`` with data columns
        ``columns[3:]``, holding the element-wise row products.
    """
    # Distinct labels per output level: two levels from df1, one from df2.
    labels = [
        df1.index.get_level_values(columns[0]).unique(),
        df1.index.get_level_values(columns[1]).unique(),
        df2.index.get_level_values(columns[2]).unique(),
    ]
    n_first, n_second, n_third = map(len, labels)

    # Position of every output row along each of the three levels.
    first, second, third = np.indices(
        (n_first, n_second, n_third)).reshape(3, -1)

    # Rows of df1 / df2 feeding each output row: both frames are addressed
    # as (first-level position) * (rows per first-level label) + offset.
    left = df1.values[first * n_second + second, :]
    right = df2.values[first * n_third + third, :]

    # Multiply the two expanded frames element-wise.
    return pd.DataFrame(
        left * right,
        index=pd.MultiIndex.from_product(labels, names=columns[:3]),
        columns=columns[3:],
    )

from timeit import timeit
# Time 50 calls of each implementation and print the total seconds.
for _method in (method1, method2):
    print(timeit(_method, number=50))
7.96668368373
0.149504332128
# Move 'foobar' to the outer level and sort so each label can be sliced
# with .loc.
dft = df2.swaplevel()
# sortlevel() was removed from pandas; sort_index(level=...) replaces it.
dft.sort_index(level=0, inplace=True)
# One product frame per 'foobar' label, stacked under a new outer 'foobar'
# level.  df1 * <frame indexed by 'greek'> relies on pandas aligning the two
# indexes on their shared 'greek' level.
df5 = pd.concat(
    [df1 * dft.loc[i, :] for i in dft.index.get_level_values('foobar').unique()],
    keys=dft.index.get_level_values('foobar').unique().tolist(),
    names=['foobar'])
# Restore the (greek, latin, foobar) level order and sort the rows.
df5 = df5.reorder_levels(['greek', 'latin', 'foobar'], axis=0)
df5.sort_index(level=0, inplace=True)
import pandas as pd
import numpy as np

# Labels for the three index levels and the shared data columns.
a = ['alpha', 'beta']
b = ['A', 'B', 'C']
c = ['foo', 'bar']
data_columns = ['C1', 'C2', 'C3', 'C4']
columns = ['greek', 'latin', 'foobar'] + data_columns

# df1: one row of standard-normal draws per (greek, latin) pair.
df1 = pd.DataFrame(
    np.random.randn(len(a) * len(b), len(data_columns)),
    index=pd.MultiIndex.from_product([a, b], names=columns[:2]),
    columns=data_columns,
)

# df2: fixed integer multipliers, one row per (greek, foobar) pair.
df2 = pd.DataFrame(
    np.array([
        [1, 0, 1, 0],
        [1, 1, 1, 1],
        [0, 0, 0, 0],
        [0, 2, 0, 4],
    ]),
    index=pd.MultiIndex.from_product(
        [a, c], names=[columns[0], columns[2]]),
    columns=data_columns,
)
def method1():
    """Multiply every ``df1`` row with every ``df2`` row of the same 'greek' label.

    Reads the module-level ``df1`` (indexed by 'greek', 'latin'), ``df2``
    (indexed by 'greek', 'foobar') and ``columns``.  This is the plain
    label-based loop implementation.

    The rows are collected in a list and the frame is built once, because
    ``DataFrame.append`` / ``Series.append`` no longer exist in pandas 2.x
    and growing a frame row by row re-copies it on every iteration.

    Returns:
        A DataFrame indexed by ``columns[:3]`` with the data columns
        ``columns[3:]``; one row per (greek, latin, foobar) combination.
    """
    keys = []
    rows = []
    for greek in df1.index.get_level_values('greek').unique():
        latin_labels = df1.loc[greek].index.get_level_values('latin').unique()
        foobar_labels = df2.loc[greek].index.get_level_values('foobar').unique()
        for latin in latin_labels:
            for foobar in foobar_labels:
                keys.append((greek, latin, foobar))
                rows.append(
                    df1.loc[(greek, latin), :] * df2.loc[(greek, foobar), :])

    if not rows:
        # Nothing to combine: return an empty frame with the expected layout
        # (MultiIndex.from_tuples cannot infer levels from an empty list).
        return pd.DataFrame(columns=columns).set_index(columns[:3])

    index = pd.MultiIndex.from_tuples(keys, names=columns[:3])
    return pd.DataFrame(rows, index=index, columns=columns[3:])

def method2():
    """Vectorised product of ``df1`` and ``df2`` rows that share a first level.

    Reads the module-level ``df1``, ``df2`` and ``columns``.  The first two
    output index levels are taken from ``df1``, the third from ``df2``; rows
    are looked up by position, so both frames are expected to hold one row
    per label combination in product order.

    Returns:
        A DataFrame indexed by ``columns[:3]`` with data columns
        ``columns[3:]``, holding the element-wise row products.
    """
    # Distinct labels per output level: two levels from df1, one from df2.
    labels = [
        df1.index.get_level_values(columns[0]).unique(),
        df1.index.get_level_values(columns[1]).unique(),
        df2.index.get_level_values(columns[2]).unique(),
    ]
    n_first, n_second, n_third = map(len, labels)

    # Position of every output row along each of the three levels.
    first, second, third = np.indices(
        (n_first, n_second, n_third)).reshape(3, -1)

    # Rows of df1 / df2 feeding each output row: both frames are addressed
    # as (first-level position) * (rows per first-level label) + offset.
    left = df1.values[first * n_second + second, :]
    right = df2.values[first * n_third + third, :]

    # Multiply the two expanded frames element-wise.
    return pd.DataFrame(
        left * right,
        index=pd.MultiIndex.from_product(labels, names=columns[:3]),
        columns=columns[3:],
    )


def method3():
    """Build the row-by-row product with one aligned multiply per 'foobar' label.

    Reads the module-level ``df1`` (indexed by 'greek', 'latin') and ``df2``
    (indexed by 'greek', 'foobar').  Uses ``sort_index(level=0)`` in place of
    ``sortlevel``, which was removed from pandas.

    Returns:
        A DataFrame indexed by ('greek', 'latin', 'foobar'), sorted by its
        index, with the same data columns as the inputs.
    """
    # Move 'foobar' outermost and sort so each label can be sliced with .loc.
    dft = df2.swaplevel().sort_index(level=0)
    foobar_labels = dft.index.get_level_values('foobar').unique()

    # dft.loc[label, :] is indexed by 'greek' only; the multiplication relies
    # on pandas aligning it with df1 on the shared 'greek' level.
    df5 = pd.concat(
        [df1 * dft.loc[label, :] for label in foobar_labels],
        keys=foobar_labels.tolist(),
        names=['foobar'],
    )
    df5 = df5.reorder_levels(['greek', 'latin', 'foobar'], axis=0)
    return df5.sort_index(level=0)


from timeit import timeit
# Time 50 calls of each implementation and print the total seconds.
for _method in (method1, method2, method3):
    print(timeit(_method, number=50))
4.089807642158121
0.12291539693251252
0.33667341712862253
定时代码:

                   C1        C2        C3        C4
 greek latin                                        
 alpha A      0.208380  0.856373 -1.041598  1.219707
       B      1.547903 -0.001023  0.918973  1.153554
       C      0.195868  2.772840  0.060960  0.311247
 beta  A      0.690405 -1.258012  0.118000 -0.346677
       B      0.488327 -1.206428  0.967658  1.198287
       C      0.420098 -0.165721  0.626893 -0.377909,
                C1  C2  C3  C4
greek foobar                
 alpha foo      1   0   1   0
       bar      1   1   1   1
 beta  foo      0   0   0   0
       bar      0   2   0   4
                           C1        C2        C3        C4
 greek latin foobar                                        
 alpha A     foo     0.208380  0.000000 -1.041598  0.000000
             bar     0.208380  0.856373 -1.041598  1.219707
       B     foo     1.547903 -0.000000  0.918973  0.000000
             bar     1.547903 -0.001023  0.918973  1.153554
       C     foo     0.195868  0.000000  0.060960  0.000000
             bar     0.195868  2.772840  0.060960  0.311247
 beta  A     foo     0.000000 -0.000000  0.000000 -0.000000
             bar     0.000000 -2.516025  0.000000 -1.386708
       B     foo     0.000000 -0.000000  0.000000  0.000000
             bar     0.000000 -2.412855  0.000000  4.793149
       C     foo     0.000000 -0.000000  0.000000 -0.000000
             bar     0.000000 -0.331443  0.000000 -1.511638
# Collect the distinct labels of each output index level: the first two
# levels come from df1, the third from df2.
idx = [df1.index.get_level_values(col).unique() for col in columns[:2]
       ] + [df2.index.get_level_values(columns[2]).unique()]
# Number of distinct labels per level.
size = [len(x) for x in idx]
# Output index: every combination of the three label sets.
index = pd.MultiIndex.from_product(idx, names=columns[:3])

# Row positions to take from df1 and df2 for each output row.
# idx_a[0] numbers the (level 0, level 1) pairs, each repeated size[2]
# times; idx_a[1] cycles through the level-2 positions.
idx_a = np.indices((size[0] * size[1], size[2])).reshape(2, -1)
# idx_b[0] is the level-0 position of each output row.
idx_b = np.indices((size[0], size[1] * size[2])).reshape(2, -1)
idx_1 = idx_a[0]
# df2 position = level-2 position, offset by size[2] rows per level-0 label.
idx_2 = idx_a[1] + idx_b[0] * size[2]
# NOTE(review): the positional lookups assume df1 and df2 rows are stored in
# full product order of their index levels -- confirm for real data.

# Expand both frames to one row per output row.
y1 = df1.values[idx_1, :]
y2 = df2.values[idx_2, :]

# Element-wise product, labelled with the combined index.
df = pd.DataFrame(y1 * y2, index=index, columns=columns[3:])
import pandas as pd
import numpy as np

# Labels for the three index levels and the shared data columns.
a = ['alpha', 'beta']
b = ['A', 'B', 'C']
c = ['foo', 'bar']
data_columns = ['C1', 'C2', 'C3', 'C4']
columns = ['greek', 'latin', 'foobar'] + data_columns

# df1: one row of standard-normal draws per (greek, latin) pair.
df1 = pd.DataFrame(
    np.random.randn(len(a) * len(b), len(data_columns)),
    index=pd.MultiIndex.from_product([a, b], names=columns[:2]),
    columns=data_columns,
)

# df2: fixed integer multipliers, one row per (greek, foobar) pair.
df2 = pd.DataFrame(
    np.array([
        [1, 0, 1, 0],
        [1, 1, 1, 1],
        [0, 0, 0, 0],
        [0, 2, 0, 4],
    ]),
    index=pd.MultiIndex.from_product(
        [a, c], names=[columns[0], columns[2]]),
    columns=data_columns,
)
def method1():
    """Multiply every ``df1`` row with every ``df2`` row of the same 'greek' label.

    Reads the module-level ``df1`` (indexed by 'greek', 'latin'), ``df2``
    (indexed by 'greek', 'foobar') and ``columns``.  This is the plain
    label-based loop implementation.

    The rows are collected in a list and the frame is built once, because
    ``DataFrame.append`` / ``Series.append`` no longer exist in pandas 2.x
    and growing a frame row by row re-copies it on every iteration.

    Returns:
        A DataFrame indexed by ``columns[:3]`` with the data columns
        ``columns[3:]``; one row per (greek, latin, foobar) combination.
    """
    keys = []
    rows = []
    for greek in df1.index.get_level_values('greek').unique():
        latin_labels = df1.loc[greek].index.get_level_values('latin').unique()
        foobar_labels = df2.loc[greek].index.get_level_values('foobar').unique()
        for latin in latin_labels:
            for foobar in foobar_labels:
                keys.append((greek, latin, foobar))
                rows.append(
                    df1.loc[(greek, latin), :] * df2.loc[(greek, foobar), :])

    if not rows:
        # Nothing to combine: return an empty frame with the expected layout
        # (MultiIndex.from_tuples cannot infer levels from an empty list).
        return pd.DataFrame(columns=columns).set_index(columns[:3])

    index = pd.MultiIndex.from_tuples(keys, names=columns[:3])
    return pd.DataFrame(rows, index=index, columns=columns[3:])

def method2():
    """Vectorised product of ``df1`` and ``df2`` rows that share a first level.

    Reads the module-level ``df1``, ``df2`` and ``columns``.  The first two
    output index levels are taken from ``df1``, the third from ``df2``; rows
    are looked up by position, so both frames are expected to hold one row
    per label combination in product order.

    Returns:
        A DataFrame indexed by ``columns[:3]`` with data columns
        ``columns[3:]``, holding the element-wise row products.
    """
    # Distinct labels per output level: two levels from df1, one from df2.
    labels = [
        df1.index.get_level_values(columns[0]).unique(),
        df1.index.get_level_values(columns[1]).unique(),
        df2.index.get_level_values(columns[2]).unique(),
    ]
    n_first, n_second, n_third = map(len, labels)

    # Position of every output row along each of the three levels.
    first, second, third = np.indices(
        (n_first, n_second, n_third)).reshape(3, -1)

    # Rows of df1 / df2 feeding each output row: both frames are addressed
    # as (first-level position) * (rows per first-level label) + offset.
    left = df1.values[first * n_second + second, :]
    right = df2.values[first * n_third + third, :]

    # Multiply the two expanded frames element-wise.
    return pd.DataFrame(
        left * right,
        index=pd.MultiIndex.from_product(labels, names=columns[:3]),
        columns=columns[3:],
    )

from timeit import timeit
# Time 50 calls of each implementation and print the total seconds.
for _method in (method1, method2):
    print(timeit(_method, number=50))
7.96668368373
0.149504332128
# Move 'foobar' to the outer level and sort so each label can be sliced
# with .loc.
dft = df2.swaplevel()
# sortlevel() was removed from pandas; sort_index(level=...) replaces it.
dft.sort_index(level=0, inplace=True)
# One product frame per 'foobar' label, stacked under a new outer 'foobar'
# level.  df1 * <frame indexed by 'greek'> relies on pandas aligning the two
# indexes on their shared 'greek' level.
df5 = pd.concat(
    [df1 * dft.loc[i, :] for i in dft.index.get_level_values('foobar').unique()],
    keys=dft.index.get_level_values('foobar').unique().tolist(),
    names=['foobar'])
# Restore the (greek, latin, foobar) level order and sort the rows.
df5 = df5.reorder_levels(['greek', 'latin', 'foobar'], axis=0)
df5.sort_index(level=0, inplace=True)
import pandas as pd
import numpy as np

# Labels for the three index levels and the shared data columns.
a = ['alpha', 'beta']
b = ['A', 'B', 'C']
c = ['foo', 'bar']
data_columns = ['C1', 'C2', 'C3', 'C4']
columns = ['greek', 'latin', 'foobar'] + data_columns

# df1: one row of standard-normal draws per (greek, latin) pair.
df1 = pd.DataFrame(
    np.random.randn(len(a) * len(b), len(data_columns)),
    index=pd.MultiIndex.from_product([a, b], names=columns[:2]),
    columns=data_columns,
)

# df2: fixed integer multipliers, one row per (greek, foobar) pair.
df2 = pd.DataFrame(
    np.array([
        [1, 0, 1, 0],
        [1, 1, 1, 1],
        [0, 0, 0, 0],
        [0, 2, 0, 4],
    ]),
    index=pd.MultiIndex.from_product(
        [a, c], names=[columns[0], columns[2]]),
    columns=data_columns,
)
def method1():
    """Multiply every ``df1`` row with every ``df2`` row of the same 'greek' label.

    Reads the module-level ``df1`` (indexed by 'greek', 'latin'), ``df2``
    (indexed by 'greek', 'foobar') and ``columns``.  This is the plain
    label-based loop implementation.

    The rows are collected in a list and the frame is built once, because
    ``DataFrame.append`` / ``Series.append`` no longer exist in pandas 2.x
    and growing a frame row by row re-copies it on every iteration.

    Returns:
        A DataFrame indexed by ``columns[:3]`` with the data columns
        ``columns[3:]``; one row per (greek, latin, foobar) combination.
    """
    keys = []
    rows = []
    for greek in df1.index.get_level_values('greek').unique():
        latin_labels = df1.loc[greek].index.get_level_values('latin').unique()
        foobar_labels = df2.loc[greek].index.get_level_values('foobar').unique()
        for latin in latin_labels:
            for foobar in foobar_labels:
                keys.append((greek, latin, foobar))
                rows.append(
                    df1.loc[(greek, latin), :] * df2.loc[(greek, foobar), :])

    if not rows:
        # Nothing to combine: return an empty frame with the expected layout
        # (MultiIndex.from_tuples cannot infer levels from an empty list).
        return pd.DataFrame(columns=columns).set_index(columns[:3])

    index = pd.MultiIndex.from_tuples(keys, names=columns[:3])
    return pd.DataFrame(rows, index=index, columns=columns[3:])

def method2():
    """Vectorised product of ``df1`` and ``df2`` rows that share a first level.

    Reads the module-level ``df1``, ``df2`` and ``columns``.  The first two
    output index levels are taken from ``df1``, the third from ``df2``; rows
    are looked up by position, so both frames are expected to hold one row
    per label combination in product order.

    Returns:
        A DataFrame indexed by ``columns[:3]`` with data columns
        ``columns[3:]``, holding the element-wise row products.
    """
    # Distinct labels per output level: two levels from df1, one from df2.
    labels = [
        df1.index.get_level_values(columns[0]).unique(),
        df1.index.get_level_values(columns[1]).unique(),
        df2.index.get_level_values(columns[2]).unique(),
    ]
    n_first, n_second, n_third = map(len, labels)

    # Position of every output row along each of the three levels.
    first, second, third = np.indices(
        (n_first, n_second, n_third)).reshape(3, -1)

    # Rows of df1 / df2 feeding each output row: both frames are addressed
    # as (first-level position) * (rows per first-level label) + offset.
    left = df1.values[first * n_second + second, :]
    right = df2.values[first * n_third + third, :]

    # Multiply the two expanded frames element-wise.
    return pd.DataFrame(
        left * right,
        index=pd.MultiIndex.from_product(labels, names=columns[:3]),
        columns=columns[3:],
    )


def method3():
    """Build the row-by-row product with one aligned multiply per 'foobar' label.

    Reads the module-level ``df1`` (indexed by 'greek', 'latin') and ``df2``
    (indexed by 'greek', 'foobar').  Uses ``sort_index(level=0)`` in place of
    ``sortlevel``, which was removed from pandas.

    Returns:
        A DataFrame indexed by ('greek', 'latin', 'foobar'), sorted by its
        index, with the same data columns as the inputs.
    """
    # Move 'foobar' outermost and sort so each label can be sliced with .loc.
    dft = df2.swaplevel().sort_index(level=0)
    foobar_labels = dft.index.get_level_values('foobar').unique()

    # dft.loc[label, :] is indexed by 'greek' only; the multiplication relies
    # on pandas aligning it with df1 on the shared 'greek' level.
    df5 = pd.concat(
        [df1 * dft.loc[label, :] for label in foobar_labels],
        keys=foobar_labels.tolist(),
        names=['foobar'],
    )
    df5 = df5.reorder_levels(['greek', 'latin', 'foobar'], axis=0)
    return df5.sort_index(level=0)


from timeit import timeit
# Time 50 calls of each implementation and print the total seconds.
for _method in (method1, method2, method3):
    print(timeit(_method, number=50))
4.089807642158121
0.12291539693251252
0.33667341712862253
结果:

                   C1        C2        C3        C4
 greek latin                                        
 alpha A      0.208380  0.856373 -1.041598  1.219707
       B      1.547903 -0.001023  0.918973  1.153554
       C      0.195868  2.772840  0.060960  0.311247
 beta  A      0.690405 -1.258012  0.118000 -0.346677
       B      0.488327 -1.206428  0.967658  1.198287
       C      0.420098 -0.165721  0.626893 -0.377909,
                C1  C2  C3  C4
greek foobar                
 alpha foo      1   0   1   0
       bar      1   1   1   1
 beta  foo      0   0   0   0
       bar      0   2   0   4
                           C1        C2        C3        C4
 greek latin foobar                                        
 alpha A     foo     0.208380  0.000000 -1.041598  0.000000
             bar     0.208380  0.856373 -1.041598  1.219707
       B     foo     1.547903 -0.000000  0.918973  0.000000
             bar     1.547903 -0.001023  0.918973  1.153554
       C     foo     0.195868  0.000000  0.060960  0.000000
             bar     0.195868  2.772840  0.060960  0.311247
 beta  A     foo     0.000000 -0.000000  0.000000 -0.000000
             bar     0.000000 -2.516025  0.000000 -1.386708
       B     foo     0.000000 -0.000000  0.000000  0.000000
             bar     0.000000 -2.412855  0.000000  4.793149
       C     foo     0.000000 -0.000000  0.000000 -0.000000
             bar     0.000000 -0.331443  0.000000 -1.511638
# Collect the distinct labels of each output index level: the first two
# levels come from df1, the third from df2.
idx = [df1.index.get_level_values(col).unique() for col in columns[:2]
       ] + [df2.index.get_level_values(columns[2]).unique()]
# Number of distinct labels per level.
size = [len(x) for x in idx]
# Output index: every combination of the three label sets.
index = pd.MultiIndex.from_product(idx, names=columns[:3])

# Row positions to take from df1 and df2 for each output row.
# idx_a[0] numbers the (level 0, level 1) pairs, each repeated size[2]
# times; idx_a[1] cycles through the level-2 positions.
idx_a = np.indices((size[0] * size[1], size[2])).reshape(2, -1)
# idx_b[0] is the level-0 position of each output row.
idx_b = np.indices((size[0], size[1] * size[2])).reshape(2, -1)
idx_1 = idx_a[0]
# df2 position = level-2 position, offset by size[2] rows per level-0 label.
idx_2 = idx_a[1] + idx_b[0] * size[2]
# NOTE(review): the positional lookups assume df1 and df2 rows are stored in
# full product order of their index levels -- confirm for real data.

# Expand both frames to one row per output row.
y1 = df1.values[idx_1, :]
y2 = df2.values[idx_2, :]

# Element-wise product, labelled with the combined index.
df = pd.DataFrame(y1 * y2, index=index, columns=columns[3:])
import pandas as pd
import numpy as np

# Labels for the three index levels and the shared data columns.
a = ['alpha', 'beta']
b = ['A', 'B', 'C']
c = ['foo', 'bar']
data_columns = ['C1', 'C2', 'C3', 'C4']
columns = ['greek', 'latin', 'foobar'] + data_columns

# df1: one row of standard-normal draws per (greek, latin) pair.
df1 = pd.DataFrame(
    np.random.randn(len(a) * len(b), len(data_columns)),
    index=pd.MultiIndex.from_product([a, b], names=columns[:2]),
    columns=data_columns,
)

# df2: fixed integer multipliers, one row per (greek, foobar) pair.
df2 = pd.DataFrame(
    np.array([
        [1, 0, 1, 0],
        [1, 1, 1, 1],
        [0, 0, 0, 0],
        [0, 2, 0, 4],
    ]),
    index=pd.MultiIndex.from_product(
        [a, c], names=[columns[0], columns[2]]),
    columns=data_columns,
)
def method1():
    """Multiply every ``df1`` row with every ``df2`` row of the same 'greek' label.

    Reads the module-level ``df1`` (indexed by 'greek', 'latin'), ``df2``
    (indexed by 'greek', 'foobar') and ``columns``.  This is the plain
    label-based loop implementation.

    The rows are collected in a list and the frame is built once, because
    ``DataFrame.append`` / ``Series.append`` no longer exist in pandas 2.x
    and growing a frame row by row re-copies it on every iteration.

    Returns:
        A DataFrame indexed by ``columns[:3]`` with the data columns
        ``columns[3:]``; one row per (greek, latin, foobar) combination.
    """
    keys = []
    rows = []
    for greek in df1.index.get_level_values('greek').unique():
        latin_labels = df1.loc[greek].index.get_level_values('latin').unique()
        foobar_labels = df2.loc[greek].index.get_level_values('foobar').unique()
        for latin in latin_labels:
            for foobar in foobar_labels:
                keys.append((greek, latin, foobar))
                rows.append(
                    df1.loc[(greek, latin), :] * df2.loc[(greek, foobar), :])

    if not rows:
        # Nothing to combine: return an empty frame with the expected layout
        # (MultiIndex.from_tuples cannot infer levels from an empty list).
        return pd.DataFrame(columns=columns).set_index(columns[:3])

    index = pd.MultiIndex.from_tuples(keys, names=columns[:3])
    return pd.DataFrame(rows, index=index, columns=columns[3:])

def method2():
    """Vectorised product of ``df1`` and ``df2`` rows that share a first level.

    Reads the module-level ``df1``, ``df2`` and ``columns``.  The first two
    output index levels are taken from ``df1``, the third from ``df2``; rows
    are looked up by position, so both frames are expected to hold one row
    per label combination in product order.

    Returns:
        A DataFrame indexed by ``columns[:3]`` with data columns
        ``columns[3:]``, holding the element-wise row products.
    """
    # Distinct labels per output level: two levels from df1, one from df2.
    labels = [
        df1.index.get_level_values(columns[0]).unique(),
        df1.index.get_level_values(columns[1]).unique(),
        df2.index.get_level_values(columns[2]).unique(),
    ]
    n_first, n_second, n_third = map(len, labels)

    # Position of every output row along each of the three levels.
    first, second, third = np.indices(
        (n_first, n_second, n_third)).reshape(3, -1)

    # Rows of df1 / df2 feeding each output row: both frames are addressed
    # as (first-level position) * (rows per first-level label) + offset.
    left = df1.values[first * n_second + second, :]
    right = df2.values[first * n_third + third, :]

    # Multiply the two expanded frames element-wise.
    return pd.DataFrame(
        left * right,
        index=pd.MultiIndex.from_product(labels, names=columns[:3]),
        columns=columns[3:],
    )

from timeit import timeit
# Time 50 calls of each implementation and print the total seconds.
for _method in (method1, method2):
    print(timeit(_method, number=50))
7.96668368373
0.149504332128
# Move 'foobar' to the outer level and sort so each label can be sliced
# with .loc.
dft = df2.swaplevel()
# sortlevel() was removed from pandas; sort_index(level=...) replaces it.
dft.sort_index(level=0, inplace=True)
# One product frame per 'foobar' label, stacked under a new outer 'foobar'
# level.  df1 * <frame indexed by 'greek'> relies on pandas aligning the two
# indexes on their shared 'greek' level.
df5 = pd.concat(
    [df1 * dft.loc[i, :] for i in dft.index.get_level_values('foobar').unique()],
    keys=dft.index.get_level_values('foobar').unique().tolist(),
    names=['foobar'])
# Restore the (greek, latin, foobar) level order and sort the rows.
df5 = df5.reorder_levels(['greek', 'latin', 'foobar'], axis=0)
df5.sort_index(level=0, inplace=True)
import pandas as pd
import numpy as np

# Labels for the three index levels and the shared data columns.
a = ['alpha', 'beta']
b = ['A', 'B', 'C']
c = ['foo', 'bar']
data_columns = ['C1', 'C2', 'C3', 'C4']
columns = ['greek', 'latin', 'foobar'] + data_columns

# df1: one row of standard-normal draws per (greek, latin) pair.
df1 = pd.DataFrame(
    np.random.randn(len(a) * len(b), len(data_columns)),
    index=pd.MultiIndex.from_product([a, b], names=columns[:2]),
    columns=data_columns,
)

# df2: fixed integer multipliers, one row per (greek, foobar) pair.
df2 = pd.DataFrame(
    np.array([
        [1, 0, 1, 0],
        [1, 1, 1, 1],
        [0, 0, 0, 0],
        [0, 2, 0, 4],
    ]),
    index=pd.MultiIndex.from_product(
        [a, c], names=[columns[0], columns[2]]),
    columns=data_columns,
)
def method1():
    """Multiply every ``df1`` row with every ``df2`` row of the same 'greek' label.

    Reads the module-level ``df1`` (indexed by 'greek', 'latin'), ``df2``
    (indexed by 'greek', 'foobar') and ``columns``.  This is the plain
    label-based loop implementation.

    The rows are collected in a list and the frame is built once, because
    ``DataFrame.append`` / ``Series.append`` no longer exist in pandas 2.x
    and growing a frame row by row re-copies it on every iteration.

    Returns:
        A DataFrame indexed by ``columns[:3]`` with the data columns
        ``columns[3:]``; one row per (greek, latin, foobar) combination.
    """
    keys = []
    rows = []
    for greek in df1.index.get_level_values('greek').unique():
        latin_labels = df1.loc[greek].index.get_level_values('latin').unique()
        foobar_labels = df2.loc[greek].index.get_level_values('foobar').unique()
        for latin in latin_labels:
            for foobar in foobar_labels:
                keys.append((greek, latin, foobar))
                rows.append(
                    df1.loc[(greek, latin), :] * df2.loc[(greek, foobar), :])

    if not rows:
        # Nothing to combine: return an empty frame with the expected layout
        # (MultiIndex.from_tuples cannot infer levels from an empty list).
        return pd.DataFrame(columns=columns).set_index(columns[:3])

    index = pd.MultiIndex.from_tuples(keys, names=columns[:3])
    return pd.DataFrame(rows, index=index, columns=columns[3:])

def method2():
    """Vectorised product of ``df1`` and ``df2`` rows that share a first level.

    Reads the module-level ``df1``, ``df2`` and ``columns``.  The first two
    output index levels are taken from ``df1``, the third from ``df2``; rows
    are looked up by position, so both frames are expected to hold one row
    per label combination in product order.

    Returns:
        A DataFrame indexed by ``columns[:3]`` with data columns
        ``columns[3:]``, holding the element-wise row products.
    """
    # Distinct labels per output level: two levels from df1, one from df2.
    labels = [
        df1.index.get_level_values(columns[0]).unique(),
        df1.index.get_level_values(columns[1]).unique(),
        df2.index.get_level_values(columns[2]).unique(),
    ]
    n_first, n_second, n_third = map(len, labels)

    # Position of every output row along each of the three levels.
    first, second, third = np.indices(
        (n_first, n_second, n_third)).reshape(3, -1)

    # Rows of df1 / df2 feeding each output row: both frames are addressed
    # as (first-level position) * (rows per first-level label) + offset.
    left = df1.values[first * n_second + second, :]
    right = df2.values[first * n_third + third, :]

    # Multiply the two expanded frames element-wise.
    return pd.DataFrame(
        left * right,
        index=pd.MultiIndex.from_product(labels, names=columns[:3]),
        columns=columns[3:],
    )


def method3():
    """Build the row-by-row product with one aligned multiply per 'foobar' label.

    Reads the module-level ``df1`` (indexed by 'greek', 'latin') and ``df2``
    (indexed by 'greek', 'foobar').  Uses ``sort_index(level=0)`` in place of
    ``sortlevel``, which was removed from pandas.

    Returns:
        A DataFrame indexed by ('greek', 'latin', 'foobar'), sorted by its
        index, with the same data columns as the inputs.
    """
    # Move 'foobar' outermost and sort so each label can be sliced with .loc.
    dft = df2.swaplevel().sort_index(level=0)
    foobar_labels = dft.index.get_level_values('foobar').unique()

    # dft.loc[label, :] is indexed by 'greek' only; the multiplication relies
    # on pandas aligning it with df1 on the shared 'greek' level.
    df5 = pd.concat(
        [df1 * dft.loc[label, :] for label in foobar_labels],
        keys=foobar_labels.tolist(),
        names=['foobar'],
    )
    df5 = df5.reorder_levels(['greek', 'latin', 'foobar'], axis=0)
    return df5.sort_index(level=0)


from timeit import timeit
# Time 50 calls of each implementation and print the total seconds.
for _method in (method1, method2, method3):
    print(timeit(_method, number=50))
4.089807642158121
0.12291539693251252
0.33667341712862253