Python 如何向行中添加值?

Python 如何向行中添加值?,python,pandas,Python,Pandas,如何向行添加值: 我在数据框中创建了一列,并将值赋给0 现在更新这些列值的书面逻辑,但不反映 输入: >>> parafix_df = main_df[["line_width", "para_num", "bbox" ]] >>> parafix_df line_width para_num bbox 0 238.546 NaN (50.0, 579.3, 288.546, 5

如何向行添加值

  • 我在数据框中新建了一列,并将其值初始化为 0
  • 然后编写了更新该列值的逻辑,但更新没有反映到数据框中。输入:

    >>> parafix_df = main_df[["line_width", "para_num", "bbox" ]]
    >>> parafix_df
       line_width para_num                             bbox
    0     238.546      NaN  (50.0, 579.3, 288.546, 598.022)
    1         318        1    (64.0, 564.9, 382.0, 583.622)
    2         332        2    (50.0, 550.5, 382.0, 569.222)
    3         332        2    (50.0, 536.1, 382.0, 554.822)
    4     328.977        2  (50.0, 521.7, 378.977, 540.422)
    5         318        3    (64.0, 507.3, 382.0, 526.022)
    6         332        3    (50.0, 492.9, 382.0, 511.622)
    7         332        3    (50.0, 478.5, 382.0, 497.222)
    8         332        3    (50.0, 464.1, 382.0, 482.822)
    9         332        3    (50.0, 449.7, 382.0, 468.422)
    10      59.04        3   (50.0, 435.3, 109.04, 454.022)
    11    304.007        4  (64.0, 420.9, 368.007, 439.622)
    12        318        5    (64.0, 406.5, 382.0, 425.222)
    13        332        5    (50.0, 392.1, 382.0, 410.822)
    14        332        5    (50.0, 377.7, 382.0, 396.422)
    15        332        5    (50.0, 363.3, 382.0, 382.022)
    16     43.252        5   (50.0, 348.9, 93.252, 367.622)
    17        318        6    (64.0, 334.5, 382.0, 353.222)
    18        332        6    (50.0, 320.1, 382.0, 338.822)
    19        332        6    (50.0, 305.7, 382.0, 324.422)
    20        332        6    (50.0, 291.3, 382.0, 310.022)
    21        332        6    (50.0, 276.9, 382.0, 295.622)
    22     317.02        6   (50.0, 262.5, 367.02, 281.222)
    23        318        7    (64.0, 248.1, 382.0, 266.822)
    24        332        7    (50.0, 233.7, 382.0, 252.422)
    25     47.014        7   (50.0, 219.3, 97.014, 238.022)
    26        318        8    (64.0, 204.9, 382.0, 223.622)
    27    316.723        8  (50.0, 190.5, 366.723, 209.222)
    28        318        9    (64.0, 176.1, 382.0, 194.822)
    29    326.766        9  (50.0, 161.7, 376.766, 180.422)
    30        318       10    (64.0, 147.3, 382.0, 166.022)
    31        332       10    (50.0, 132.9, 382.0, 151.622)
    32        332       10    (50.0, 118.5, 382.0, 137.222)
    33    305.393       11  (64.0, 104.1, 369.393, 122.822)
    34        318       12     (64.0, 89.7, 382.0, 108.422)
    35        318       13      (64.0, 75.3, 382.0, 94.022)
    36    319.165       13    (50.0, 60.9, 369.165, 79.622)
    37    308.165       14    (64.0, 46.5, 372.165, 65.222)
    38        318       15      (64.0, 32.1, 382.0, 50.822)
    39    329.153       15    (50.0, 17.7, 379.153, 36.422)
    40        318       16       (64.0, 3.3, 382.0, 22.022)
    41    324.335       16    (50.0, -11.1, 374.335, 7.622)
    
    para1: 2
    para1: 3
    para1: 4
    para1: 5
    para1: 6
    para1: 7
    para1: 8
    para1: 9
    para1: 10
    para1: 11
    para1: 12
    para1: 13
    para1: 14
    para1: 15
    para1: 16
    
    parafix_df = main_df[["line_text", "line_width", "para_num", "bbox" ]]
    parafix_df["new_para_num"] = 0
    
    max_width = parafix_df['line_width'].max()
    bbox_max_width = parafix_df.loc[selected['line_width'] == max_width].iloc[0]["bbox"]
    
    para1 = 1
    for indx, current in enumerate(parafix_df.iterrows(), start=0):
        if indx!=0:
            bbox_current = current[1]["bbox"]
            if bbox_current[0]>bbox_max_width[0]:
                para1 += 1
        parafix_df.iloc[indx, 4] = para1
    
    代码

    parafix_df = main_df[["line_text", "line_width", "para_num", "bbox" ]]
    parafix_df["new_para_num"] = 0
    
    max_width = parafix_df['line_width'].max()
    bbox_max_width = parafix_df.loc[selected['line_width'] == max_width].iloc[0]["bbox"]
    
    previous = None
    para1 = 1
    for current, next in izip(parafix_df.iterrows(), parafix_df.iloc[1:].iterrows()):
        if previous==None:
            current[1]["new_para_num"] = para1
        else:
            bbox_current = current[1]["bbox"]
            bbox_next = next[1]["bbox"]
            bbox_previous = previous[1]["bbox"]
            if bbox_current[0]>bbox_max_width[0]:
                para1 += 1
                print "para1:", para1
            current[1]["new_para_num"] = para1
    
        previous = current
    
                                  bbox  new_para_num  
    0   (50.0, 579.3, 288.546, 598.022)             0  
    1     (64.0, 564.9, 382.0, 583.622)             0  
    2     (50.0, 550.5, 382.0, 569.222)             0  
    3     (50.0, 536.1, 382.0, 554.822)             0  
    4   (50.0, 521.7, 378.977, 540.422)             0  
    5     (64.0, 507.3, 382.0, 526.022)             0  
    6     (50.0, 492.9, 382.0, 511.622)             0  
    7     (50.0, 478.5, 382.0, 497.222)             0  
    8     (50.0, 464.1, 382.0, 482.822)             0  
    9     (50.0, 449.7, 382.0, 468.422)             0  
    10   (50.0, 435.3, 109.04, 454.022)             0  
    11  (64.0, 420.9, 368.007, 439.622)             0  
    12    (64.0, 406.5, 382.0, 425.222)             0  
    13    (50.0, 392.1, 382.0, 410.822)             0  
    14    (50.0, 377.7, 382.0, 396.422)             0  
    15    (50.0, 363.3, 382.0, 382.022)             0  
    16   (50.0, 348.9, 93.252, 367.622)             0  
    17    (64.0, 334.5, 382.0, 353.222)             0  
    18    (50.0, 320.1, 382.0, 338.822)             0  
    19    (50.0, 305.7, 382.0, 324.422)             0  
    20    (50.0, 291.3, 382.0, 310.022)             0  
    21    (50.0, 276.9, 382.0, 295.622)             0  
    22   (50.0, 262.5, 367.02, 281.222)             0  
    23    (64.0, 248.1, 382.0, 266.822)             0  
    24    (50.0, 233.7, 382.0, 252.422)             0  
    25   (50.0, 219.3, 97.014, 238.022)             0  
    26    (64.0, 204.9, 382.0, 223.622)             0  
    27  (50.0, 190.5, 366.723, 209.222)             0  
    28    (64.0, 176.1, 382.0, 194.822)             0  
    29  (50.0, 161.7, 376.766, 180.422)             0  
    30    (64.0, 147.3, 382.0, 166.022)             0  
    31    (50.0, 132.9, 382.0, 151.622)             0  
    32    (50.0, 118.5, 382.0, 137.222)             0  
    33  (64.0, 104.1, 369.393, 122.822)             0  
    34     (64.0, 89.7, 382.0, 108.422)             0  
    35      (64.0, 75.3, 382.0, 94.022)             0  
    36    (50.0, 60.9, 369.165, 79.622)             0  
    37    (64.0, 46.5, 372.165, 65.222)             0  
    38      (64.0, 32.1, 382.0, 50.822)             0  
    39    (50.0, 17.7, 379.153, 36.422)             0  
    40       (64.0, 3.3, 382.0, 22.022)             0  
    41    (50.0, -11.1, 374.335, 7.622)             0  
    
    上述代码的输出

    parafix_df = main_df[["line_text", "line_width", "para_num", "bbox" ]]
    parafix_df["new_para_num"] = 0
    
    max_width = parafix_df['line_width'].max()
    bbox_max_width = parafix_df.loc[selected['line_width'] == max_width].iloc[0]["bbox"]
    
    previous = None
    para1 = 1
    for current, next in izip(parafix_df.iterrows(), parafix_df.iloc[1:].iterrows()):
        if previous==None:
            current[1]["new_para_num"] = para1
        else:
            bbox_current = current[1]["bbox"]
            bbox_next = next[1]["bbox"]
            bbox_previous = previous[1]["bbox"]
            if bbox_current[0]>bbox_max_width[0]:
                para1 += 1
                print "para1:", para1
            current[1]["new_para_num"] = para1
    
        previous = current
    
                                  bbox  new_para_num  
    0   (50.0, 579.3, 288.546, 598.022)             0  
    1     (64.0, 564.9, 382.0, 583.622)             0  
    2     (50.0, 550.5, 382.0, 569.222)             0  
    3     (50.0, 536.1, 382.0, 554.822)             0  
    4   (50.0, 521.7, 378.977, 540.422)             0  
    5     (64.0, 507.3, 382.0, 526.022)             0  
    6     (50.0, 492.9, 382.0, 511.622)             0  
    7     (50.0, 478.5, 382.0, 497.222)             0  
    8     (50.0, 464.1, 382.0, 482.822)             0  
    9     (50.0, 449.7, 382.0, 468.422)             0  
    10   (50.0, 435.3, 109.04, 454.022)             0  
    11  (64.0, 420.9, 368.007, 439.622)             0  
    12    (64.0, 406.5, 382.0, 425.222)             0  
    13    (50.0, 392.1, 382.0, 410.822)             0  
    14    (50.0, 377.7, 382.0, 396.422)             0  
    15    (50.0, 363.3, 382.0, 382.022)             0  
    16   (50.0, 348.9, 93.252, 367.622)             0  
    17    (64.0, 334.5, 382.0, 353.222)             0  
    18    (50.0, 320.1, 382.0, 338.822)             0  
    19    (50.0, 305.7, 382.0, 324.422)             0  
    20    (50.0, 291.3, 382.0, 310.022)             0  
    21    (50.0, 276.9, 382.0, 295.622)             0  
    22   (50.0, 262.5, 367.02, 281.222)             0  
    23    (64.0, 248.1, 382.0, 266.822)             0  
    24    (50.0, 233.7, 382.0, 252.422)             0  
    25   (50.0, 219.3, 97.014, 238.022)             0  
    26    (64.0, 204.9, 382.0, 223.622)             0  
    27  (50.0, 190.5, 366.723, 209.222)             0  
    28    (64.0, 176.1, 382.0, 194.822)             0  
    29  (50.0, 161.7, 376.766, 180.422)             0  
    30    (64.0, 147.3, 382.0, 166.022)             0  
    31    (50.0, 132.9, 382.0, 151.622)             0  
    32    (50.0, 118.5, 382.0, 137.222)             0  
    33  (64.0, 104.1, 369.393, 122.822)             0  
    34     (64.0, 89.7, 382.0, 108.422)             0  
    35      (64.0, 75.3, 382.0, 94.022)             0  
    36    (50.0, 60.9, 369.165, 79.622)             0  
    37    (64.0, 46.5, 372.165, 65.222)             0  
    38      (64.0, 32.1, 382.0, 50.822)             0  
    39    (50.0, 17.7, 379.153, 36.422)             0  
    40       (64.0, 3.3, 382.0, 22.022)             0  
    41    (50.0, -11.1, 374.335, 7.622)             0  
    
    但我需要的是新的 new_para_num 值:

    >>> parafix_df = main_df[["line_width", "para_num", "bbox" ]]
    >>> parafix_df
       line_width para_num                             bbox
    0     238.546      NaN  (50.0, 579.3, 288.546, 598.022)
    1         318        1    (64.0, 564.9, 382.0, 583.622)
    2         332        2    (50.0, 550.5, 382.0, 569.222)
    3         332        2    (50.0, 536.1, 382.0, 554.822)
    4     328.977        2  (50.0, 521.7, 378.977, 540.422)
    5         318        3    (64.0, 507.3, 382.0, 526.022)
    6         332        3    (50.0, 492.9, 382.0, 511.622)
    7         332        3    (50.0, 478.5, 382.0, 497.222)
    8         332        3    (50.0, 464.1, 382.0, 482.822)
    9         332        3    (50.0, 449.7, 382.0, 468.422)
    10      59.04        3   (50.0, 435.3, 109.04, 454.022)
    11    304.007        4  (64.0, 420.9, 368.007, 439.622)
    12        318        5    (64.0, 406.5, 382.0, 425.222)
    13        332        5    (50.0, 392.1, 382.0, 410.822)
    14        332        5    (50.0, 377.7, 382.0, 396.422)
    15        332        5    (50.0, 363.3, 382.0, 382.022)
    16     43.252        5   (50.0, 348.9, 93.252, 367.622)
    17        318        6    (64.0, 334.5, 382.0, 353.222)
    18        332        6    (50.0, 320.1, 382.0, 338.822)
    19        332        6    (50.0, 305.7, 382.0, 324.422)
    20        332        6    (50.0, 291.3, 382.0, 310.022)
    21        332        6    (50.0, 276.9, 382.0, 295.622)
    22     317.02        6   (50.0, 262.5, 367.02, 281.222)
    23        318        7    (64.0, 248.1, 382.0, 266.822)
    24        332        7    (50.0, 233.7, 382.0, 252.422)
    25     47.014        7   (50.0, 219.3, 97.014, 238.022)
    26        318        8    (64.0, 204.9, 382.0, 223.622)
    27    316.723        8  (50.0, 190.5, 366.723, 209.222)
    28        318        9    (64.0, 176.1, 382.0, 194.822)
    29    326.766        9  (50.0, 161.7, 376.766, 180.422)
    30        318       10    (64.0, 147.3, 382.0, 166.022)
    31        332       10    (50.0, 132.9, 382.0, 151.622)
    32        332       10    (50.0, 118.5, 382.0, 137.222)
    33    305.393       11  (64.0, 104.1, 369.393, 122.822)
    34        318       12     (64.0, 89.7, 382.0, 108.422)
    35        318       13      (64.0, 75.3, 382.0, 94.022)
    36    319.165       13    (50.0, 60.9, 369.165, 79.622)
    37    308.165       14    (64.0, 46.5, 372.165, 65.222)
    38        318       15      (64.0, 32.1, 382.0, 50.822)
    39    329.153       15    (50.0, 17.7, 379.153, 36.422)
    40        318       16       (64.0, 3.3, 382.0, 22.022)
    41    324.335       16    (50.0, -11.1, 374.335, 7.622)
    
    para1: 2
    para1: 3
    para1: 4
    para1: 5
    para1: 6
    para1: 7
    para1: 8
    para1: 9
    para1: 10
    para1: 11
    para1: 12
    para1: 13
    para1: 14
    para1: 15
    para1: 16
    
    parafix_df = main_df[["line_text", "line_width", "para_num", "bbox" ]]
    parafix_df["new_para_num"] = 0
    
    max_width = parafix_df['line_width'].max()
    bbox_max_width = parafix_df.loc[selected['line_width'] == max_width].iloc[0]["bbox"]
    
    para1 = 1
    for indx, current in enumerate(parafix_df.iterrows(), start=0):
        if indx!=0:
            bbox_current = current[1]["bbox"]
            if bbox_current[0]>bbox_max_width[0]:
                para1 += 1
        parafix_df.iloc[indx, 4] = para1
    
    你能帮我吗?

    以下是我最终可以正常运行的代码:

    >>> parafix_df = main_df[["line_width", "para_num", "bbox" ]]
    >>> parafix_df
       line_width para_num                             bbox
    0     238.546      NaN  (50.0, 579.3, 288.546, 598.022)
    1         318        1    (64.0, 564.9, 382.0, 583.622)
    2         332        2    (50.0, 550.5, 382.0, 569.222)
    3         332        2    (50.0, 536.1, 382.0, 554.822)
    4     328.977        2  (50.0, 521.7, 378.977, 540.422)
    5         318        3    (64.0, 507.3, 382.0, 526.022)
    6         332        3    (50.0, 492.9, 382.0, 511.622)
    7         332        3    (50.0, 478.5, 382.0, 497.222)
    8         332        3    (50.0, 464.1, 382.0, 482.822)
    9         332        3    (50.0, 449.7, 382.0, 468.422)
    10      59.04        3   (50.0, 435.3, 109.04, 454.022)
    11    304.007        4  (64.0, 420.9, 368.007, 439.622)
    12        318        5    (64.0, 406.5, 382.0, 425.222)
    13        332        5    (50.0, 392.1, 382.0, 410.822)
    14        332        5    (50.0, 377.7, 382.0, 396.422)
    15        332        5    (50.0, 363.3, 382.0, 382.022)
    16     43.252        5   (50.0, 348.9, 93.252, 367.622)
    17        318        6    (64.0, 334.5, 382.0, 353.222)
    18        332        6    (50.0, 320.1, 382.0, 338.822)
    19        332        6    (50.0, 305.7, 382.0, 324.422)
    20        332        6    (50.0, 291.3, 382.0, 310.022)
    21        332        6    (50.0, 276.9, 382.0, 295.622)
    22     317.02        6   (50.0, 262.5, 367.02, 281.222)
    23        318        7    (64.0, 248.1, 382.0, 266.822)
    24        332        7    (50.0, 233.7, 382.0, 252.422)
    25     47.014        7   (50.0, 219.3, 97.014, 238.022)
    26        318        8    (64.0, 204.9, 382.0, 223.622)
    27    316.723        8  (50.0, 190.5, 366.723, 209.222)
    28        318        9    (64.0, 176.1, 382.0, 194.822)
    29    326.766        9  (50.0, 161.7, 376.766, 180.422)
    30        318       10    (64.0, 147.3, 382.0, 166.022)
    31        332       10    (50.0, 132.9, 382.0, 151.622)
    32        332       10    (50.0, 118.5, 382.0, 137.222)
    33    305.393       11  (64.0, 104.1, 369.393, 122.822)
    34        318       12     (64.0, 89.7, 382.0, 108.422)
    35        318       13      (64.0, 75.3, 382.0, 94.022)
    36    319.165       13    (50.0, 60.9, 369.165, 79.622)
    37    308.165       14    (64.0, 46.5, 372.165, 65.222)
    38        318       15      (64.0, 32.1, 382.0, 50.822)
    39    329.153       15    (50.0, 17.7, 379.153, 36.422)
    40        318       16       (64.0, 3.3, 382.0, 22.022)
    41    324.335       16    (50.0, -11.1, 374.335, 7.622)
    
    para1: 2
    para1: 3
    para1: 4
    para1: 5
    para1: 6
    para1: 7
    para1: 8
    para1: 9
    para1: 10
    para1: 11
    para1: 12
    para1: 13
    para1: 14
    para1: 15
    para1: 16
    
    parafix_df = main_df[["line_text", "line_width", "para_num", "bbox" ]]
    parafix_df["new_para_num"] = 0
    
    max_width = parafix_df['line_width'].max()
    bbox_max_width = parafix_df.loc[selected['line_width'] == max_width].iloc[0]["bbox"]
    
    para1 = 1
    for indx, current in enumerate(parafix_df.iterrows(), start=0):
        if indx!=0:
            bbox_current = current[1]["bbox"]
            if bbox_current[0]>bbox_max_width[0]:
                para1 += 1
        parafix_df.iloc[indx, 4] = para1
    

    还能进一步优化吗?

    更新:

    >>> parafix_df = main_df[["line_width", "para_num", "bbox" ]]
    >>> parafix_df
       line_width para_num                             bbox
    0     238.546      NaN  (50.0, 579.3, 288.546, 598.022)
    1         318        1    (64.0, 564.9, 382.0, 583.622)
    2         332        2    (50.0, 550.5, 382.0, 569.222)
    3         332        2    (50.0, 536.1, 382.0, 554.822)
    4     328.977        2  (50.0, 521.7, 378.977, 540.422)
    5         318        3    (64.0, 507.3, 382.0, 526.022)
    6         332        3    (50.0, 492.9, 382.0, 511.622)
    7         332        3    (50.0, 478.5, 382.0, 497.222)
    8         332        3    (50.0, 464.1, 382.0, 482.822)
    9         332        3    (50.0, 449.7, 382.0, 468.422)
    10      59.04        3   (50.0, 435.3, 109.04, 454.022)
    11    304.007        4  (64.0, 420.9, 368.007, 439.622)
    12        318        5    (64.0, 406.5, 382.0, 425.222)
    13        332        5    (50.0, 392.1, 382.0, 410.822)
    14        332        5    (50.0, 377.7, 382.0, 396.422)
    15        332        5    (50.0, 363.3, 382.0, 382.022)
    16     43.252        5   (50.0, 348.9, 93.252, 367.622)
    17        318        6    (64.0, 334.5, 382.0, 353.222)
    18        332        6    (50.0, 320.1, 382.0, 338.822)
    19        332        6    (50.0, 305.7, 382.0, 324.422)
    20        332        6    (50.0, 291.3, 382.0, 310.022)
    21        332        6    (50.0, 276.9, 382.0, 295.622)
    22     317.02        6   (50.0, 262.5, 367.02, 281.222)
    23        318        7    (64.0, 248.1, 382.0, 266.822)
    24        332        7    (50.0, 233.7, 382.0, 252.422)
    25     47.014        7   (50.0, 219.3, 97.014, 238.022)
    26        318        8    (64.0, 204.9, 382.0, 223.622)
    27    316.723        8  (50.0, 190.5, 366.723, 209.222)
    28        318        9    (64.0, 176.1, 382.0, 194.822)
    29    326.766        9  (50.0, 161.7, 376.766, 180.422)
    30        318       10    (64.0, 147.3, 382.0, 166.022)
    31        332       10    (50.0, 132.9, 382.0, 151.622)
    32        332       10    (50.0, 118.5, 382.0, 137.222)
    33    305.393       11  (64.0, 104.1, 369.393, 122.822)
    34        318       12     (64.0, 89.7, 382.0, 108.422)
    35        318       13      (64.0, 75.3, 382.0, 94.022)
    36    319.165       13    (50.0, 60.9, 369.165, 79.622)
    37    308.165       14    (64.0, 46.5, 372.165, 65.222)
    38        318       15      (64.0, 32.1, 382.0, 50.822)
    39    329.153       15    (50.0, 17.7, 379.153, 36.422)
    40        318       16       (64.0, 3.3, 382.0, 22.022)
    41    324.335       16    (50.0, -11.1, 374.335, 7.622)
    
    para1: 2
    para1: 3
    para1: 4
    para1: 5
    para1: 6
    para1: 7
    para1: 8
    para1: 9
    para1: 10
    para1: 11
    para1: 12
    para1: 13
    para1: 14
    para1: 15
    para1: 16
    
    parafix_df = main_df[["line_text", "line_width", "para_num", "bbox" ]]
    parafix_df["new_para_num"] = 0
    
    max_width = parafix_df['line_width'].max()
    bbox_max_width = parafix_df.loc[selected['line_width'] == max_width].iloc[0]["bbox"]
    
    para1 = 1
    for indx, current in enumerate(parafix_df.iterrows(), start=0):
        if indx!=0:
            bbox_current = current[1]["bbox"]
            if bbox_current[0]>bbox_max_width[0]:
                para1 += 1
        parafix_df.iloc[indx, 4] = para1
    
    如果我理解正确(IIUC),你可以这样做:

    df.new_para_num = 1
    
    In [210]: df.loc[df.line_width == df.line_width.max(), 'new_para_num'].cumsum() + 1
    Out[210]:
    2      2
    3      3
    6      4
    7      5
    8      6
    9      7
    13     8
    14     9
    15    10
    18    11
    19    12
    20    13
    21    14
    24    15
    31    16
    32    17
    Name: new_para_num, dtype: int64
    
    如果要有条件地更新原始 DataFrame 中的 `new_para_num` 列:

    In [223]: df.new_para_num = 1
    
    In [224]: selected = df.loc[df.line_width == df.line_width.max()].copy()
    
    In [226]: selected.new_para_num = selected.new_para_num.cumsum() + 1
    
    In [227]: selected
    Out[227]:
        line_width  para_num                           bbox  new_para_num
    2        332.0       2.0  [50.0, 550.5, 382.0, 569.222]             2
    3        332.0       2.0  [50.0, 536.1, 382.0, 554.822]             3
    6        332.0       3.0  [50.0, 492.9, 382.0, 511.622]             4
    7        332.0       3.0  [50.0, 478.5, 382.0, 497.222]             5
    8        332.0       3.0  [50.0, 464.1, 382.0, 482.822]             6
    9        332.0       3.0  [50.0, 449.7, 382.0, 468.422]             7
    13       332.0       5.0  [50.0, 392.1, 382.0, 410.822]             8
    14       332.0       5.0  [50.0, 377.7, 382.0, 396.422]             9
    15       332.0       5.0  [50.0, 363.3, 382.0, 382.022]            10
    18       332.0       6.0  [50.0, 320.1, 382.0, 338.822]            11
    19       332.0       6.0  [50.0, 305.7, 382.0, 324.422]            12
    20       332.0       6.0  [50.0, 291.3, 382.0, 310.022]            13
    21       332.0       6.0  [50.0, 276.9, 382.0, 295.622]            14
    24       332.0       7.0  [50.0, 233.7, 382.0, 252.422]            15
    31       332.0      10.0  [50.0, 132.9, 382.0, 151.622]            16
    32       332.0      10.0  [50.0, 118.5, 382.0, 137.222]            17
    
    In [228]: df.loc[df.line_width == df.line_width.max(), 'new_para_num'] = selected
    
    In [229]: df
    Out[229]:
        line_width  para_num                             bbox  new_para_num
    0      238.546       NaN  [50.0, 579.3, 288.546, 598.022]             1
    1      318.000       1.0    [64.0, 564.9, 382.0, 583.622]             1
    2      332.000       2.0    [50.0, 550.5, 382.0, 569.222]             2
    3      332.000       2.0    [50.0, 536.1, 382.0, 554.822]             3
    4      328.977       2.0  [50.0, 521.7, 378.977, 540.422]             1
    5      318.000       3.0    [64.0, 507.3, 382.0, 526.022]             1
    6      332.000       3.0    [50.0, 492.9, 382.0, 511.622]             4
    7      332.000       3.0    [50.0, 478.5, 382.0, 497.222]             5
    8      332.000       3.0    [50.0, 464.1, 382.0, 482.822]             6
    9      332.000       3.0    [50.0, 449.7, 382.0, 468.422]             7
    10      59.040       3.0   [50.0, 435.3, 109.04, 454.022]             1
    11     304.007       4.0  [64.0, 420.9, 368.007, 439.622]             1
    12     318.000       5.0    [64.0, 406.5, 382.0, 425.222]             1
    13     332.000       5.0    [50.0, 392.1, 382.0, 410.822]             8
    14     332.000       5.0    [50.0, 377.7, 382.0, 396.422]             9
    15     332.000       5.0    [50.0, 363.3, 382.0, 382.022]            10
    16      43.252       5.0   [50.0, 348.9, 93.252, 367.622]             1
    17     318.000       6.0    [64.0, 334.5, 382.0, 353.222]             1
    18     332.000       6.0    [50.0, 320.1, 382.0, 338.822]            11
    19     332.000       6.0    [50.0, 305.7, 382.0, 324.422]            12
    20     332.000       6.0    [50.0, 291.3, 382.0, 310.022]            13
    21     332.000       6.0    [50.0, 276.9, 382.0, 295.622]            14
    22     317.020       6.0   [50.0, 262.5, 367.02, 281.222]             1
    23     318.000       7.0    [64.0, 248.1, 382.0, 266.822]             1
    24     332.000       7.0    [50.0, 233.7, 382.0, 252.422]            15
    25      47.014       7.0   [50.0, 219.3, 97.014, 238.022]             1
    26     318.000       8.0    [64.0, 204.9, 382.0, 223.622]             1
    27     316.723       8.0  [50.0, 190.5, 366.723, 209.222]             1
    28     318.000       9.0    [64.0, 176.1, 382.0, 194.822]             1
    29     326.766       9.0  [50.0, 161.7, 376.766, 180.422]             1
    30     318.000      10.0    [64.0, 147.3, 382.0, 166.022]             1
    31     332.000      10.0    [50.0, 132.9, 382.0, 151.622]            16
    32     332.000      10.0    [50.0, 118.5, 382.0, 137.222]            17
    33     305.393      11.0  [64.0, 104.1, 369.393, 122.822]             1
    34     318.000      12.0     [64.0, 89.7, 382.0, 108.422]             1
    35     318.000      13.0      [64.0, 75.3, 382.0, 94.022]             1
    36     319.165      13.0    [50.0, 60.9, 369.165, 79.622]             1
    37     308.165      14.0    [64.0, 46.5, 372.165, 65.222]             1
    38     318.000      15.0      [64.0, 32.1, 382.0, 50.822]             1
    39     329.153      15.0    [50.0, 17.7, 379.153, 36.422]             1
    40     318.000      16.0       [64.0, 3.3, 382.0, 22.022]             1
    41     324.335      16.0    [50.0, -11.1, 374.335, 7.622]             1
    
    PS:不过我仍然不确定自己是否正确理解了你的目标。

    旧答案:

    >>> parafix_df = main_df[["line_width", "para_num", "bbox" ]]
    >>> parafix_df
       line_width para_num                             bbox
    0     238.546      NaN  (50.0, 579.3, 288.546, 598.022)
    1         318        1    (64.0, 564.9, 382.0, 583.622)
    2         332        2    (50.0, 550.5, 382.0, 569.222)
    3         332        2    (50.0, 536.1, 382.0, 554.822)
    4     328.977        2  (50.0, 521.7, 378.977, 540.422)
    5         318        3    (64.0, 507.3, 382.0, 526.022)
    6         332        3    (50.0, 492.9, 382.0, 511.622)
    7         332        3    (50.0, 478.5, 382.0, 497.222)
    8         332        3    (50.0, 464.1, 382.0, 482.822)
    9         332        3    (50.0, 449.7, 382.0, 468.422)
    10      59.04        3   (50.0, 435.3, 109.04, 454.022)
    11    304.007        4  (64.0, 420.9, 368.007, 439.622)
    12        318        5    (64.0, 406.5, 382.0, 425.222)
    13        332        5    (50.0, 392.1, 382.0, 410.822)
    14        332        5    (50.0, 377.7, 382.0, 396.422)
    15        332        5    (50.0, 363.3, 382.0, 382.022)
    16     43.252        5   (50.0, 348.9, 93.252, 367.622)
    17        318        6    (64.0, 334.5, 382.0, 353.222)
    18        332        6    (50.0, 320.1, 382.0, 338.822)
    19        332        6    (50.0, 305.7, 382.0, 324.422)
    20        332        6    (50.0, 291.3, 382.0, 310.022)
    21        332        6    (50.0, 276.9, 382.0, 295.622)
    22     317.02        6   (50.0, 262.5, 367.02, 281.222)
    23        318        7    (64.0, 248.1, 382.0, 266.822)
    24        332        7    (50.0, 233.7, 382.0, 252.422)
    25     47.014        7   (50.0, 219.3, 97.014, 238.022)
    26        318        8    (64.0, 204.9, 382.0, 223.622)
    27    316.723        8  (50.0, 190.5, 366.723, 209.222)
    28        318        9    (64.0, 176.1, 382.0, 194.822)
    29    326.766        9  (50.0, 161.7, 376.766, 180.422)
    30        318       10    (64.0, 147.3, 382.0, 166.022)
    31        332       10    (50.0, 132.9, 382.0, 151.622)
    32        332       10    (50.0, 118.5, 382.0, 137.222)
    33    305.393       11  (64.0, 104.1, 369.393, 122.822)
    34        318       12     (64.0, 89.7, 382.0, 108.422)
    35        318       13      (64.0, 75.3, 382.0, 94.022)
    36    319.165       13    (50.0, 60.9, 369.165, 79.622)
    37    308.165       14    (64.0, 46.5, 372.165, 65.222)
    38        318       15      (64.0, 32.1, 382.0, 50.822)
    39    329.153       15    (50.0, 17.7, 379.153, 36.422)
    40        318       16       (64.0, 3.3, 382.0, 22.022)
    41    324.335       16    (50.0, -11.1, 374.335, 7.622)
    
    para1: 2
    para1: 3
    para1: 4
    para1: 5
    para1: 6
    para1: 7
    para1: 8
    para1: 9
    para1: 10
    para1: 11
    para1: 12
    para1: 13
    para1: 14
    para1: 15
    para1: 16
    
    parafix_df = main_df[["line_text", "line_width", "para_num", "bbox" ]]
    parafix_df["new_para_num"] = 0
    
    max_width = parafix_df['line_width'].max()
    bbox_max_width = parafix_df.loc[selected['line_width'] == max_width].iloc[0]["bbox"]
    
    para1 = 1
    for indx, current in enumerate(parafix_df.iterrows(), start=0):
        if indx!=0:
            bbox_current = current[1]["bbox"]
            if bbox_current[0]>bbox_max_width[0]:
                para1 += 1
        parafix_df.iloc[indx, 4] = para1
    
    您可以使用 `shift()` 函数访问上一行和下一行:

    df.shift(-1)  # df will be shifted one row backwards (will show `next` row) 
    
    df.shift(1)  # df will be shifted one row forwards (will show `prev` row)
    
    例如:

    In [142]: df
    Out[142]:
       a  b  c
    0  8  3  0
    1  8  3  4
    2  9  4  1
    3  2  1  8
    4  5  6  3
    
    In [147]: df['prev_a'] = df.a.shift(1)
    
    In [148]: df['next_a'] = df.a.shift(-1)
    
    In [149]: df
    Out[149]:
       a  b  c  prev_a  next_a
    0  8  3  0     NaN     8.0
    1  8  3  4     8.0     9.0
    2  9  4  1     8.0     2.0
    3  2  1  8     9.0     5.0
    4  5  6  3     2.0     NaN
    

    谢谢你,+10,但我无法让更新反映到 `new_para_num` 列中。

    @VivekSable,您能否发布示例输入和所需的输出数据集(5-7 行,以 CSV/dict/JSON/Python 代码格式的文本给出,以便在编码时可以直接使用),并描述您希望如何处理输入数据以获得输出数据集?

    我想根据逻辑为新列赋值。(逻辑我稍后会更新)

    @VivekSable,您是否也可以为输入数据集的前 5 行(这应该足够了)添加所需的输出?这将有助于更好地理解您的逻辑。

    实际上我的逻辑并不正确,我只是想在 `for` 循环中为每一行的 `new_para_num` 赋值。运行上述代码后,所有行得到的都是 `0`。

    在您的代码中,您总是只访问第二行(索引为 `1`)—— `current[1]`、`next[1]`。但是我想您不需要通过遍历 DF 来实现目标——应该可以用 "pandas" 的方式来实现。

    是的,我是 pandas 新手。您能再检查一下我的代码吗?我更新了代码。