Python list index out of range: can I pad the text to avoid it?

I fully understand why this error occurs, but I would like to know whether there is a way to pad the text out to 14 words. For context, this is a text heatmap built with GPT-2. If you have a simpler idea, I would appreciate that just as much. To test the code yourself, do the following: . Thanks in advance for your help.

import re  # needed for the re.sub call below

def apply(f):
    # collapse non-word characters to single spaces
    text = re.sub(r'\W+', ' ', f)
    # LM is assumed to be defined elsewhere in the notebook
    # (its check_probabilities method matches the GLTR backend)
    res = LM().check_probabilities(text, topk=50)
    
    # every line below assumes the text has at least 14 words;
    # word_list[13] raises IndexError for anything shorter
    word_list = f.split()
    one = word_list[0]
    two = word_list[1]
    three = word_list[2]
    four = word_list[3]
    five = word_list[4]
    six = word_list[5]  # was `six = five = word_list[5]`, which silently overwrote five
    seven = word_list[6]
    eight = word_list[7]
    nine = word_list[8]
    ten = word_list[9]
    eleven = word_list[10]
    twelve = word_list[11]
    thirteen = word_list[12]
    fourteen = word_list[13]

    data = [[
{'token': '[CLR]',
 'meta': ['', '', ''],
 'heat': [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]},
{'token': ' ',
 'format': True},
{'token': one,
 'meta': res['pred_topk'][0],
 'heat': [0.13271349668502808, 0.4047139883041382, 0.23314827680587769, 1.0, 0.5698219537734985, 0.20001010596752167, 0.41732218861579895, 0.2375192940235138, 0.12837326526641846, 0.3011391758918762, 0.2920743227005005, 0.15121395885944366, 0.4707326292991638, 0.141720250248909, 0.1146061047911644, 0.3309290111064911, 0.2721664309501648, 0.38880598545074463, 0.28752031922340393, 0.30476102232933044, 0.40849509835243225, 0.12109626829624176, 0.236867755651474, 0.15692873299121857, 0.08568184077739716, 0.28222283720970154, 0.10787433385848999, 0.09868176281452179, 0.11645302921533585, 0.27660083770751953, 0.1150846853852272, 0.13137750327587128, 0.2834398150444031, 0.1425863653421402, 0.7729436159133911, 0.15550559759140015, 0.3342195451259613, 0.2743198275566101]},
{'token': ' ',
 'format': True},
{'token': two,
 'meta': res['pred_topk'][1],
 'heat': [0.11053311824798584, 1.0, 0.3417408764362335, 0.5805244445800781, 0.596860408782959, 0.18530210852622986, 0.2305091768503189, 0.19138814508914948, 0.08227257430553436, 0.19505015015602112, 0.10965480655431747, 0.07133453339338303, 0.21702361106872559, 0.07083487510681152, 0.05262206494808197, 0.09487571567296982, 0.07871642708778381, 0.09568451344966888, 0.10381820052862167, 0.11150145530700684, 0.08054117858409882, 0.06160977482795715, 0.13430000841617584, 0.07046942412853241, 0.04503295198082924, 0.10039176791906357, 0.07321848720312119, 0.04508531466126442, 0.04002087190747261, 0.1304282695055008, 0.05149686336517334, 0.05910608172416687, 0.1943625509738922, 0.05612911283969879, 0.2365487962961197, 0.0644913837313652, 0.08357883244752884, 0.10955799371004105]},
{'token': ' ',
 'format': True},
{'token': three,
 'meta': res['pred_topk'][2],
 'heat': [0.13794338703155518, 0.7412312626838684, 0.2688325345516205, 0.3519371747970581, 1.0, 0.3511815071105957, 0.6799001097679138, 0.23039610683918, 0.10480885207653046, 0.29196831583976746, 0.24283158779144287, 0.08086933195590973, 0.3110826909542084, 0.16006161272525787, 0.07783187925815582, 0.23599569499492645, 0.2036796659231186, 0.25475823879241943, 0.39147695899009705, 0.4029639661312103, 0.16113890707492828, 0.08008856326341629, 0.4354044497013092, 0.14515410363674164, 0.05876074731349945, 0.21267741918563843, 0.11644049733877182, 0.08587612956762314, 0.08814962208271027, 0.363741010427475, 0.07122389227151871, 0.07023804634809494, 0.1380654275417328, 0.1375676840543747, 0.7550925016403198, 0.10494624823331833, 0.23596565425395966, 0.12745369970798492]},
{'token': ' ',
 'format': True},
{'token': four,
 'meta': res['pred_topk'][3],
 'heat': [0.09374084323644638, 0.27613726258277893, 0.19584566354751587, 1.0, 0.2668629586696625, 0.12618684768676758, 0.5485848784446716, 0.10671643167734146, 0.05578231066465378, 0.16895149648189545, 0.14708179235458374, 0.08301705121994019, 0.2549331486225128, 0.05449998006224632, 0.0407552570104599, 0.09658133238554001, 0.08113130927085876, 0.10979730635881424, 0.09126582741737366, 0.16856855154037476, 0.10670913755893707, 0.049128126353025436, 0.12720689177513123, 0.10207141935825348, 0.040946654975414276, 0.14924436807632446, 0.07131370157003403, 0.05912680923938751, 0.057828083634376526, 0.2358609288930893, 0.05285044014453888, 0.03720799833536148, 0.08448022603988647, 0.05244402214884758, 0.2379569709300995, 0.07916100323200226, 0.06218649446964264, 0.10799198597669601]},
{'token': ' ',
 'format': True},
{'token': five,
 'meta': res['pred_topk'][4],
 'heat': [0.1723620444536209, 1.0, 0.49656736850738525, 0.5609704256057739, 0.6928957104682922, 0.37088003754615784, 0.5890324115753174, 0.1961166113615036, 0.09367834031581879, 0.19113656878471375, 0.13310600817203522, 0.13753651082515717, 0.2627904713153839, 0.08134050667285919, 0.053574152290821075, 0.10540777444839478, 0.09048342704772949, 0.08953408151865005, 0.13667654991149902, 0.1143374964594841, 0.11026952415704727, 0.05795498564839363, 0.12386422604322433, 0.08859734237194061, 0.042766354978084564, 0.3162827491760254, 0.07349050790071487, 0.09265555441379547, 0.08770584315061569, 0.2039150893688202, 0.05270526185631752, 0.06614900380373001, 0.16070793569087982, 0.05872023105621338, 0.3202408254146576, 0.062171820551157, 0.14679910242557526, 0.08074744045734406]},
{'token': ' ',
 'format': True},
{'token': six,
 'meta': res['pred_topk'][5],
 'heat': [0.1723620444536209, 1.0, 0.49656736850738525, 0.5609704256057739, 0.6928957104682922, 0.37088003754615784, 0.5890324115753174, 0.1961166113615036, 0.09367834031581879, 0.19113656878471375, 0.13310600817203522, 0.13753651082515717, 0.2627904713153839, 0.08134050667285919, 0.053574152290821075, 0.10540777444839478, 0.09048342704772949]},
{'token': ' ',
 'format': True},
{'token': seven,
 'meta': res['pred_topk'][6],
 'heat': [0.1723620444536209, 1.0, 0.49656736850738525, 0.5609704256057739, 0.6928957104682922, 0.37088003754615784, 0.5890324115753174, 0.1961166113615036, 0.09367834031581879, 0.19113656878471375, 0.13310600817203522, 0.13753651082515717, 0.2627904713153839, 0.08134050667285919, 0.053574152290821075, 0.10540777444839478, 0.09048342704772949]},
{'token': ' ',
 'format': True},
{'token': eight,
 'meta': res['pred_topk'][7],
 'heat': [0.1723620444536209, 1.0, 0.49656736850738525, 0.5609704256057739, 0.6928957104682922, 0.37088003754615784, 0.5890324115753174, 0.1961166113615036, 0.09367834031581879, 0.19113656878471375, 0.13310600817203522, 0.13753651082515717, 0.2627904713153839, 0.08134050667285919, 0.053574152290821075, 0.10540777444839478, 0.09048342704772949]},
{'token': ' ',
 'format': True},
{'token': nine,
 'meta': res['pred_topk'][8],
 'heat': [0.1723620444536209, 1.0, 0.49656736850738525, 0.5609704256057739, 0.6928957104682922, 0.37088003754615784, 0.5890324115753174, 0.1961166113615036, 0.09367834031581879, 0.19113656878471375, 0.13310600817203522, 0.13753651082515717, 0.2627904713153839, 0.08134050667285919, 0.053574152290821075, 0.10540777444839478, 0.09048342704772949]},
{'token': ' ',
 'format': True},
{'token': ten,
 'meta': res['pred_topk'][9],
 'heat': [0.1723620444536209, 1.0, 0.49656736850738525, 0.5609704256057739, 0.6928957104682922, 0.37088003754615784, 0.5890324115753174, 0.1961166113615036, 0.09367834031581879, 0.19113656878471375, 0.13310600817203522, 0.13753651082515717, 0.2627904713153839, 0.08134050667285919, 0.053574152290821075, 0.10540777444839478, 0.09048342704772949]},
{'token': ' ',
 'format': True},
{'token': eleven,
 'meta': res['pred_topk'][10],
 'heat': [0.1723620444536209, 1.0, 0.49656736850738525, 0.5609704256057739, 0.6928957104682922, 0.37088003754615784, 0.5890324115753174, 0.1961166113615036, 0.09367834031581879, 0.19113656878471375, 0.13310600817203522, 0.13753651082515717, 0.2627904713153839, 0.08134050667285919, 0.053574152290821075, 0.10540777444839478, 0.09048342704772949]},
{'token': ' ',
 'format': True},
{'token': twelve,
 'meta': res['pred_topk'][11],
 'heat': [0.1723620444536209, 1.0, 0.49656736850738525, 0.5609704256057739, 0.6928957104682922, 0.37088003754615784, 0.5890324115753174, 0.1961166113615036, 0.09367834031581879, 0.19113656878471375, 0.13310600817203522, 0.13753651082515717, 0.2627904713153839, 0.08134050667285919, 0.053574152290821075, 0.10540777444839478, 0.09048342704772949]},
{'token': ' ',
 'format': True},
{'token': thirteen,
 'meta': res['pred_topk'][12],
 'heat': [0.1723620444536209, 1.0, 0.49656736850738525, 0.5609704256057739, 0.6928957104682922, 0.37088003754615784, 0.5890324115753174, 0.1961166113615036, 0.09367834031581879, 0.19113656878471375, 0.13310600817203522, 0.13753651082515717, 0.2627904713153839, 0.08134050667285919, 0.053574152290821075, 0.10540777444839478, 0.09048342704772949]},
{'token': ' ',
 'format': True},
{'token': fourteen,
 'meta': res['pred_topk'][13],
 'heat': [0.1723620444536209, 1.0, 0.49656736850738525, 0.5609704256057739, 0.6928957104682922, 0.37088003754615784, 0.5890324115753174, 0.1961166113615036, 0.09367834031581879, 0.19113656878471375, 0.13310600817203522, 0.13753651082515717, 0.2627904713153839, 0.08134050667285919, 0.053574152290821075, 0.10540777444839478, 0.09048342704772949]},
{'token': ' ',
 'format': True},
{'token': '[SEP]',
 'meta': ['', '', ''],
 'heat': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]}
]]

    from textualheatmap import TextualHeatmap
    heatmap = TextualHeatmap(facet_titles = ['BERT'], show_meta=True, width=3800)
    heatmap.set_data(data)
    print("    ")

Is this the kind of answer you are looking for?

x = 'while one way'      # a text with fewer than 14 words
y = x.split()
print(y)                 # before padding

if len(y) < 14:
    z = 14 - len(y)      # how many words are still missing
    for i in range(z):
        y.append(y[-1])  # repeat the last word until there are 14

print(y)                 # after padding


This prints:

['while', 'one', 'way']
['while', 'one', 'way', 'way', 'way', 'way', 'way', 'way', 'way', 'way', 'way', 'way', 'way', 'way']

Comments:

I don't understand why you think it is useful to have 14 individually named variables for the words in word_list, nor why you chose to have 14 of them rather than any other number; and I don't understand why you want data to contain anything corresponding to words that don't exist in word_list.

Hi Karl. I only started coding after lockdown, without taking any classes, so I don't seem to do things very practically. Given my lack of knowledge, I thought splitting the sentence up was the simplest way to fill in the heatmap.

Notice that you did not break the contents of res['pred_topk'] into separate variables, yet you were able to use that data just fine? Notice how much simpler that makes your code? Consider applying the same technique.

You are right, that would definitely simplify things. Thank you for the input, and I apologise for the ugly code.
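
Putting the answer's padding together with the loop technique suggested in the comments, the whole function collapses to a few lines. The following is only a sketch under the question's own assumptions: LM comes from the same notebook as before, and pad_word and heat_for(i) are hypothetical placeholders (heat_for stands in for the hard-coded heat lists above). Note that the padding has to happen before check_probabilities is called; otherwise res['pred_topk'] would itself have fewer than 14 entries and fail in the same way.

import re
from textualheatmap import TextualHeatmap

def apply(f, n_words=14, pad_word='the'):
    # pad the word list up to n_words instead of indexing 14 fixed positions
    # (a negative multiplier yields an empty list, so longer inputs are safe)
    words = f.split()
    words += [pad_word] * (n_words - len(words))

    text = re.sub(r'\W+', ' ', ' '.join(words[:n_words]))
    res = LM().check_probabilities(text, topk=50)  # LM as in the question

    row = [{'token': '[CLR]', 'meta': ['', '', ''], 'heat': [1] + [0] * 37}]
    for i, word in enumerate(words[:n_words]):
        row.append({'token': ' ', 'format': True})
        row.append({'token': word,
                    'meta': res['pred_topk'][i],
                    'heat': heat_for(i)})  # heat_for: hypothetical placeholder
    row.append({'token': ' ', 'format': True})
    row.append({'token': '[SEP]', 'meta': ['', '', ''], 'heat': [0] * 37 + [1]})

    heatmap = TextualHeatmap(facet_titles=['BERT'], show_meta=True, width=3800)
    heatmap.set_data([row])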