python请求澄清TypeError:Unhabable类型：'；列表'；_Python_Set_Typeerror

python请求澄清TypeError:Unhabable类型：'；列表'；

python

python请求澄清TypeError:Unhabable类型：'；列表'；,python,set,typeerror,Python,Set,Typeerror,我需要澄清一下我面临的错误语料库是一个Python字典，它将一个页面名称映射到该页面链接的一组所有页面页面是表示页面的字符串当我尝试这个linkouts=corpus[page] TypeError:不可损坏的类型：“列表” 当我打印corpus[page]时，这就是输出（corpus是集合的dict） {'3.html'，'1.html'} 当我打印（键入（语料库[page]）时，设置输出我可以在corpus[page]中进行迭代，但是如果我尝试len（corpus[page]）也会发

我需要澄清一下我面临的错误

语料库是一个Python字典，它将一个页面名称映射到该页面链接的一组所有页面

页面是表示页面的字符串

当我尝试这个

linkouts=corpus[page]

TypeError:不可损坏的类型：“列表”

当我打印

corpus[page]

时，这就是输出（corpus是集合的dict）

{'3.html'，'1.html'}

当我打印（键入（语料库[page]）时，设置输出

我可以在

corpus[page]

中进行迭代，但是如果我尝试

len（corpus[page]）

也会发生同样的错误。

corpus[page]

不是一个集合吗？我应该如何绕过这个错误？Makinf a

语料库[page].copy（）

也面临同样的问题。非常感谢您的建议和帮助，谢谢大家

pagelink.py的代码

import os
import random
import re
import sys

DAMPING = 0.85
SAMPLES = 10000


def main():
    if len(sys.argv) != 2:
        sys.exit("Usage: python pagerank.py corpus")
    corpus = crawl(sys.argv[1])
    ranks = sample_pagerank(corpus, DAMPING, SAMPLES)
    print(f"PageRank Results from Sampling (n = {SAMPLES})")
    for page in sorted(ranks):
        print(f"  {page}: {ranks[page]:.4f}")
    #ranks = iterate_pagerank(corpus, DAMPING)
    #print(f"PageRank Results from Iteration")
    for page in sorted(ranks):
        print(f"  {page}: {ranks[page]:.4f}")


def crawl(directory):
    """
    Parse a directory of HTML pages and check for links to other pages.
    Return a dictionary where each key is a page, and values are
    a list of all other pages in the corpus that are linked to by the page.
    """
    pages = dict()

    # Extract all links from HTML files
    for filename in os.listdir(directory):
        if not filename.endswith(".html"):
            continue
        with open(os.path.join(directory, filename)) as f:
            contents = f.read()
            links = re.findall(r"<a\s+(?:[^>]*?)href=\"([^\"]*)\"", contents)
            pages[filename] = set(links) - {filename}

    # Only include links to other pages in the corpus
    for filename in pages:
        pages[filename] = set(
            link for link in pages[filename]
            if link in pages
        )

    return pages


def transition_model(corpus, page, damping_factor):
    """
    Return a probability distribution over which page to visit next,
    given a current page.

    With probability `damping_factor`, choose a link at random
    linked to by `page`. With probability `1 - damping_factor`, choose
    a link at random chosen from all pages in the corpus.
    """
    linkouts =  set(corpus[page])
    output = {}
    for key in corpus:
        output[key] = 0.00
    dampvalue = damping_factor / len(linkouts)
    for link in linkouts:
        output[link] += dampvalue
    if linkouts:
        dampvalue = 1 - damping_factor
        dampvalue = dampvalue / len(corpus)
        for key in corpus:
            output[key] += dampvalue
    else:
        dampvalue = 1 / len(corpus)
        for key in corpus:
            output[key] += dampvalue
    return output



def sample_pagerank(corpus, damping_factor, n):
    """
    Return PageRank values for each page by sampling `n` pages
    according to transition model, starting with a page at random.

    Return a dictionary where keys are page names, and values are
    their estimated PageRank value (a value between 0 and 1). All
    PageRank values should sum to 1.
    """
    samples = []
    first = random.choice(list(corpus))
    samples.append(first)
    for i in range(n-1):
        output = transition_model(corpus, first, damping_factor)
        second = random.choices(list(output), weights=(output.values()))
        samples.append(second)
        first = second

    output = {}
    for link in corpus:
        num = 0
        for sample in samples:
            if sample == link:
                num += 1
        output[link] = num / n

    return output





def iterate_pagerank(corpus, damping_factor):
    """
    Return PageRank values for each page by iteratively updating
    PageRank values until convergence.

    Return a dictionary where keys are page names, and values are
    their estimated PageRank value (a value between 0 and 1). All
    PageRank values should sum to 1.
    """
    raise NotImplementedError


if __name__ == "__main__":
    main()

2.html

<!DOCTYPE html>
<html lang="en">
    <head>
        <title>2</title>
    </head>
    <body>
        <h1>2</h1>

        <div>Links:</div>
        <ul>
            <li><a href="1.html">1</a></li>
            <li><a href="3.html">3</a></li>
        </ul>
    </body>
</html>

给出相同的类型错误，但如果我替换

链接。用print（i）
追加（i）

没有错误，而且

是类型

str
随机。即使k=1
，选项也会返回列表，因此，在第二次迭代之后，我在页面

内部sample\u pagerank
由
for i in range(n-1):
        output = transition_model(corpus, first, damping_factor)
        second = random.choices(list(output), weights=(output.values()), k=1)[0]
        samples.append(second)
        first = second

这取决于页面的类型。我认为您可能正在使用不同的页面对象进行索引，并获得不同的结果。考虑发布一个最小的可重复的例子（）以获得更好的反馈。但是page
是一个字符串问题，问题似乎在于sample\u pagerank
而不是transition\u model
linkouts = []
    for i in corpus[page]:
        linkouts.append(i)

for i in range(n-1):
        output = transition_model(corpus, first, damping_factor)
        second = random.choices(list(output), weights=(output.values()), k=1)[0]
        samples.append(second)
        first = second