导入目录时,如何在python中选择某个文件夹

导入目录时,如何在python中选择某个文件夹,python,python-3.x,Python,Python 3.x,我写的代码是,当你输入5个不同的目录时,做5个不同的循环。但是我不想做的是插入一个目录,它有5个不同的文件夹,并实现一个循环 以下是我现在编写的代码: def find_threshold(dir1, dir2, dir3, dir4, dir5): #Finding mean for Buisness business_mean = 0 business_sum = 0 n = 0 index = 0 business_mean_list = [

我写的代码是,当你输入5个不同的目录时,做5个不同的循环。但是我不想做的是插入一个目录,它有5个不同的文件夹,并实现一个循环

以下是我现在编写的代码:

def find_threshold(dir1, dir2, dir3, dir4, dir5):
    """Compute one similarity threshold per category directory.

    For each of the five directories, every text file is compared with every
    text file in the *same* directory using cosine similarity over binary
    word-presence vectors (English stop words removed).  After each outer
    file's comparisons, the running mean of all similarities seen so far is
    recorded; the directory's threshold is the minimum of those means.

    Fixes over the previous version:
    - The dir3/dir4/dir5 sections had copy-paste bugs (the inner loop
      overwrote ``filename_1`` and compared against a hard-coded
      ``"world1.txt"`` instead of files from the directory); all five
      directories now use the same correct pairwise logic.
    - Files are opened with ``with`` so handles are closed (they leaked).
    - Empty token sets / empty directories no longer raise
      ``ZeroDivisionError``.
    - Each file is read and tokenized once instead of O(n^2) times.

    Parameters
    ----------
    dir1, dir2, dir3, dir4, dir5 : str
        Paths to the business, entertainment, local, sports and world
        directories of UTF-8 text files.

    Returns
    -------
    tuple
        ``(business, entertainment, local, sports, world)`` thresholds.
    """
    # Imported lazily, matching the original code's placement inside the
    # function body (keeps import cost out of module load).
    from nltk.corpus import stopwords
    from nltk.tokenize import word_tokenize

    # Build the stop-word set once; the original rebuilt the list per pair.
    stop_words = set(stopwords.words('english'))

    def _token_set(filepath):
        """Return the set of non-stop-word tokens of one UTF-8 text file."""
        with open(filepath, encoding='utf-8') as fh:
            text = fh.read()
        return {w for w in word_tokenize(text) if w not in stop_words}

    def _cosine(x_set, y_set):
        """Cosine similarity of two binary word-presence vectors.

        For 0/1 vectors this reduces to |X & Y| / sqrt(|X| * |Y|), which is
        exactly what the original l1/l2 vector construction computed.
        """
        if not x_set or not y_set:
            return 0.0  # original raised ZeroDivisionError here
        return len(x_set & y_set) / float((len(x_set) * len(y_set)) ** 0.5)

    def _list_files(directory):
        """All file paths under *directory*, recursively (os.walk order)."""
        return [os.path.join(path, name)
                for path, _, files in os.walk(directory)
                for name in files]

    def _directory_threshold(directory):
        """Minimum per-outer-file running-mean similarity within one dir."""
        file_paths = _list_files(directory)
        # Tokenize each file exactly once and reuse for every comparison.
        token_sets = {p: _token_set(p) for p in file_paths}

        running_sum = 0.0
        comparisons = 0
        means = []
        for outer in file_paths:
            print(f"Checking --> {outer}")
            for inner in file_paths:
                running_sum += _cosine(token_sets[outer], token_sets[inner])
                comparisons += 1
            # Running mean over all comparisons so far — mirrors the
            # original's sum/n computed after each outer file.
            means.append(running_sum / comparisons)
        if not means:
            raise ValueError(f"no files found under {directory!r}")
        return min(means)

    return tuple(_directory_threshold(d)
                 for d in (dir1, dir2, dir3, dir4, dir5))
如你所见,我向方法
find_threshold
输入5个目录,并对5个循环中的5个目录进行余弦计算,最后给出5个值作为每个循环的输出。我想做的是为方法
find_threshold
提供一个目录(它有5个文件夹,每个文件夹有100个文本文件),并获得相同的输出

诸如此类

def find_threshold(dir):

有人能帮我写这方面的代码吗?

把你的其中一段代码抽出来,让 find_threshold() 一次只处理一个目录。然后,当用户指定父文件夹时,在其中查找直接子文件夹,并对每个子文件夹调用 find_threshold()

也许像这样的东西我完全没有测试过:

def find_threshold_by_parent(parent_directory):
    """Run ``find_threshold`` on every immediate sub-directory.

    Parameters
    ----------
    parent_directory : str
        Folder whose direct child folders are the category directories.

    Returns
    -------
    list
        One threshold per child directory.

    Fix: ``os.listdir`` yields bare names, so the previous version's
    ``os.path.isdir(dir)`` / ``find_threshold(dir)`` only worked when the
    current working directory happened to be *parent_directory*.  Joining
    with the parent makes the paths valid everywhere (and avoids shadowing
    the builtin ``dir``).
    """
    thresholds = []
    for name in os.listdir(parent_directory):
        child = os.path.join(parent_directory, name)
        if os.path.isdir(child):
            thresholds.append(find_threshold(child))
    return thresholds

def find_threshold(child_directory):
    """Return the similarity threshold for one directory of text files.

    Every file under *child_directory* is compared with every file in the
    same directory using cosine similarity over binary word-presence
    vectors (English stop words removed).  After each outer file, the
    running mean of all similarities so far is recorded; the threshold is
    the minimum of those means.

    Fixes over the previous version:
    - ``sum = 0`` shadowed the builtin, so ``sum(l1)`` raised
      ``TypeError: 'int' object is not callable`` on the first comparison.
    - Files are opened with ``with`` so handles are closed (they leaked).
    - Empty token sets / empty directories no longer raise
      ``ZeroDivisionError``.
    - Each file is read and tokenized once instead of once per pair.

    Parameters
    ----------
    child_directory : str
        Directory of UTF-8 text files.

    Returns
    -------
    float
        The minimum running-mean cosine similarity.
    """
    # Lazy imports, matching the original code's placement.
    from nltk.corpus import stopwords
    from nltk.tokenize import word_tokenize

    stop_words = set(stopwords.words('english'))

    def _token_set(filepath):
        """Non-stop-word token set of one UTF-8 text file."""
        with open(filepath, encoding='utf-8') as fh:
            return {w for w in word_tokenize(fh.read()) if w not in stop_words}

    file_paths = [os.path.join(path, name)
                  for path, _, files in os.walk(child_directory)
                  for name in files]
    # Tokenize once per file; the original re-read both files per pair.
    token_sets = {p: _token_set(p) for p in file_paths}

    similarity_sum = 0.0  # renamed: must not shadow builtin sum()
    comparisons = 0
    mean_list = []
    for outer in file_paths:
        print(f"Checking --> {outer}")
        for inner in file_paths:
            x_set, y_set = token_sets[outer], token_sets[inner]
            if x_set and y_set:
                # For 0/1 vectors, cosine = |X & Y| / sqrt(|X| * |Y|) —
                # identical to the original l1/l2 construction.
                cosine = len(x_set & y_set) / float(
                    (len(x_set) * len(y_set)) ** 0.5)
            else:
                cosine = 0.0  # original raised ZeroDivisionError here
            similarity_sum += cosine
            comparisons += 1
        # Running mean over all comparisons so far, as in the original.
        mean_list.append(similarity_sum / comparisons)

    if not mean_list:
        raise ValueError(f"no files found under {child_directory!r}")
    return min(mean_list)
```
def your_function(dir_list):
    ans = []
    for dirname in dir_list:
        required_output = your_cosine_function(dirname)
        ans.append(required_output)
    return tuple(ans)
```