导入目录时,如何在 Python 中选择某个文件夹(标签:python / python-3.x)

我现在写的代码是:当你输入 5 个不同的目录时,执行 5 个不同的循环。但我想改成的是:只传入一个目录(其中含有 5 个不同的文件夹),并且只用一个循环实现。以下是我现在编写的代码:
def _directory_threshold(directory):
    """Return the minimum running-mean cosine similarity for *directory*.

    Every text file under *directory* (recursively) is compared with every
    file under it, including itself.  After each outer file, the running
    mean (cumulative similarity / cumulative comparison count — matching
    the original code, which never reset its counters) is recorded; the
    minimum of those means is the threshold.

    Raises ValueError (from min()) if the directory contains no files.
    """
    # Function-scope imports preserved from the original inline imports;
    # the nltk data packages ('punkt', 'stopwords') must be downloaded.
    from nltk.corpus import stopwords
    from nltk.tokenize import word_tokenize

    stop_words = set(stopwords.words('english'))  # set: O(1) membership

    # Read and tokenize each file exactly once; the original re-read and
    # re-tokenized every file inside an O(n^2) comparison loop, and never
    # closed the handles it opened.
    keyword_sets = []
    for path, _, files in os.walk(directory):
        for file_name in files:
            filepath = os.path.join(path, file_name)
            with open(filepath, encoding='utf-8') as fh:
                tokens = word_tokenize(fh.read())
            keyword_sets.append(
                (filepath, {w for w in tokens if w not in stop_words}))

    mean_list = []
    similarity_total = 0.0
    comparisons = 0
    for filepath, set_a in keyword_sets:
        print(f"Checking --> {filepath}")
        for _, set_b in keyword_sets:
            comparisons += 1
            if not set_a or not set_b:
                continue  # empty keyword set: similarity treated as 0
            # Cosine over binary presence vectors reduces to
            # |A ∩ B| / sqrt(|A| * |B|): the original's l1/l2 vectors sum
            # to len(set_a)/len(set_b) and their dot product is the
            # intersection size.
            cosine = len(set_a & set_b) / float(
                (len(set_a) * len(set_b)) ** 0.5)
            if cosine != 0:
                similarity_total += cosine
        mean_list.append(similarity_total / comparisons)
    return min(mean_list)


def find_threshold(dir1, dir2, dir3, dir4, dir5):
    """Return similarity thresholds for the five category directories.

    The original body repeated the same ~65-line loop five times; worse,
    the dir3/dir4/dir5 copies assigned filename_1 in BOTH loops and
    compared every file against a hard-coded "world1.txt" (a copy-paste
    bug), so those three directories were never actually compared
    pairwise.  All five directories now go through one helper.

    Parameters
    ----------
    dir1, dir2, dir3, dir4, dir5 : str
        Business, entertainment, local, sports and world directories.

    Returns
    -------
    tuple of five floats
        (business, entertainment, local, sports, world) thresholds.
    """
    return (
        _directory_threshold(dir1),
        _directory_threshold(dir2),
        _directory_threshold(dir3),
        _directory_threshold(dir4),
        _directory_threshold(dir5),
    )
如你所见,我向方法 find_threshold 输入 5 个目录,在 5 个循环中分别对这 5 个目录做余弦计算,最后每个循环输出一个值,共得到 5 个值。我想做的是:只向 find_threshold 传入一个目录(它包含 5 个文件夹,每个文件夹有 100 个文本文件),并获得相同的输出,例如:
def find_threshold(dir):
有人能帮我写这部分代码吗?【回答】把你重复的循环中的任意一段抽取出来,让 find_threshold() 一次只处理一个目录;然后当用户指定父文件夹时,在其中查找直接子文件夹,并对每个子文件夹调用 find_threshold()。也许像下面这样(我完全没有测试过):
def find_threshold_by_parent(parent_directory):
    """Run find_threshold() on each immediate sub-directory of
    *parent_directory* and return the list of resulting thresholds.

    Bug fixed: os.listdir() yields bare entry names, so the original
    os.path.isdir(entry) tested paths relative to the current working
    directory (almost always False) and then passed that bare name to
    find_threshold().  Entries are now joined with the parent path.
    The original also shadowed the builtin `dir`.
    """
    children = (os.path.join(parent_directory, entry)
                for entry in os.listdir(parent_directory))
    return [find_threshold(child) for child in children
            if os.path.isdir(child)]
def find_threshold(child_directory):
    """Compare every text file under *child_directory* (recursively)
    against every file under it with cosine similarity and return the
    minimum of the running means (the "threshold").

    Bug fixed: the original assigned `sum = 0`, shadowing the builtin,
    and then called `sum(l1)` — an unconditional
    `TypeError: 'int' object is not callable` on the first comparison.

    Raises ValueError (from min()) if the directory contains no files.
    """
    # Function-scope imports preserved from the original inline imports;
    # the nltk data packages ('punkt', 'stopwords') must be downloaded.
    from nltk.corpus import stopwords
    from nltk.tokenize import word_tokenize

    stop_words = set(stopwords.words('english'))  # set: O(1) membership

    # Read and tokenize each file exactly once; the original re-read and
    # re-tokenized every file inside an O(n^2) comparison loop, and never
    # closed the handles it opened.
    keyword_sets = []
    for path, _, files in os.walk(child_directory):
        for file_name in files:
            filepath = os.path.join(path, file_name)
            with open(filepath, encoding='utf-8') as fh:
                tokens = word_tokenize(fh.read())
            keyword_sets.append(
                (filepath, {w for w in tokens if w not in stop_words}))

    mean_list = []
    similarity_total = 0.0  # renamed from `sum` (builtin shadowing bug)
    comparisons = 0
    for filepath, set_a in keyword_sets:
        print(f"Checking --> {filepath}")
        for _, set_b in keyword_sets:
            comparisons += 1
            if not set_a or not set_b:
                continue  # empty keyword set: similarity treated as 0
            # Cosine over binary presence vectors reduces to
            # |A ∩ B| / sqrt(|A| * |B|).
            cosine = len(set_a & set_b) / float(
                (len(set_a) * len(set_b)) ** 0.5)
            if cosine != 0:
                similarity_total += cosine
        # Running mean over ALL comparisons so far, matching the original
        # code, which never reset its counters per outer file.
        mean_list.append(similarity_total / comparisons)
    threshold = min(mean_list)
    return threshold
```python
def your_function(dir_list):
    ans = []
    for dirname in dir_list:
        required_output = your_cosine_function(dirname)
        ans.append(required_output)
    return tuple(ans)
```