如何在python中增加元组值并在循环中搜索字符串
我有这段代码如何在python中增加元组值并在循环中搜索字符串,python,dictionary,tuples,Python,Dictionary,Tuples,我有这段代码 arfffile = [] inputed = raw_input("Enter Evaluation for name including file extension...") reader = open(inputed, 'r') verses = [] for line in reader: verses.append(line) for line in verses: if line.split('@') == "@": ver
# Asker's attempt (Python 2): read an ARFF file, count how many verses fall
# in each cluster, then try to count the "Yes" verses per cluster.
# NOTE(review): the indentation of this snippet was lost, so the nesting of
# the loops and branches below has to be inferred by the reader.
# NOTE(review): arfffile is assigned here but not used in the visible code.
arfffile = []
inputed = raw_input("Enter Evaluation for name including file extension...")
reader = open(inputed, 'r')
# Copy every line of the file into a list.
verses = []
for line in reader:
verses.append(line)
# Apparently meant to drop the '@' header lines.
# NOTE(review): line.split('@') returns a list, so comparing it with the
# string "@" is never true; list.pop() also expects an index, not the line.
for line in verses:
if line.split('@') == "@":
verses.pop(line)
numclusters = int(raw_input("Enter the number of clusters"))
# One zeroed counter per cluster name: "cluster1" .. "cluster<numclusters>".
clusters = {}
for i in range(1,numclusters+1):
clusters["cluster"+str(i)] = 0
print clusters
# If a verse belongs to a cluster, increment that cluster's count in the dictionary.
# NOTE(review): `k in verse` is a substring test on the whole line, so
# "cluster1" would also match a line ending in "cluster10".
for verse in verses:
for k in clusters:
if k in verse:
clusters[k] += 1
else:
print "not in"
print clusters
# Seed yeslist with one (cluster name, 0) tuple per cluster.
# NOTE(review): yeslist holds tuples while k is a string key, so
# `k not in yeslist` is always true: a tuple is appended on every pass and
# the elif branch cannot run.
yeslist = []
for verse in verses:
for k in clusters:
if k not in yeslist:
yeslist.append((k,0))
elif k in yeslist:
print "already in" + k
# Attempt to bump the count of the matching cluster for every "Yes" verse.
# NOTE(review): list.append() takes exactly one argument, and
# yeslist.index(k) looks for the bare string k among tuples, so this line
# cannot update a count; tuples are immutable as well.
for verse in verses:
for k in clusters:
if k in verse and "Yes" in verse:
yeslist.append(yeslist.index(k), +1)
# Plan: iterate through the dictionary and through the lines.
# The file needs to be read line by line;
# if a line contains "Yes" and cluster x, increment the count for cluster x.
# Then work out the percentage of positive verses in each cluster.
ARFF 文件的一个示例如下:
@relation tester999.arff_clustered
@attribute Instance_number numeric
@attribute allah numeric
@attribute day numeric
@attribute lord numeric
@attribute people numeric
@attribute earth numeric
@attribute men numeric
@attribute truth numeric
@attribute verily numeric
@attribute chapter numeric
@attribute verse numeric
@attribute CLASS {Yes,No}
@attribute Cluster {cluster1,cluster2,cluster3}
@data
0,1,0,0,0,0,0,0,0,1,1,No,cluster3
1,1,0,0,0,0,0,0,0,1,2,No,cluster3
2,0,0,0,0,0,0,0,0,1,3,No,cluster3
3,0,1,0,0,0,1,0,0,1,4,No,cluster3
4,0,0,0,0,0,0,0,0,1,5,No,cluster3
5,0,0,0,0,0,0,0,0,1,6,No,cluster3
6,0,0,0,0,0,0,0,0,1,7,No,cluster3
7,0,0,0,0,0,0,0,0,2,1,No,cluster3
8,1,0,0,0,0,0,0,0,2,2,No,cluster3
9,0,0,0,0,0,0,0,0,2,3,No,cluster3
10,0,0,0,0,0,0,0,0,2,4,No,cluster3
11,0,0,1,0,0,0,0,0,2,5,No,cluster2
就目前而言,程序会读取数据行,例如:
0,1,0,0,0,0,0,0,0,1,1,No,cluster3
我还创建了一个字典,用来记录数据文件中有多少个集群。在这个例子中有 3 个:cluster1、cluster2 和 cluster3。代码把每个集群的名称(字符串)作为键加入字典“clusters”。然后我遍历所有的诗句(数据行),逐行检查它属于哪个集群,并进行计数。我的下一步是尝试统计每个集群中带有“Yes”的行出现的次数。例如,假设数据中有 10 行的字符串里带有“Yes”,那么代码应该能够计算出这种情况出现的次数。到目前为止,我写的代码如下:
# Excerpt of the loop the question is about: for every verse that names a
# cluster and contains "Yes", the matching yeslist entry should grow by one.
# NOTE(review): list.append() takes exactly one argument, and
# yeslist.index(k) searches for the bare string k in a list of tuples, so
# this call cannot update a count; tuples are immutable as well.
for verse in verses:
for k in clusters:
if k in verse and "Yes" in verse:
yeslist.append(yeslist.index(k), +1)
我正在创建一个名为“yeslist”的元组列表,其值如下[(cluster1,0),(cluster2,3)]
因此,对于每一行(表示为字符串),检查其中是否含有“Yes”;如果有,再检查它属于哪个集群,并将该集群对应的元组值加 1。
我想不出实现这一点的逻辑……有人能帮忙吗?
谢谢
import collections


def load_data_rows(lines):
    """Return the data rows of an ARFF file as stripped strings.

    Args:
        lines: iterable of raw text lines, e.g. an open file object.

    Returns:
        A list with every non-blank line that is neither an ``@`` header
        declaration nor a ``%`` comment, surrounding whitespace removed.
    """
    rows = []
    for line in lines:
        stripped = line.strip()
        # Blank lines would break the field lookup in count_clusters();
        # '@' lines are header declarations and '%' lines are ARFF comments.
        if stripped and not stripped.startswith(('@', '%')):
            rows.append(stripped)
    return rows


def count_clusters(rows, yes_label='Yes'):
    """Count rows per cluster and rows labelled *yes_label* per cluster.

    Each row is a comma-separated record whose last field is the cluster
    name and whose second-to-last field is the class label.

    Args:
        rows: iterable of comma-separated data rows.
        yes_label: class label that counts as a positive row. The sample
            data declares the class as ``{Yes,No}``, so the match is
            case-sensitive.

    Returns:
        A ``(cluster_count, yes_count)`` pair of ``defaultdict(int)``
        objects keyed by cluster name.

    Raises:
        IndexError: if a row has fewer than two fields.
    """
    cluster_count = collections.defaultdict(int)
    yes_count = collections.defaultdict(int)
    for row in rows:
        # Split each row on its own; the list of rows has no split() method.
        fields = row.split(",")
        cluster = fields[-1].strip()
        label = fields[-2].strip()
        cluster_count[cluster] += 1
        if label == yes_label:
            yes_count[cluster] += 1
    return cluster_count, yes_count


if __name__ == "__main__":
    inputed = raw_input("Enter Evaluation for name including file extension...")
    # The context manager closes the file even if reading fails.
    with open(inputed, 'r') as reader:
        verses = load_data_rows(reader)
    cluster_count, yes_count = count_clusters(verses)
您最终会得到两个字典:
cluster_count : keys = cluster#, values = count
yes_count : keys = cluster#, values = #yes
如果确实需要元组列表:
# sorted() already returns a real list of (cluster, yes-count) tuples ordered
# by cluster name, so no generator wrapper is needed; .items() works on both
# Python 2 and Python 3.
yes_tuples = sorted(yes_count.items())
能否给出这个问题的精简版本?另外,我很确定元组是不可变的。