Python中的类实现(TF-IDF-CF)
我想用 TF-IDF-CF 方法进行词语加权。我从 GitHub 上找到了下面这段代码,但仍然不知道如何把它应用到我的 DataFrame 上。我的数据集是一个共 1000 行的文本集合。(标签:python、jupyter-notebook、tf-idf)代码如下:
import math
"""
"""
class FrequencyCalc:
    """Term-weighting helpers: TF, IDF, TF-IDF and TF-IDF-CF.

    Every method takes plain Python sequences and returns a list of weights
    in the same order as the terms it was given.
    """

    def tfidfcf(self, tfidfZip, classWordLists):
        """Scale TF-IDF weights by class frequency (TF-IDF-CF).

        Each weight is multiplied by ``ncij / N``, where ``ncij`` is the
        number of entries of ``classWordLists`` that contain the word and
        ``N`` is ``len(classWordLists)``.

        Args:
            tfidfZip: Iterable of ``(word, tfidf_weight)`` pairs.
            classWordLists: Sized collection of word containers (presumably
                one per document of the class); each must support
                ``word in container``.

        Returns:
            A list with one scaled weight per input pair. When
            ``classWordLists`` is empty every weight is ``0.0`` instead of
            raising ``ZeroDivisionError``.
        """
        class_size = len(classWordLists)
        weights = []
        for word, weight in tfidfZip:
            if class_size == 0:
                # No class documents: the class-frequency factor is undefined,
                # so the term gets no weight rather than crashing on 0 / 0.
                weights.append(0.0)
                continue
            hits = sum(1 for words in classWordLists if word in words)
            weights.append(weight * (hits / class_size))
        return weights

    def tfidf(self, tf, idf):
        """Multiply term frequencies by inverse document frequencies.

        Args:
            tf: Sequence of term-frequency values.
            idf: Sequence of IDF values, indexed in parallel with ``tf``.

        Returns:
            A list where element ``i`` is ``tf[i] * idf[i]``.

        Raises:
            IndexError: If ``idf`` is shorter than ``tf``.
        """
        # Index into idf (rather than zip) so a too-short idf still fails
        # loudly instead of silently truncating the result.
        return [value * idf[i] for i, value in enumerate(tf)]

    def tf(self, wordCount):
        """Compute relative term frequencies.

        Args:
            wordCount: Iterable of ``(word, count)`` pairs; ``count`` may be
                anything ``int()`` accepts.

        Returns:
            A list with ``count / total`` for each pair, where ``total`` is
            the sum of all counts. If the total is 0 every frequency is
            ``0.0`` instead of raising ``ZeroDivisionError``.
        """
        # Materialise once: the pairs are traversed twice (total, then
        # ratios), which would exhaust a one-shot iterable such as zip().
        pairs = list(wordCount)
        total = self.__totalWords(pairs)
        if total == 0:
            return [0.0 for _ in pairs]
        return [int(count) / total for _, count in pairs]

    def idf(self, docWords, wordLists):
        """Compute base-10 inverse document frequencies.

        Args:
            docWords: Iterable of words to score.
            wordLists: Sized collection of word containers (presumably one
                per document); each must support ``word in container``.

        Returns:
            A list with ``log10(N / nt)`` per word, where ``N`` is
            ``len(wordLists)`` and ``nt`` is the number of containers that
            include the word. A word found in no container gets ``0.0``
            instead of raising ``ZeroDivisionError``.
        """
        doc_total = len(wordLists)
        scores = []
        for word in docWords:
            containing = sum(1 for words in wordLists if word in words)
            if containing == 0:
                # Unseen word (or empty corpus): N / 0 is undefined.
                scores.append(0.0)
            else:
                scores.append(math.log(doc_total / containing, 10))
        return scores

    def __totalWords(self, wordCount):
        """Return the sum of the counts in ``(word, count)`` pairs."""
        return sum(int(count) for _, count in wordCount)
请给我一个使用这个类的例子。谢谢!

我找到了它的一个基于类别的实现(c-TF-IDF),代码如下:
def c_tf_idf(documents, m, ngram_range=(1, 1)):
    """Calculate a class-based TF-IDF where m is the number of total documents.

    Args:
        documents: Iterable of strings passed to ``CountVectorizer``
            (presumably one concatenated string per class -- confirm with
            the caller).
        m: Numerator of the IDF ratio, i.e. the total number of documents.
        ngram_range: ``(min_n, max_n)`` forwarded to ``CountVectorizer``.

    Returns:
        A ``(tf_idf, count)`` tuple: ``tf_idf`` is a 2-D array with one row
        per vocabulary term and one column per entry of ``documents``, and
        ``count`` is the fitted ``CountVectorizer``.
    """
    count = CountVectorizer(ngram_range=ngram_range, stop_words="english").fit(documents)
    t = count.transform(documents).toarray()

    # Total number of counted terms in each document.
    w = t.sum(axis=1)

    # A document made only of stop words has w == 0; dividing by it would
    # yield NaN (0 / 0) and a RuntimeWarning, so leave those columns at 0.
    tf = np.divide(t.T, w, out=np.zeros(t.T.shape, dtype=float), where=w != 0)

    # Total occurrences of each term across all documents; reshaped to a
    # column so it broadcasts across the per-document columns of tf.
    sum_t = t.sum(axis=0)
    idf = np.log(np.divide(m, sum_t)).reshape(-1, 1)

    tf_idf = np.multiply(tf, idf)
    return tf_idf, count