R 输入矩阵的每一行需要至少包含一个非零条目_R_Memory_Lda_Topic Modeling

R 输入矩阵的每一行需要至少包含一个非零条目

r memory

R 输入矩阵的每一行需要至少包含一个非零条目,r,memory,lda,topic-modeling,R,Memory,Lda,Topic Modeling,我在运行这段代码时遇到了这个问题 text_lda <- LDA(text_dtm, k = 2, method = "VEM", control = NULL) 试试这个： empty.rows <- text_dtm[rowTotals == 0, ]$dimnames[1][[1]] corpus_new <- corpus[-as.numeric(empty.rows)] 我建议为您的DTM使用dgCMatrix类。它随R一起提供，作为广泛使用的矩阵包的一部分，与

我在运行这段代码时遇到了这个问题

# Fit a 2-topic LDA model with variational EM (method = "VEM").
# This is the call reported to fail with "Each row of the input matrix needs
# to contain at least one non-zero entry" when text_dtm has an all-zero row.
text_lda <- LDA(text_dtm, k = 2, method = "VEM", control = NULL)

试试这个：

# Per-document term counts. slam::row_sums() works on the sparse triplet
# representation directly, so the DTM is never converted to a dense matrix
# (which apply(text_dtm, 1, sum) would do).
rowTotals <- slam::row_sums(text_dtm)

# Ids (Docs dimnames) of the documents that have no terms left.
empty.rows <- text_dtm$dimnames[[1]][rowTotals == 0]

# Keep the non-empty documents with a logical mask. A negative index built
# from empty.rows would drop EVERY document when empty.rows is empty, and
# would produce NA indices when the document ids are not numeric.
# Assumes text_dtm was built from `corpus`, one row per document, same order.
corpus_new <- corpus[rowTotals > 0]

我建议为您的DTM使用

dgCMatrix

类。它随R一起提供，作为广泛使用的

Matrix

包的一部分，与

topicmodels::LDA

和许多其他NLP包（

textmineR

，

text2vec

，

tidytext

等）一起使用，其方法让您可以像使用密集矩阵一样使用它

library(tm)
library(topicmodels)
library(Matrix)

# Grab a character vector of text. Your source may be different.
text <- textmineR::nih_sample$ABSTRACT_TEXT

text_corpus <- SimpleCorpus(VectorSource(text))

# Build the document-term matrix with basic text cleaning.
# NOTE(review): `sparse = TRUE` does not appear to be a documented tm control
# option and is presumably ignored; kept as in the original -- confirm.
text_dtm <- DocumentTermMatrix(
  text_corpus,
  control = list(
    tolower = TRUE,
    removePunctuation = TRUE,
    removeNumbers = TRUE,
    stopwords = TRUE,
    sparse = TRUE
  )
)

# Convert the DTM's triplet slots (i, j, v) into a Matrix sparse matrix.
# tidytext::cast_sparse() is deliberately not used: it expects a tidy data
# frame (one row per document-term pair), not a DocumentTermMatrix, and
# tidytext is not loaded by this script.
text_dtm2 <- Matrix::sparseMatrix(
  i = text_dtm$i,
  j = text_dtm$j,
  x = text_dtm$v,
  dims = c(text_dtm$nrow, text_dtm$ncol),
  dimnames = text_dtm$dimnames
)

# Number of retained tokens per document, computed on the sparse matrix.
doc_lengths <- Matrix::rowSums(text_dtm2)

# LDA requires every row to have at least one non-zero entry, so drop empty
# documents. drop = FALSE keeps a matrix even if only one document remains.
text_dtm3 <- text_dtm2[doc_lengths > 0, , drop = FALSE]

# Fit a 2-topic LDA model with variational EM.
text_lda <- LDA(text_dtm3, k = 2, method = "VEM", control = NULL)

library(tm)
library(topicmodels)
library(Matrix)
# 获取文本的字符向量。你的来源可能不同
text
问题是 apply 会将稀疏矩阵转换为密集矩阵，从而导致内存错误。您可以看看是否有针对稀疏矩阵的行求和（row sums）方法，而不是使用 apply。
您是如何生成 dtm 的？你用的是什么软件包？
text_dtm
我的下一个错误是这个：text_dtm
我做的这个 m 当然可以用。
但是我可以请你分享一下生成 text_dtm 的代码吗？我想亲自看看为什么 cast_sparse 不起作用。
谢谢，当然可以！
谢谢。今晚我会看看，然后更新我的答案，这样它就可以工作了。
# Per-document term counts. slam::row_sums() works on the sparse triplet
# representation directly, so the DTM is never converted to a dense matrix
# (which apply(text_dtm, 1, sum) would do).
rowTotals <- slam::row_sums(text_dtm)

# Ids (Docs dimnames) of the documents that have no terms left.
empty.rows <- text_dtm$dimnames[[1]][rowTotals == 0]

# Keep the non-empty documents with a logical mask. A negative index built
# from empty.rows would drop EVERY document when empty.rows is empty, and
# would produce NA indices when the document ids are not numeric.
# Assumes text_dtm was built from `corpus`, one row per document, same order.
corpus_new <- corpus[rowTotals > 0]

# Row indices that hold at least one stored entry, i.e. non-empty documents.
# sort() keeps the surviving documents in their original order; unique()
# alone returns indices in the order they first appear in the triplet storage.
ui <- sort(unique(text_dtm$i))
text_dtm.new <- text_dtm[ui, ]

library(tm)
library(topicmodels)
library(Matrix)

# Grab a character vector of text. Your source may be different.
text <- textmineR::nih_sample$ABSTRACT_TEXT

text_corpus <- SimpleCorpus(VectorSource(text))

# Build the document-term matrix with basic text cleaning.
# NOTE(review): `sparse = TRUE` does not appear to be a documented tm control
# option and is presumably ignored; kept as in the original -- confirm.
text_dtm <- DocumentTermMatrix(
  text_corpus,
  control = list(
    tolower = TRUE,
    removePunctuation = TRUE,
    removeNumbers = TRUE,
    stopwords = TRUE,
    sparse = TRUE
  )
)

# Convert the DTM's triplet slots (i, j, v) into a Matrix sparse matrix.
# tidytext::cast_sparse() is deliberately not used: it expects a tidy data
# frame (one row per document-term pair), not a DocumentTermMatrix, and
# tidytext is not loaded by this script.
text_dtm2 <- Matrix::sparseMatrix(
  i = text_dtm$i,
  j = text_dtm$j,
  x = text_dtm$v,
  dims = c(text_dtm$nrow, text_dtm$ncol),
  dimnames = text_dtm$dimnames
)

# Number of retained tokens per document, computed on the sparse matrix.
doc_lengths <- Matrix::rowSums(text_dtm2)

# LDA requires every row to have at least one non-zero entry, so drop empty
# documents. drop = FALSE keeps a matrix even if only one document remains.
text_dtm3 <- text_dtm2[doc_lengths > 0, , drop = FALSE]

# Fit a 2-topic LDA model with variational EM.
text_lda <- LDA(text_dtm3, k = 2, method = "VEM", control = NULL)