使用R中的短语创建文本关联图
我对 R 比较陌生。我可以使用以下代码创建一个如下所示的关联图(标签:r, parsing, plot, correlation, text-mining):
# Build a document-term matrix from final$MH2 and plot the correlation
# network of its most frequent terms.

# Rgraphviz is used by tm's plot method for term matrices. Install it only
# when it is missing so that re-running the script does not reinstall it.
# NOTE(review): biocLite() has been retired by Bioconductor in favour of
# BiocManager::install("Rgraphviz") -- confirm which installer applies to
# the R version in use.
if (!requireNamespace("Rgraphviz", quietly = TRUE)) {
  source("https://bioconductor.org/biocLite.R")
  biocLite("Rgraphviz")
}
library(tm)
library(qdap)
library(qdapTools)

# Create the corpus on the variable that the plot is based on.
myCorpus <- Corpus(VectorSource(final$MH2))
dtm2 <- DocumentTermMatrix(myCorpus)

# Pick the 25 terms with the highest total count. findFreqTerms() with its
# default bounds returns every term in matrix order rather than a ranking,
# so the totals are computed and sorted explicitly. head() also avoids NA
# entries when the matrix holds fewer than 25 terms.
# NOTE: as.matrix() densifies the matrix; fine for small corpora.
term_totals <- sort(colSums(as.matrix(dtm2)), decreasing = TRUE)
freq.terms <- names(head(term_totals, 25L))

# Correlation plot: only draw edges with a correlation of at least 0.1 and
# weight the edges by correlation strength.
plot(dtm2, terms = freq.terms, corThreshold = 0.1, weighting = TRUE)
source("https://bioconductor.org/biocLite.R")
biocLite("Rgraphviz")
library(tm)
library(qdap)
library(qdapTools)
# 在要创建绘图的变量上创建语料库
myCorpus <- Corpus(VectorSource(final$MH2))
# Build a tf-idf weighted term matrix in which multi-word phrases are kept
# together as single terms, then plot the correlation network of the
# highest-weighted terms.

# Rgraphviz is used by tm's plot method for term matrices. Install it only
# when it is missing so that re-running the script does not reinstall it.
# NOTE(review): biocLite() has been retired by Bioconductor in favour of
# BiocManager::install("Rgraphviz") -- confirm which installer applies to
# the R version in use.
if (!requireNamespace("Rgraphviz", quietly = TRUE)) {
  source("https://bioconductor.org/biocLite.R")
  biocLite("Rgraphviz")
}
library(tm)
library(qdap)
library(qdapTools)

# Create a corpus with phrases kept together, based on
# https://stackoverflow.com/questions/24038498/corpus-build-with-phrases

# Take the third column of `final` as the text. drop = FALSE keeps a
# one-column table even when `final` is a base data.frame; without it the
# result collapses to a vector and the colnames<- call below fails.
dat <- final[, 3, drop = FALSE]
colnames(dat) <- "text"

# Document ids doc1, doc2, ... one per row. seq_len() is safe for zero rows.
dat$docs <- paste0("doc", seq_len(nrow(dat)))

# Hold the ", " separators aside, replace the remaining spaces with "~~",
# then restore the separators via unhold().
# NOTE(review): presumably this makes each comma-separated phrase a single
# token for wfm() -- confirm against the qdap documentation.
x <- sub_holder(", ", dat$text)

# Create the weighted term matrix here.
MH_parsed <- apply_as_tm(
  t(wfm(x$unhold(gsub(" ", "~~", x$output)), dat$docs)),
  weightTfIdf,
  to.qdap = FALSE
)

# Pick the 25 terms with the highest total weight. findFreqTerms() with its
# default bounds returns every term in matrix order rather than a ranking,
# so the totals are computed and sorted explicitly. Terms are rows in a
# TermDocumentMatrix and columns in a DocumentTermMatrix, so sum along the
# matching margin. head() avoids NA entries when fewer than 25 terms exist.
# NOTE: as.matrix() densifies the matrix; fine for small corpora.
term_matrix <- as.matrix(MH_parsed)
term_totals <- if (inherits(MH_parsed, "TermDocumentMatrix")) {
  rowSums(term_matrix)
} else {
  colSums(term_matrix)
}
freq.terms <- names(head(sort(term_totals, decreasing = TRUE), 25L))

# Correlation plot: only draw edges with a correlation of at least 0.1 and
# weight the edges by correlation strength.
plot(MH_parsed, terms = freq.terms, corThreshold = 0.1, weighting = TRUE)