如何在R中绘制/可视化C50决策树?
我使用的是C50决策树算法。我能够构建这棵树并获得摘要,但不知道如何绘制或可视化这棵树。我的C50模型名为credit_model。在其他决策树包中,我通常使用plot(credit_model)之类的写法;在rpart中,则是rpart.plot(credit_model)。如何在R中绘制/可视化C50决策树?,r,plot,visualization,data-mining,decision-tree,R,Plot,Visualization,Data Mining,Decision Tree,我使用的是C50决策树算法。我能够构建这棵树并获得摘要,但不知道如何绘制或可视化这棵树。我的C50模型名为credit_model。在其他决策树包中,我通常使用plot(credit_model)之类的写法;在rpart中,则是rpart.plot(credit_model)。C50算法中用于绘图的等价函数是什么?目前还没有内置的绘图函数。我一直在为partykit包开发一个适配器(例如as.party),但进展不大。Max 您可以使用以下例程,直接将决策树转换为GraphViz的DOT语言(然后使用GraphViz进行绘图-
C50算法中用于绘图的等价函数是什么?目前还没有内置的绘图函数。我一直在为 partykit 包开发一个适配器(例如 as.party),但进展不大。
Max 您可以使用以下例程,直接将决策树转换为GraphViz的DOT语言(然后使用GraphViz进行绘图,需要事先安装GraphViz)。编辑:下面包含的版本2能够处理多分支树(版本1只能处理每个节点仅有两个分支的树)。版本2.2修正了缺失的初始化。在R中调用的示例:
# Example: fit a C5.0 tree on the churn data, export it to a GraphViz DOT
# file and render that file to a PNG image.
library(C50)
# NOTE(review): recent C50 releases may no longer bundle the `churn` data
# (it appears to have moved to the modeldata package) - confirm for your version.
data(churn)
# Column 20 is dropped from the predictors; the outcome is passed separately.
treeModel <- C5.0(x = churnTrain[, -20], y = churnTrain$churn)
# Write into the working directory rather than a hard-coded drive root,
# which is Windows-only and frequently not writable.
dot_file <- 'mydotfile.txt'
png_file <- 'mygraph.png'
C5.0.graphviz(treeModel, dot_file)
# Render the DOT file; requires the GraphViz `dot` executable on the PATH.
system2('dot', c('-Tpng', dot_file, '-o', png_file))
接着用GraphViz的dot命令把生成的文本文件渲染成图片(例如:dot -Tpng mydotfile.txt -o mygraph.png)。然后,您可以将mygraph.png文件作为png(位图)打开,并在应用程序中使用它
有关更多详细信息,请参阅原始帖子:
C5.0.graphviz这是您正在寻找的函数:
#' Export a C5.0 decision tree to a GraphViz DOT file
#'
#' Parses the textual tree found in `C5.0.model$output` (the part between
#' the "Decision tree:" header and the "Evaluation on training data"
#' section), turns every test into a question node, every leaf into a
#' conclusion node and every branch into a labelled edge, then writes the
#' resulting `digraph` to `filename` and prints it once to the console.
#'
#' Only base R is used; no package is attached as a side effect.
#'
#' @param C5.0.model Object whose `$output` element is a single string
#'   holding the printed C5.0 tree.
#' @param filename Path of the DOT text file to write.
#' @param fontname Font name used for nodes and edges.
#' @param col.draw Outline colour of the nodes.
#' @param col.font Font colour of the nodes.
#' @param col.conclusion Fill colour of leaf (conclusion) nodes.
#' @param col.question Fill colour of test (question) nodes.
#' @param shape.conclusion GraphViz shape of leaf nodes.
#' @param shape.question GraphViz shape of test nodes.
#' @param bool.substitute How the edge labels `= 0` / `= 1` are displayed:
#'   `'None'` (unchanged), `'yesno'`, `'truefalse'` or `'TF'`.
#' @param prefix If not `FALSE`, leaf labels are prefixed with "Class".
#' @param vertical If `TRUE` the graph is laid out top-to-bottom (`TB`),
#'   otherwise left-to-right (`LR`).
#' @return The DOT source as a single string, invisibly.
C5.0.graphviz <- function( C5.0.model, filename, fontname ='Arial',col.draw ='black',
col.font ='blue',col.conclusion ='lightpink',col.question = 'grey78',
shape.conclusion ='box3d',shape.question ='diamond',
bool.substitute = 'None', prefix=FALSE, vertical=TRUE ) {
  # Replacement labels for the edge tokens '= 0' (first) and '= 1' (second).
  substitutes <- list(None = c('= 0', '= 1'), yesno = c('no', 'yes'),
                      truefalse = c('false', 'true'), TF = c('F', 'T'))
  bool.substitute <- match.arg(bool.substitute, names(substitutes))

  # Position of the first literal occurrence of `pattern`, or NA if absent.
  find_fixed <- function(text, pattern) {
    pos <- regexpr(pattern, text, fixed = TRUE)[[1L]]
    if (is.na(pos) || pos < 0L) NA_integer_ else as.integer(pos)
  }

  treeout <- C5.0.model$output
  if (!is.character(treeout) || length(treeout) != 1L || is.na(treeout)) {
    stop("'C5.0.model$output' must be a single character string", call. = FALSE)
  }
  tree_start <- find_fixed(treeout, 'Decision tree:')
  if (is.na(tree_start)) {
    stop("no 'Decision tree:' section found in the model output ",
         "(rule-based models cannot be drawn)", call. = FALSE)
  }
  # Keep only the text after the header (14 = nchar('Decision tree:')) ...
  treeout <- substr(treeout, tree_start + 14L, nchar(treeout))
  # ... and before the evaluation section.
  tree_end <- find_fixed(treeout, 'Evaluation on training data')
  if (!is.na(tree_end)) {
    treeout <- substr(treeout, 1L, tree_end - 2L)
  }

  tree_lines <- strsplit(treeout, '\r?\n')[[1L]]
  tree_lines <- tree_lines[nzchar(trimws(tree_lines))]
  n_lines <- length(tree_lines)
  if (n_lines == 0L) {
    stop('the decision tree section of the model output is empty', call. = FALSE)
  }

  # Every line yields exactly one edge and at most two nodes.
  node_symbol <- character(2L * n_lines)
  node_token <- character(2L * n_lines)
  node_shape <- character(2L * n_lines)
  node_is_question <- logical(2L * n_lines)
  edge_label <- character(n_lines)
  edge_start <- character(n_lines)
  edge_end <- rep(NA_character_, n_lines)
  n_nodes <- 0L
  n_edges <- 0L

  # Stack of question nodes on the path to the current line;
  # `stack_top` is the next free slot.
  node_stack <- character(n_lines)
  stack_top <- 1L
  # TRUE while the previous edge still lacks a target node.
  open_connection <- TRUE
  previous_indent <- -1

  for (line in tree_lines) {
    # Depth = one level per 4 leading blanks, per ':   ' continuation
    # marker and per ':...' branch marker.
    stripped <- trimws(line, which = 'left')
    indent <- (nchar(line) - nchar(stripped)) / 4
    line <- stripped
    repeat {
      # The marker is colon + three blanks (4 characters, hence the +4).
      # Searching for ': ' instead would also hit the 'test: class'
      # separator and cut the class label off every leaf line.
      marker <- find_fixed(line, ':   ')
      if (is.na(marker)) break
      rest <- substr(line, marker + 4L, nchar(line))
      line <- trimws(rest, which = 'left')
      indent <- indent + 1 + (nchar(rest) - nchar(line)) / 4
    }
    marker <- find_fixed(line, ':...')
    if (!is.na(marker)) {
      indent <- indent + 1
      line <- substr(line, marker + 4L, nchar(line))
    }
    line <- trimws(line)

    # '<attribute> <operator> <value>' [ ':' ' <class> (<counts>)' ]
    parts <- strsplit(line, ':', fixed = TRUE)[[1L]]
    condition <- strsplit(parts[1L], '\\s')[[1L]]

    if (open_connection) {
      # First branch of a new test: create its question node and make it
      # the target of the edge opened by the previous line.
      n_nodes <- n_nodes + 1L
      node_symbol[n_nodes] <- paste0('node', n_nodes)
      node_token[n_nodes] <- condition[1L]
      node_shape[n_nodes] <- shape.question
      node_is_question[n_nodes] <- TRUE
      node_stack[stack_top] <- node_symbol[n_nodes]
      stack_top <- stack_top + 1L
      if (n_nodes > 1L) {
        edge_end[n_edges] <- node_symbol[n_nodes]
      }
    }

    label <- paste(condition[2L], condition[3L])
    if (label == '= 0') {
      label <- substitutes[[bool.substitute]][1L]
    } else if (label == '= 1') {
      label <- substitutes[[bool.substitute]][2L]
    }

    if (open_connection) {
      if (indent < previous_indent) {
        stack_top <- stack_top - ((previous_indent - indent) + 1)
        parent <- node_stack[stack_top]
      } else {
        parent <- node_symbol[n_nodes]
      }
    } else {
      # Further branch of an earlier test: look its node up on the stack
      # and pop the levels that were closed.
      parent <- node_stack[stack_top - ((previous_indent - indent) + 1)]
      stack_top <- stack_top - (previous_indent - indent)
    }

    n_edges <- n_edges + 1L
    edge_label[n_edges] <- label
    edge_start[n_edges] <- parent
    open_connection <- TRUE

    if (length(parts) == 2L) {
      # The line ends in a class label: add the leaf and close the edge.
      outcome <- strsplit(parts[2L], '\\s')[[1L]]
      n_nodes <- n_nodes + 1L
      node_symbol[n_nodes] <- paste0('node', n_nodes)
      node_token[n_nodes] <- paste(if (prefix == FALSE) '' else 'Class',
                                   outcome[2L])
      node_shape[n_nodes] <- shape.conclusion
      node_is_question[n_nodes] <- FALSE
      edge_end[n_edges] <- node_symbol[n_nodes]
      open_connection <- FALSE
    }
    previous_indent <- indent
  }

  nodes <- seq_len(n_nodes)
  edges <- seq_len(n_edges)
  header <- paste0('digraph g {\ngraph  [rankdir="',
                   if (vertical == TRUE) 'TB' else 'LR', '"]')
  node_lines <- paste0(
    node_symbol[nodes], ' [shape="', node_shape[nodes],
    '" label ="', node_token[nodes],
    '" style=filled fontcolor= ', col.font,
    '  color= ', col.draw,
    '  fontname= ', fontname,
    '  fillcolor= ',
    ifelse(node_is_question[nodes], col.question, col.conclusion),
    ' ];')
  edge_lines <- paste0(
    edge_start[edges], ' -> ', edge_end[edges],
    ' [label="', edge_label[edges],
    '" fontname= ', fontname, ' ];')
  dot <- paste(c(header, node_lines, edge_lines), collapse = '\n')
  dot <- paste(dot, '}')

  # Print once and write directly to the file; no sink() is involved, so a
  # failure cannot leave console output diverted.
  cat(dot, '\n', sep = '')
  writeLines(dot, filename)
  invisible(dot)
}
C5.0.graphviz 如何让graphviz函数在R中可用?您不需要让它在R中可用,而是从操作系统(Windows、Linux等)的命令行调用Graphviz的dot命令。输入参数是由上述C5.0.graphviz函数生成的文本文件,而dot命令的输出文件则是图形文件,可用于您的应用程序(Word等)。我收到一个错误“dot:无法打开 mytree.txt”,
知道为什么吗?它现在已经有了。提醒新来的读者:它可以绘制树,但不支持基于规则的模型……顺便说一句:目前(2015-08-07)C5.0已经有了绘图功能。这一点更切题:这里的所有答案无论如何都已过时,因为现在有了绘图功能。
#' Export a C5.0 decision tree to a GraphViz DOT file
#'
#' Parses the textual tree found in `C5.0.model$output` (the part between
#' the "Decision tree:" header and the "Evaluation on training data"
#' section), turns every test into a question node, every leaf into a
#' conclusion node and every branch into a labelled edge, then writes the
#' resulting `digraph` to `filename` and prints it once to the console.
#'
#' Only base R is used; no package is attached as a side effect.
#'
#' @param C5.0.model Object whose `$output` element is a single string
#'   holding the printed C5.0 tree.
#' @param filename Path of the DOT text file to write.
#' @param fontname Font name used for nodes and edges.
#' @param col.draw Outline colour of the nodes.
#' @param col.font Font colour of the nodes.
#' @param col.conclusion Fill colour of leaf (conclusion) nodes.
#' @param col.question Fill colour of test (question) nodes.
#' @param shape.conclusion GraphViz shape of leaf nodes.
#' @param shape.question GraphViz shape of test nodes.
#' @param bool.substitute How the edge labels `= 0` / `= 1` are displayed:
#'   `'None'` (unchanged), `'yesno'`, `'truefalse'` or `'TF'`.
#' @param prefix If not `FALSE`, leaf labels are prefixed with "Class".
#' @param vertical If `TRUE` the graph is laid out top-to-bottom (`TB`),
#'   otherwise left-to-right (`LR`).
#' @return The DOT source as a single string, invisibly.
C5.0.graphviz <- function( C5.0.model, filename, fontname ='Arial',
col.draw ='black',col.font ='blue',col.conclusion ='lightpink',
col.question = 'grey78', shape.conclusion ='box3d',shape.question ='diamond',
bool.substitute = 'None', prefix=FALSE, vertical=TRUE ) {
  # Replacement labels for the edge tokens '= 0' (first) and '= 1' (second).
  substitutes <- list(None = c('= 0', '= 1'), yesno = c('no', 'yes'),
                      truefalse = c('false', 'true'), TF = c('F', 'T'))
  bool.substitute <- match.arg(bool.substitute, names(substitutes))

  # Position of the first literal occurrence of `pattern`, or NA if absent.
  find_fixed <- function(text, pattern) {
    pos <- regexpr(pattern, text, fixed = TRUE)[[1L]]
    if (is.na(pos) || pos < 0L) NA_integer_ else as.integer(pos)
  }

  treeout <- C5.0.model$output
  if (!is.character(treeout) || length(treeout) != 1L || is.na(treeout)) {
    stop("'C5.0.model$output' must be a single character string", call. = FALSE)
  }
  tree_start <- find_fixed(treeout, 'Decision tree:')
  if (is.na(tree_start)) {
    stop("no 'Decision tree:' section found in the model output ",
         "(rule-based models cannot be drawn)", call. = FALSE)
  }
  # Keep only the text after the header (14 = nchar('Decision tree:')) ...
  treeout <- substr(treeout, tree_start + 14L, nchar(treeout))
  # ... and before the evaluation section.
  tree_end <- find_fixed(treeout, 'Evaluation on training data')
  if (!is.na(tree_end)) {
    treeout <- substr(treeout, 1L, tree_end - 2L)
  }

  tree_lines <- strsplit(treeout, '\r?\n')[[1L]]
  tree_lines <- tree_lines[nzchar(trimws(tree_lines))]
  n_lines <- length(tree_lines)
  if (n_lines == 0L) {
    stop('the decision tree section of the model output is empty', call. = FALSE)
  }

  # Every line yields exactly one edge and at most two nodes.
  node_symbol <- character(2L * n_lines)
  node_token <- character(2L * n_lines)
  node_shape <- character(2L * n_lines)
  node_is_question <- logical(2L * n_lines)
  edge_label <- character(n_lines)
  edge_start <- character(n_lines)
  edge_end <- rep(NA_character_, n_lines)
  n_nodes <- 0L
  n_edges <- 0L

  # Stack of question nodes on the path to the current line;
  # `stack_top` is the next free slot.
  node_stack <- character(n_lines)
  stack_top <- 1L
  # TRUE while the previous edge still lacks a target node.
  open_connection <- TRUE
  previous_indent <- -1

  for (line in tree_lines) {
    # Depth = one level per 4 leading blanks, per ':   ' continuation
    # marker and per ':...' branch marker.
    stripped <- trimws(line, which = 'left')
    indent <- (nchar(line) - nchar(stripped)) / 4
    line <- stripped
    repeat {
      # The marker is colon + three blanks (4 characters, hence the +4).
      # Searching for ': ' instead would also hit the 'test: class'
      # separator and cut the class label off every leaf line.
      marker <- find_fixed(line, ':   ')
      if (is.na(marker)) break
      rest <- substr(line, marker + 4L, nchar(line))
      line <- trimws(rest, which = 'left')
      indent <- indent + 1 + (nchar(rest) - nchar(line)) / 4
    }
    marker <- find_fixed(line, ':...')
    if (!is.na(marker)) {
      indent <- indent + 1
      line <- substr(line, marker + 4L, nchar(line))
    }
    line <- trimws(line)

    # '<attribute> <operator> <value>' [ ':' ' <class> (<counts>)' ]
    parts <- strsplit(line, ':', fixed = TRUE)[[1L]]
    condition <- strsplit(parts[1L], '\\s')[[1L]]

    if (open_connection) {
      # First branch of a new test: create its question node and make it
      # the target of the edge opened by the previous line.
      n_nodes <- n_nodes + 1L
      node_symbol[n_nodes] <- paste0('node', n_nodes)
      node_token[n_nodes] <- condition[1L]
      node_shape[n_nodes] <- shape.question
      node_is_question[n_nodes] <- TRUE
      node_stack[stack_top] <- node_symbol[n_nodes]
      stack_top <- stack_top + 1L
      if (n_nodes > 1L) {
        edge_end[n_edges] <- node_symbol[n_nodes]
      }
    }

    label <- paste(condition[2L], condition[3L])
    if (label == '= 0') {
      label <- substitutes[[bool.substitute]][1L]
    } else if (label == '= 1') {
      label <- substitutes[[bool.substitute]][2L]
    }

    if (open_connection) {
      if (indent < previous_indent) {
        stack_top <- stack_top - ((previous_indent - indent) + 1)
        parent <- node_stack[stack_top]
      } else {
        parent <- node_symbol[n_nodes]
      }
    } else {
      # Further branch of an earlier test: look its node up on the stack
      # and pop the levels that were closed.
      parent <- node_stack[stack_top - ((previous_indent - indent) + 1)]
      stack_top <- stack_top - (previous_indent - indent)
    }

    n_edges <- n_edges + 1L
    edge_label[n_edges] <- label
    edge_start[n_edges] <- parent
    open_connection <- TRUE

    if (length(parts) == 2L) {
      # The line ends in a class label: add the leaf and close the edge.
      outcome <- strsplit(parts[2L], '\\s')[[1L]]
      n_nodes <- n_nodes + 1L
      node_symbol[n_nodes] <- paste0('node', n_nodes)
      node_token[n_nodes] <- paste(if (prefix == FALSE) '' else 'Class',
                                   outcome[2L])
      node_shape[n_nodes] <- shape.conclusion
      node_is_question[n_nodes] <- FALSE
      edge_end[n_edges] <- node_symbol[n_nodes]
      open_connection <- FALSE
    }
    previous_indent <- indent
  }

  nodes <- seq_len(n_nodes)
  edges <- seq_len(n_edges)
  header <- paste0('digraph g {\ngraph  [rankdir="',
                   if (vertical == TRUE) 'TB' else 'LR', '"]')
  node_lines <- paste0(
    node_symbol[nodes], ' [shape="', node_shape[nodes],
    '" label ="', node_token[nodes],
    '" style=filled fontcolor= ', col.font,
    '  color= ', col.draw,
    '  fontname= ', fontname,
    '  fillcolor= ',
    ifelse(node_is_question[nodes], col.question, col.conclusion),
    ' ];')
  edge_lines <- paste0(
    edge_start[edges], ' -> ', edge_end[edges],
    ' [label="', edge_label[edges],
    '" fontname= ', fontname, ' ];')
  dot <- paste(c(header, node_lines, edge_lines), collapse = '\n')
  dot <- paste(dot, '}')

  # Print once and write directly to the file; no sink() is involved, so a
  # failure cannot leave console output diverted.
  cat(dot, '\n', sep = '')
  writeLines(dot, filename)
  invisible(dot)
}
#' Export a C5.0 decision tree to a GraphViz DOT file
#'
#' Parses the textual tree found in `C5.0.model$output` (the part between
#' the "Decision tree:" header and the "Evaluation on training data"
#' section), turns every test into a question node, every leaf into a
#' conclusion node and every branch into a labelled edge, then writes the
#' resulting `digraph` to `filename` and prints it once to the console.
#'
#' Only base R is used; no package is attached as a side effect.
#'
#' @param C5.0.model Object whose `$output` element is a single string
#'   holding the printed C5.0 tree.
#' @param filename Path of the DOT text file to write.
#' @param fontname Font name used for nodes and edges.
#' @param col.draw Outline colour of the nodes.
#' @param col.font Font colour of the nodes.
#' @param col.conclusion Fill colour of leaf (conclusion) nodes.
#' @param col.question Fill colour of test (question) nodes.
#' @param shape.conclusion GraphViz shape of leaf nodes.
#' @param shape.question GraphViz shape of test nodes.
#' @param bool.substitute How the edge labels `= 0` / `= 1` are displayed:
#'   `'None'` (unchanged), `'yesno'`, `'truefalse'` or `'TF'`.
#' @param prefix If not `FALSE`, leaf labels are prefixed with "Class".
#' @param vertical If `TRUE` the graph is laid out top-to-bottom (`TB`),
#'   otherwise left-to-right (`LR`).
#' @return The DOT source as a single string, invisibly.
C5.0.graphviz <- function( C5.0.model, filename, fontname ='Arial',col.draw ='black',
col.font ='blue',col.conclusion ='lightpink',col.question = 'grey78',
shape.conclusion ='box3d',shape.question ='diamond',
bool.substitute = 'None', prefix=FALSE, vertical=TRUE ) {
  # Replacement labels for the edge tokens '= 0' (first) and '= 1' (second).
  substitutes <- list(None = c('= 0', '= 1'), yesno = c('no', 'yes'),
                      truefalse = c('false', 'true'), TF = c('F', 'T'))
  bool.substitute <- match.arg(bool.substitute, names(substitutes))

  # Position of the first literal occurrence of `pattern`, or NA if absent.
  find_fixed <- function(text, pattern) {
    pos <- regexpr(pattern, text, fixed = TRUE)[[1L]]
    if (is.na(pos) || pos < 0L) NA_integer_ else as.integer(pos)
  }

  treeout <- C5.0.model$output
  if (!is.character(treeout) || length(treeout) != 1L || is.na(treeout)) {
    stop("'C5.0.model$output' must be a single character string", call. = FALSE)
  }
  tree_start <- find_fixed(treeout, 'Decision tree:')
  if (is.na(tree_start)) {
    stop("no 'Decision tree:' section found in the model output ",
         "(rule-based models cannot be drawn)", call. = FALSE)
  }
  # Keep only the text after the header (14 = nchar('Decision tree:')) ...
  treeout <- substr(treeout, tree_start + 14L, nchar(treeout))
  # ... and before the evaluation section.
  tree_end <- find_fixed(treeout, 'Evaluation on training data')
  if (!is.na(tree_end)) {
    treeout <- substr(treeout, 1L, tree_end - 2L)
  }

  tree_lines <- strsplit(treeout, '\r?\n')[[1L]]
  tree_lines <- tree_lines[nzchar(trimws(tree_lines))]
  n_lines <- length(tree_lines)
  if (n_lines == 0L) {
    stop('the decision tree section of the model output is empty', call. = FALSE)
  }

  # Every line yields exactly one edge and at most two nodes.
  node_symbol <- character(2L * n_lines)
  node_token <- character(2L * n_lines)
  node_shape <- character(2L * n_lines)
  node_is_question <- logical(2L * n_lines)
  edge_label <- character(n_lines)
  edge_start <- character(n_lines)
  edge_end <- rep(NA_character_, n_lines)
  n_nodes <- 0L
  n_edges <- 0L

  # Stack of question nodes on the path to the current line;
  # `stack_top` is the next free slot.
  node_stack <- character(n_lines)
  stack_top <- 1L
  # TRUE while the previous edge still lacks a target node.
  open_connection <- TRUE
  previous_indent <- -1

  for (line in tree_lines) {
    # Depth = one level per 4 leading blanks, per ':   ' continuation
    # marker and per ':...' branch marker.
    stripped <- trimws(line, which = 'left')
    indent <- (nchar(line) - nchar(stripped)) / 4
    line <- stripped
    repeat {
      # The marker is colon + three blanks (4 characters, hence the +4).
      # Searching for ': ' instead would also hit the 'test: class'
      # separator and cut the class label off every leaf line.
      marker <- find_fixed(line, ':   ')
      if (is.na(marker)) break
      rest <- substr(line, marker + 4L, nchar(line))
      line <- trimws(rest, which = 'left')
      indent <- indent + 1 + (nchar(rest) - nchar(line)) / 4
    }
    marker <- find_fixed(line, ':...')
    if (!is.na(marker)) {
      indent <- indent + 1
      line <- substr(line, marker + 4L, nchar(line))
    }
    line <- trimws(line)

    # '<attribute> <operator> <value>' [ ':' ' <class> (<counts>)' ]
    parts <- strsplit(line, ':', fixed = TRUE)[[1L]]
    condition <- strsplit(parts[1L], '\\s')[[1L]]

    if (open_connection) {
      # First branch of a new test: create its question node and make it
      # the target of the edge opened by the previous line.
      n_nodes <- n_nodes + 1L
      node_symbol[n_nodes] <- paste0('node', n_nodes)
      node_token[n_nodes] <- condition[1L]
      node_shape[n_nodes] <- shape.question
      node_is_question[n_nodes] <- TRUE
      node_stack[stack_top] <- node_symbol[n_nodes]
      stack_top <- stack_top + 1L
      if (n_nodes > 1L) {
        edge_end[n_edges] <- node_symbol[n_nodes]
      }
    }

    label <- paste(condition[2L], condition[3L])
    if (label == '= 0') {
      label <- substitutes[[bool.substitute]][1L]
    } else if (label == '= 1') {
      label <- substitutes[[bool.substitute]][2L]
    }

    if (open_connection) {
      if (indent < previous_indent) {
        stack_top <- stack_top - ((previous_indent - indent) + 1)
        parent <- node_stack[stack_top]
      } else {
        parent <- node_symbol[n_nodes]
      }
    } else {
      # Further branch of an earlier test: look its node up on the stack
      # and pop the levels that were closed.
      parent <- node_stack[stack_top - ((previous_indent - indent) + 1)]
      stack_top <- stack_top - (previous_indent - indent)
    }

    n_edges <- n_edges + 1L
    edge_label[n_edges] <- label
    edge_start[n_edges] <- parent
    open_connection <- TRUE

    if (length(parts) == 2L) {
      # The line ends in a class label: add the leaf and close the edge.
      outcome <- strsplit(parts[2L], '\\s')[[1L]]
      n_nodes <- n_nodes + 1L
      node_symbol[n_nodes] <- paste0('node', n_nodes)
      node_token[n_nodes] <- paste(if (prefix == FALSE) '' else 'Class',
                                   outcome[2L])
      node_shape[n_nodes] <- shape.conclusion
      node_is_question[n_nodes] <- FALSE
      edge_end[n_edges] <- node_symbol[n_nodes]
      open_connection <- FALSE
    }
    previous_indent <- indent
  }

  nodes <- seq_len(n_nodes)
  edges <- seq_len(n_edges)
  header <- paste0('digraph g {\ngraph  [rankdir="',
                   if (vertical == TRUE) 'TB' else 'LR', '"]')
  node_lines <- paste0(
    node_symbol[nodes], ' [shape="', node_shape[nodes],
    '" label ="', node_token[nodes],
    '" style=filled fontcolor= ', col.font,
    '  color= ', col.draw,
    '  fontname= ', fontname,
    '  fillcolor= ',
    ifelse(node_is_question[nodes], col.question, col.conclusion),
    ' ];')
  edge_lines <- paste0(
    edge_start[edges], ' -> ', edge_end[edges],
    ' [label="', edge_label[edges],
    '" fontname= ', fontname, ' ];')
  dot <- paste(c(header, node_lines, edge_lines), collapse = '\n')
  dot <- paste(dot, '}')

  # Print once and write directly to the file; no sink() is involved, so a
  # failure cannot leave console output diverted.
  cat(dot, '\n', sep = '')
  writeLines(dot, filename)
  invisible(dot)
}