R 如何在glmnet中绘制正确的标签?

R 如何在glmnet中绘制正确的标签?,r,glmnet,R,Glmnet,以这个例子为例 library(dplyr) library(tibble) library(glmnet) library(quanteda) dtrain <- data_frame(text = c("Chinese Beijing Chinese", "Chinese Chinese Shanghai", "this is china",

以这个例子为例

library(dplyr)
library(tibble)
library(glmnet)
library(quanteda)

# Toy training corpus: 19 short documents, each with a binary class label.
# tibble() replaces data_frame(), which is a deprecated alias in tibble.
# NOTE(review): there are 11 ones but only 10 China-related texts, so the
# first "Kyoto Japan" document is labelled 1 -- kept as-is, confirm intended.
dtrain <- tibble(
  text = c(
    "Chinese Beijing Chinese",
    "Chinese Chinese Shanghai",
    "this is china",
    "china is here",
    "hello china",
    "Chinese Beijing Chinese",
    "Chinese Chinese Shanghai",
    "this is china",
    "china is here",
    "hello china",
    "Kyoto Japan",
    "Tokyo Japan Chinese",
    "Kyoto Japan",
    "Tokyo Japan Chinese",
    "Kyoto Japan",
    "Tokyo Japan Chinese",
    "Kyoto Japan",
    "Tokyo Japan Chinese",
    "japan"
  ),
  class = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0)
)
我可以使用 `glmnet` 轻松拟合 lasso 回归:

# Build the document-feature matrix the model is fitted on. Tokenise first,
# because dfm() on a raw character vector is deprecated since quanteda 3.
dtm <- quanteda::dfm(quanteda::tokens(dtrain$text))

# Lasso (alpha = 1) logistic regression of the class label on the word counts.
fit <- glmnet(dtm, y = as.factor(dtrain$class), alpha = 1, family = "binomial")

据我所知,该图展示的是各个重要单词所对应的系数值。在你的例子中,第 9–11 个单词分别是 kyoto、japan 和 tokyo(可以从 `dtm` 表中看出)。glmnet 自带的普通绘图函数无法实现你想要的标签效果;你可以改用 `library(plotmo)` 中的 `plot_glmnet()`,如下所示:

library(dplyr)
library(tibble)
library(glmnet)
library(quanteda)
library(plotmo)
# Toy training corpus: 19 short documents, each with a binary class label.
# tibble() replaces data_frame(), which is a deprecated alias in tibble.
# NOTE(review): there are 11 ones but only 10 China-related texts, so the
# first "Kyoto Japan" document is labelled 1 -- kept as-is, confirm intended.
dtrain <- tibble(
  text = c(
    "Chinese Beijing Chinese",
    "Chinese Chinese Shanghai",
    "this is china",
    "china is here",
    "hello china",
    "Chinese Beijing Chinese",
    "Chinese Chinese Shanghai",
    "this is china",
    "china is here",
    "hello china",
    "Kyoto Japan",
    "Tokyo Japan Chinese",
    "Kyoto Japan",
    "Tokyo Japan Chinese",
    "Kyoto Japan",
    "Tokyo Japan Chinese",
    "Kyoto Japan",
    "Tokyo Japan Chinese",
    "japan"
  ),
  class = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0)
)

# Document-feature matrix of word counts. Tokenise first, because dfm() on a
# raw character vector is deprecated since quanteda 3.
dtm <- quanteda::dfm(quanteda::tokens(dtrain$text))

# Lasso (alpha = 1) logistic regression of the class label on the word counts.
fit <- glmnet(dtm, y = as.factor(dtrain$class), alpha = 1, family = "binomial")

# plot_glmnet() is provided by plotmo (loaded above).
plot_glmnet(fit, label = 3)  # label the 3 biggest final coefs
library(dplyr)
library(tibble)
library(glmnet)
library(quanteda)
library(plotmo)

据我所知,该图展示的是各个重要单词所对应的系数值。在你的例子中,第 9–11 个单词分别是 kyoto、japan 和 tokyo(可以从 `dtm` 表中看出)。glmnet 自带的普通绘图函数无法实现你想要的标签效果;你可以改用 `library(plotmo)` 中的 `plot_glmnet()`,如下所示:

library(dplyr)
library(tibble)
library(glmnet)
library(quanteda)
library(plotmo)
# Toy training corpus: 19 short documents, each with a binary class label.
# tibble() replaces data_frame(), which is a deprecated alias in tibble.
# NOTE(review): there are 11 ones but only 10 China-related texts, so the
# first "Kyoto Japan" document is labelled 1 -- kept as-is, confirm intended.
dtrain <- tibble(
  text = c(
    "Chinese Beijing Chinese",
    "Chinese Chinese Shanghai",
    "this is china",
    "china is here",
    "hello china",
    "Chinese Beijing Chinese",
    "Chinese Chinese Shanghai",
    "this is china",
    "china is here",
    "hello china",
    "Kyoto Japan",
    "Tokyo Japan Chinese",
    "Kyoto Japan",
    "Tokyo Japan Chinese",
    "Kyoto Japan",
    "Tokyo Japan Chinese",
    "Kyoto Japan",
    "Tokyo Japan Chinese",
    "japan"
  ),
  class = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0)
)

# Document-feature matrix of word counts. Tokenise first, because dfm() on a
# raw character vector is deprecated since quanteda 3.
dtm <- quanteda::dfm(quanteda::tokens(dtrain$text))

# Lasso (alpha = 1) logistic regression of the class label on the word counts.
fit <- glmnet(dtm, y = as.factor(dtrain$class), alpha = 1, family = "binomial")

# plot_glmnet() is provided by plotmo (loaded above).
plot_glmnet(fit, label = 3)  # label the 3 biggest final coefs
library(dplyr)
library(tibble)
library(glmnet)
library(quanteda)
library(plotmo)
dtrain