在R中进行逻辑回归时出错:对比仅适用于具有2个或更多水平的因素
我在建立逻辑回归模型时遇到了这个错误: 对比度在R中进行逻辑回归时出错:对比仅适用于具有2个或更多水平的因素,r,logistic-regression,R,Logistic Regression,我在建立逻辑回归模型时遇到了这个错误: 对比度中的错误 library("dplyr") library("caret") bank_eval <- read.csv("bank_eval.csv",stringsAsFactors = FALSE) head(bank_eval) anyNA(bank_eval) summary(bank_eval) str(bank_eval) #replace unknown
中的错误
library("dplyr")
library("caret")
bank_eval <- read.csv("bank_eval.csv",stringsAsFactors = FALSE)
head(bank_eval)
anyNA(bank_eval)
summary(bank_eval)
str(bank_eval)
#replace unknown val with NA
bank_eval <- bank_eval %>%
mutate(job = replace(job, job == "unknown", NA),
marital = replace(marital, marital == "unknown", NA),
education = replace(education, education == "unknown", NA),
default = replace(default, default == "unknown", NA),
housing = replace(housing, housing == "unknown", NA),
loan = replace(loan, loan == "unknown", NA),
housing = replace(housing, housing == "unknown", NA)
)
bank_eval
summary(bank_eval)
str(bank_eval)
#replacing NA Categorical values with frequent val
bank_eval <- bank_eval %>%
mutate(job= replace(job,
is.na(job),
"admin"),
marital= replace(marital,
is.na(marital),
"married"),
education= replace(education,
is.na(education),
"university.degree"),
default= replace(default,
is.na(default),
"no"),
housing= replace(housing,
is.na(housing),
"yes"),
loan= replace(loan,
is.na(loan),
"no")
)
bank_eval
anyNA(bank_eval)
summary(bank_eval)
bank_eval <- bank_eval %>%
mutate_if(is.character, as.factor)
bank_eval
str(bank_eval)
summary(bank_eval)
set.seed(123)
Trainingindex =createDataPartition(y= bank_eval$y, p=0.8, list=FALSE)
trainset = bank_eval[Trainingindex, ] #80% data for training
testset = bank_eval[-Trainingindex, ]
dim(trainset)
dim(testset)
#building and Training model using training set
sapply(trainset, function(x) if (is.factor(x)) length(levels(x)) else NA)
LGModel<-glm(y~., data = trainset,
family = binomial(logit))
summary(LGModel)
$ age : int 30 39 25 38 47 32 32 41 31 35 ...
$ job : Factor w/ 12 levels "admin","admin.",..: 3 9 9 9 2 9 2 4 9 3 ...
$ marital : Factor w/ 3 levels "divorced","married",..: 2 3 2 2 2 3 3 2 1 2 ...
$ education : Factor w/ 7 levels "basic.4y","basic.6y",..: 3 4 4 3 7 7 7 7 6 3 ...
$ default : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
$ housing : Factor w/ 2 levels "no","yes": 2 1 2 2 2 1 2 2 1 1 ...
$ loan : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
$ contact : Factor w/ 2 levels "cellular","telephone": 1 2 2 2 1 1 1 1 1 2 ...
$ month : Factor w/ 10 levels "apr","aug","dec",..: 7 7 5 5 8 10 10 8 8 7 ...
$ day_of_week : Factor w/ 5 levels "fri","mon","thu",..: 1 1 5 1 2 3 2 2 4 3 ...
$ duration : int 487 346 227 17 58 128 290 44 68 170 ...
$ campaign : int 2 4 1 3 1 3 4 2 1 1 ...
$ pdays : int 999 999 999 999 999 999 999 999 999 999 ...
$ previous : int 0 0 0 0 0 2 0 0 1 0 ...
$ poutcome : Factor w/ 3 levels "failure","nonexistent",..: 2 2 2 2 2 1 2 2 1 2 ...
$ emp.var.rate : num -1.8 1.1 1.4 1.4 -0.1 -1.1 -1.1 -0.1 -0.1 1.1 ...
$ cons.price.idx: num 92.9 94 94.5 94.5 93.2 ...
$ cons.conf.idx : num -46.2 -36.4 -41.8 -41.8 -42 -37.5 -37.5 -42 -42 -36.4 ...
$ euribor3m : num 1.31 4.86 4.96 4.96 4.19 ...
$ nr.employed : num 5099 5191 5228 5228 5196 ...
$ y : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...