R 以非常不同的格式混合数据
我有两个数据帧(标签:r、dataframe)。在第一个数据帧中,我给出了各酒店每个月的营业情况:“1”表示酒店当月营业,“0”表示酒店当月关闭:
# Opening calendar from the question: one row per hotel, one 0/1 column per
# month (1 = open, 0 = closed).
# Note: the month columns are lower-case and April/December are spelled
# `ap` / `des`.
hotels <- data.frame(
  hotel = 1:5,
  jan = rep(c(1, 0), times = c(3, 2)),
  feb = rep(1, 5),
  mar = rep(1, 5),
  ap = rep(c(0, 1), times = c(2, 3)),
  may = rep(0, 5),
  jun = rep(0, 5),
  jul = rep(c(0, 1), times = c(3, 2)),
  aug = rep(1, 5),
  sep = rep(c(1, 0), times = c(3, 2)),
  oct = rep(c(0, 1), times = c(3, 2)),
  nov = rep(1, 5),
  des = rep(1, 5)
)
hotels
使用 mapply 的一种方法是:从 arrive(到达)列和 departure(出发)列中提取月份,将这些信息与酒店 id 一起传递给 mapply,在 hotels 中取出对应酒店的那一行,并检查客户在酒店停留的所有月份的值是否都为 1。
# Flag each booking as `honest` when the booked hotel is open (value 1 in
# `hotels`) in every calendar month touched by the stay.
#
# Expects `clientes` (columns `hotel`, `arrive`, `departure`) and `hotels`
# (column `hotel` plus one 0/1 column per month named as in `month.abb`,
# i.e. "Jan" ... "Dec") to exist already.
clientes$arrive <- as.Date(clientes$arrive)
clientes$departure <- as.Date(clientes$departure)

# Absolute month counter (year * 12 + month - 1). Built from "%Y"/"%m" rather
# than "%b" so the lookup does not depend on the session locale, and so that
# stays crossing a year boundary are enumerated correctly.
month_index <- function(d) {
  as.integer(format(d, "%Y")) * 12L + as.integer(format(d, "%m")) - 1L
}

clientes$honest <- mapply(
  function(id, first, last) {
    # A missing date cannot be checked; report NA instead of failing.
    if (is.na(first) || is.na(last)) {
      return(NA)
    }
    # Every month from arrival to departure, not only the two end points.
    months <- unique(month.abb[seq(first, last) %% 12L + 1L])
    all(hotels[hotels$hotel == id, months] == 1)
  },
  clientes$hotel,
  month_index(clientes$arrive),
  month_index(clientes$departure)
)
clientes
# id_client hotel arrive departure honest
#1 a 1 2019-01-08 2019-01-10 TRUE
#2 b 2 2019-05-04 2019-05-08 FALSE
#3 c 3 2019-06-25 2019-07-05 FALSE
#4 d 4 2019-11-24 2019-12-01 TRUE
#5 e 5 2019-03-04 2019-03-08 TRUE
#6 f 1 2019-06-01 2019-06-09 FALSE
#7 g 2 2019-05-04 2019-05-10 FALSE
#8 h 3 2019-08-13 2019-08-20 TRUE
#9 i 4 2019-04-06 2019-04-10 TRUE
#10 j 5 2019-07-17 2019-08-03 TRUE
# Expected outcome stated in the question: one row per booking with the
# `honest` flag the computation should produce.
results <- data.frame(
  id_client = letters[1:10],
  hotel = rep(c(1, 2, 3, 4, 5), times = 2),
  arrive = c(
    "2019-01-08", "2019-05-04", "2019-06-25", "2019-11-24", "2019-03-04",
    "2019-06-01", "2019-05-04", "2019-08-13", "2019-04-06", "2019-07-17"
  ),
  honest = c(
    TRUE, FALSE, FALSE, TRUE, TRUE,
    FALSE, FALSE, TRUE, TRUE, TRUE
  )
)
# Flag each booking as `honest` when the booked hotel is open (value 1 in
# `hotels`) in every calendar month touched by the stay.
#
# Expects `clientes` (columns `hotel`, `arrive`, `departure`) and `hotels`
# (column `hotel` plus one 0/1 column per month named as in `month.abb`,
# i.e. "Jan" ... "Dec") to exist already.
clientes$arrive <- as.Date(clientes$arrive)
clientes$departure <- as.Date(clientes$departure)

# Absolute month counter (year * 12 + month - 1). Built from "%Y"/"%m" rather
# than "%b" so the lookup does not depend on the session locale, and so that
# stays crossing a year boundary are enumerated correctly.
month_index <- function(d) {
  as.integer(format(d, "%Y")) * 12L + as.integer(format(d, "%m")) - 1L
}

clientes$honest <- mapply(
  function(id, first, last) {
    # A missing date cannot be checked; report NA instead of failing.
    if (is.na(first) || is.na(last)) {
      return(NA)
    }
    # Every month from arrival to departure, not only the two end points.
    months <- unique(month.abb[seq(first, last) %% 12L + 1L])
    all(hotels[hotels$hotel == id, months] == 1)
  },
  clientes$hotel,
  month_index(clientes$arrive),
  month_index(clientes$departure)
)
clientes
# id_client hotel arrive departure honest
#1 a 1 2019-01-08 2019-01-10 TRUE
#2 b 2 2019-05-04 2019-05-08 FALSE
#3 c 3 2019-06-25 2019-07-05 FALSE
#4 d 4 2019-11-24 2019-12-01 TRUE
#5 e 5 2019-03-04 2019-03-08 TRUE
#6 f 1 2019-06-01 2019-06-09 FALSE
#7 g 2 2019-05-04 2019-05-10 FALSE
#8 h 3 2019-08-13 2019-08-20 TRUE
#9 i 4 2019-04-06 2019-04-10 TRUE
#10 j 5 2019-07-17 2019-08-03 TRUE
# Opening calendar used by the answer: one row per hotel, one 0/1 column per
# month (1 = open, 0 = closed). Month columns are named "Jan" ... "Dec".
hotels <- data.frame(
  hotel = 1:5,
  Jan = rep(c(1, 0), times = c(3, 2)),
  Feb = rep(1, 5),
  Mar = rep(1, 5),
  Apr = rep(c(0, 1), times = c(2, 3)),
  May = rep(0, 5),
  Jun = rep(0, 5),
  Jul = rep(c(0, 1), times = c(3, 2)),
  Aug = rep(1, 5),
  Sep = rep(c(1, 0), times = c(3, 2)),
  Oct = rep(c(0, 1), times = c(3, 2)),
  Nov = rep(1, 5),
  Dec = rep(1, 5)
)
# Bookings: one row per client with the hotel id and the arrival/departure
# dates, stored as "YYYY-MM-DD" strings.
clientes <- data.frame(
  id_client = letters[1:10],
  hotel = rep(c(1, 2, 3, 4, 5), times = 2),
  arrive = c(
    "2019-01-08", "2019-05-04", "2019-06-25", "2019-11-24", "2019-03-04",
    "2019-06-01", "2019-05-04", "2019-08-13", "2019-04-06", "2019-07-17"
  ),
  departure = c(
    "2019-01-10", "2019-05-08", "2019-07-05", "2019-12-01", "2019-03-08",
    "2019-06-09", "2019-05-10", "2019-08-20", "2019-04-10", "2019-08-03"
  )
)