R:合并两个格式差异很大的数据框

R:合并两个格式差异很大的数据框(标签:r, dataframe, R, Dataframe)。我有两个数据框。第一个数据框记录了各酒店每月的营业情况:“1”表示酒店营业,“0”表示酒店停业。hotels <- data.frame( hotel = c (1:5), jan = c(1,1,1,0,0), feb = c(1,1,1,1,1), mar = c(1,1,1,1,1), ap = c(0,0,1,1,1), may = c(0,0,0,0,0), jun = c(0,0,0,0,0), jul = c(0,0,0,1,1), aug = c(1,1,1,1,1 ……(摘要在此处被截断,完整代码见下文)

我有两个数据框。第一个数据框记录了各酒店每月的营业情况:“1”表示酒店该月营业,“0”表示酒店该月停业。

# Monthly opening calendar: one row per hotel (ids 1-5) and one column per
# month, where 1 means the hotel is open that month and 0 means it is closed.
hotels <- data.frame(
  hotel = 1:5,
  jan = rep(c(1, 0), times = c(3, 2)),
  feb = rep(1, 5),
  mar = rep(1, 5),
  ap = rep(c(0, 1), times = c(2, 3)),
  may = rep(0, 5),
  jun = rep(0, 5),
  jul = rep(c(0, 1), times = c(3, 2)),
  aug = rep(1, 5),
  sep = rep(c(1, 0), times = c(3, 2)),
  oct = rep(c(0, 1), times = c(3, 2)),
  nov = rep(1, 5),
  des = rep(1, 5)
)

hotels

使用 `mapply` 的一种方法是:从 `departure` 和 `arrive` 两列中提取月份,将这些月份与酒店 id(`hotel` 列)一起传递给 `mapply`,据此选出对应的酒店行,并检查客户入住期间所有月份的值是否都为 1。

# Flag each booking as "honest": TRUE when the booked hotel is open (value 1)
# in every calendar month the stay touches, from the arrival month through
# the departure month.
#
# Reads:  `clientes` (columns `hotel`, `arrive`, `departure`) and `hotels`
#         (column `hotel` plus one 0/1 column per month named "Jan".."Dec").
# Writes: `clientes$arrive` / `clientes$departure` converted to Date, and a
#         new logical column `clientes$honest`.
clientes$departure <- as.Date(clientes$departure)
clientes$arrive <- as.Date(clientes$arrive)

# vapply() guarantees a logical vector, including for a zero-row `clientes`.
clientes$honest <- vapply(
  seq_len(nrow(clientes)),
  function(i) {
    # Number each month as year * 12 + (month - 1) so that every month
    # between arrival and departure is covered, including across a year end.
    month_id <- function(d) {
      as.integer(format(d, "%Y")) * 12L + as.integer(format(d, "%m")) - 1L
    }
    stay_ids <- month_id(clientes$arrive[i]):month_id(clientes$departure[i])
    # month.abb is always English, whereas format(x, "%b") follows the
    # session locale and may not match the column names of `hotels`.
    stay_cols <- unique(month.abb[stay_ids %% 12L + 1L])
    booked <- hotels$hotel == clientes$hotel[i]
    all(hotels[booked, stay_cols] == 1)
  },
  logical(1)
)

# Expected printed result for the sample data:
clientes
#   id_client hotel     arrive  departure honest
#1          a     1 2019-01-08 2019-01-10   TRUE
#2          b     2 2019-05-04 2019-05-08  FALSE
#3          c     3 2019-06-25 2019-07-05  FALSE
#4          d     4 2019-11-24 2019-12-01   TRUE
#5          e     5 2019-03-04 2019-03-08   TRUE
#6          f     1 2019-06-01 2019-06-09  FALSE
#7          g     2 2019-05-04 2019-05-10  FALSE
#8          h     3 2019-08-13 2019-08-20   TRUE
#9          i     4 2019-04-06 2019-04-10   TRUE
#10         j     5 2019-07-17 2019-08-03   TRUE
# Desired result: one row per booking with the client id, the booked hotel,
# the arrival date (as text) and whether the booking is "honest".
results <- data.frame(
  id_client = letters[1:10],
  hotel = rep(c(1, 2, 3, 4, 5), times = 2),
  arrive = c(
    "2019-01-08", "2019-05-04", "2019-06-25", "2019-11-24", "2019-03-04",
    "2019-06-01", "2019-05-04", "2019-08-13", "2019-04-06", "2019-07-17"
  ),
  honest = c(
    TRUE, FALSE, FALSE, TRUE, TRUE,
    FALSE, FALSE, TRUE, TRUE, TRUE
  )
)
# Flag each booking as "honest": TRUE when the booked hotel is open (value 1)
# in every calendar month the stay touches, from the arrival month through
# the departure month.
#
# Reads:  `clientes` (columns `hotel`, `arrive`, `departure`) and `hotels`
#         (column `hotel` plus one 0/1 column per month named "Jan".."Dec").
# Writes: `clientes$arrive` / `clientes$departure` converted to Date, and a
#         new logical column `clientes$honest`.
clientes$departure <- as.Date(clientes$departure)
clientes$arrive <- as.Date(clientes$arrive)

# vapply() guarantees a logical vector, including for a zero-row `clientes`.
clientes$honest <- vapply(
  seq_len(nrow(clientes)),
  function(i) {
    # Number each month as year * 12 + (month - 1) so that every month
    # between arrival and departure is covered, including across a year end.
    month_id <- function(d) {
      as.integer(format(d, "%Y")) * 12L + as.integer(format(d, "%m")) - 1L
    }
    stay_ids <- month_id(clientes$arrive[i]):month_id(clientes$departure[i])
    # month.abb is always English, whereas format(x, "%b") follows the
    # session locale and may not match the column names of `hotels`.
    stay_cols <- unique(month.abb[stay_ids %% 12L + 1L])
    booked <- hotels$hotel == clientes$hotel[i]
    all(hotels[booked, stay_cols] == 1)
  },
  logical(1)
)

# Expected printed result for the sample data:
clientes
#   id_client hotel     arrive  departure honest
#1          a     1 2019-01-08 2019-01-10   TRUE
#2          b     2 2019-05-04 2019-05-08  FALSE
#3          c     3 2019-06-25 2019-07-05  FALSE
#4          d     4 2019-11-24 2019-12-01   TRUE
#5          e     5 2019-03-04 2019-03-08   TRUE
#6          f     1 2019-06-01 2019-06-09  FALSE
#7          g     2 2019-05-04 2019-05-10  FALSE
#8          h     3 2019-08-13 2019-08-20   TRUE
#9          i     4 2019-04-06 2019-04-10   TRUE
#10         j     5 2019-07-17 2019-08-03   TRUE
# Monthly opening calendar keyed by English month abbreviations: one row per
# hotel (ids 1-5), with 1 meaning open that month and 0 meaning closed.
hotels <- data.frame(
  hotel = 1:5,
  Jan = rep(c(1, 0), times = c(3, 2)),
  Feb = rep(1, 5),
  Mar = rep(1, 5),
  Apr = rep(c(0, 1), times = c(2, 3)),
  May = rep(0, 5),
  Jun = rep(0, 5),
  Jul = rep(c(0, 1), times = c(3, 2)),
  Aug = rep(1, 5),
  Sep = rep(c(1, 0), times = c(3, 2)),
  Oct = rep(c(0, 1), times = c(3, 2)),
  Nov = rep(1, 5),
  Dec = rep(1, 5)
)

# Bookings: one row per client with the booked hotel id and the arrival and
# departure dates stored as "YYYY-MM-DD" text.
clientes <- data.frame(
  id_client = letters[1:10],
  hotel = rep(c(1, 2, 3, 4, 5), times = 2),
  arrive = c(
    "2019-01-08", "2019-05-04", "2019-06-25", "2019-11-24", "2019-03-04",
    "2019-06-01", "2019-05-04", "2019-08-13", "2019-04-06", "2019-07-17"
  ),
  departure = c(
    "2019-01-10", "2019-05-08", "2019-07-05", "2019-12-01", "2019-03-08",
    "2019-06-09", "2019-05-10", "2019-08-20", "2019-04-10", "2019-08-03"
  )
)