R 将数据分配给事件间隔不均匀的日期向量

R 将数据分配给事件间隔不均匀的日期向量,r,R,我很抱歉这个神秘的标题,但我不知道如何充分总结我的问题。这是我的问题。我有一个带有日期的数据框和几个实体的名称: df <- data.frame( time=rep(as.Date(seq(as.Date("2004/1/1"), as.Date("2005/12/1"), by = "1 month ")),2), name=c(rep("a",24),rep("b",24)) ) str(df) 'data.frame': 48 obs

我很抱歉这个神秘的标题,但我不知道如何充分总结我的问题。这是我的问题。我有一个带有日期的数据框和几个实体的名称:

 df <- data.frame(
       time=rep(as.Date(seq(as.Date("2004/1/1"), as.Date("2005/12/1"), by = "1 month ")),2),
       name=c(rep("a",24),rep("b",24))
  )
  str(df)

  'data.frame':   48 obs. of  2 variables:
   $ time: Date, format: "2004-01-01" "2004-02-01" ...
   $ name: Factor w/ 2 levels "a","b": 1 1 1 1 1 1 1 1 1 1 ...
我希望以一种方式合并这两个数据帧,
事件
从数据集开始分配到事件,或从最后一个事件开始分配到下一个事件或数据集结束。这看起来像:

    date      name  event
   2004-01-01  a     normal
   2004-01-02  a     normal 
      ...
   2004-12-01  a     extraordinary
   2005-01-01  a     extraordinary

R
中是否有一种我看不到的简单方法,或者我是否手动合并这些方法?非常感谢你的帮助

我不知道有什么函数可以做到这一点,但这里有一些R代码可以自己做到:

# Needed type coercions (Date for comparisons, characters to avoid 'factor' problems)
events$time <- as.Date(events$time)
events$event <- as.character(events$event)
events$name <- as.character(events$name)
df$name <- as.character(df$name)

# Events ordering (needed to detect previous events as non NA)
events <- events[ order(events$time) ,]

# Updates
df$event = NA
for(i in 1:nrow(events)) {
    # Update where time is lesser than the limit, if names correspond and if an event was not already assigned to the row
    df[ df$time <= events[i,"time"] & df$name == events[i,"name"] & is.na(df$event) , "event" ] = events[i,"event"]
}
#需要类型强制(用于比较的日期、避免“因子”问题的字符)

events$time这里有一个函数可以执行您想要的操作:

event.aligning <- function(time.dataframe, events){
  if(!class(events[["time"]]) == 'Date'){
    events[["time"]] <- as.Date(events[["time"]])
  }
  ## lets sort on time
  events <- events[order(events[["time"]]),]

  ## setup event column
  time.dataframe$event <- NA 
  time.dataframe$event <- as.factor(time.dataframe$event)
  levels(time.dataframe$event) <- event.types
  rownames.tdf <- rownames(time.dataframe)

  res.time.dataframe <- NULL
  for( i in 1:length(levels(events$name))){
    i.name <- levels(events$name)[i]

    i.name.events <- subset(events, name == i.name)

    first.time <- time.dataframe$time[time.dataframe$name == i.name][1]
    first.event <- i.name.events$time[1]

    ## assume 2 events
    first.event.type <- i.name.events$event[1]
    second.event.type <- unique(i.name.events$event[i.name.events$event !=     first.event.type])

    event.types <- levels(i.name.events$event)

    sub.time.df <- time.dataframe[time.dataframe$name == i.name,]
    rownames(sub.time.df) <- 1:length(sub.time.df[,1])

    sub.time.df[1:(as.numeric(rownames(sub.time.df[sub.time.df$time == first.event,])) - 1),]$event <- second.event.type

    cur.event <- first.event
    for( j in 2:length(i.name.events[,1])){
      next.event <- i.name.events$time[j]
      sub.time.df[rownames( sub.time.df[ sub.time.df[["time"]] == cur.event,]) : 
        (as.numeric(rownames( sub.time.df[sub.time.df[["time"]] == next.event,])) - 1),]$event <- i.name.events$event[j-1]
      cur.event <- next.event
      next.event.type = i.name.events$event[j]
    }    
    last.time <- sub.time.df$time[length(sub.time.df$time)]
    last.event <- i.name.events$time[length(i.name.events$time)]   
    sub.time.df[rownames( sub.time.df[sub.time.df$time == last.event,]):length(sub.time.df$time),]$event <- next.event.type
    res.time.dataframe <- rbind(res.time.dataframe, sub.time.df)
  }
  rownames(res.time.dataframe) <- rownames.tdf
  return(res.time.dataframe)
}


df2 <- event.aligning(df, events)
event.aligning感谢您对我非常具体的问题的回答。不幸的是,它在event.aligning(df,events)中产生了
错误:找不到对象“event.types”
。然而,第二个答案产生了预期的结果。
event.aligning <- function(time.dataframe, events){
  if(!class(events[["time"]]) == 'Date'){
    events[["time"]] <- as.Date(events[["time"]])
  }
  ## lets sort on time
  events <- events[order(events[["time"]]),]

  ## setup event column
  time.dataframe$event <- NA 
  time.dataframe$event <- as.factor(time.dataframe$event)
  levels(time.dataframe$event) <- event.types
  rownames.tdf <- rownames(time.dataframe)

  res.time.dataframe <- NULL
  for( i in 1:length(levels(events$name))){
    i.name <- levels(events$name)[i]

    i.name.events <- subset(events, name == i.name)

    first.time <- time.dataframe$time[time.dataframe$name == i.name][1]
    first.event <- i.name.events$time[1]

    ## assume 2 events
    first.event.type <- i.name.events$event[1]
    second.event.type <- unique(i.name.events$event[i.name.events$event !=     first.event.type])

    event.types <- levels(i.name.events$event)

    sub.time.df <- time.dataframe[time.dataframe$name == i.name,]
    rownames(sub.time.df) <- 1:length(sub.time.df[,1])

    sub.time.df[1:(as.numeric(rownames(sub.time.df[sub.time.df$time == first.event,])) - 1),]$event <- second.event.type

    cur.event <- first.event
    for( j in 2:length(i.name.events[,1])){
      next.event <- i.name.events$time[j]
      sub.time.df[rownames( sub.time.df[ sub.time.df[["time"]] == cur.event,]) : 
        (as.numeric(rownames( sub.time.df[sub.time.df[["time"]] == next.event,])) - 1),]$event <- i.name.events$event[j-1]
      cur.event <- next.event
      next.event.type = i.name.events$event[j]
    }    
    last.time <- sub.time.df$time[length(sub.time.df$time)]
    last.event <- i.name.events$time[length(i.name.events$time)]   
    sub.time.df[rownames( sub.time.df[sub.time.df$time == last.event,]):length(sub.time.df$time),]$event <- next.event.type
    res.time.dataframe <- rbind(res.time.dataframe, sub.time.df)
  }
  rownames(res.time.dataframe) <- rownames.tdf
  return(res.time.dataframe)
}


df2 <- event.aligning(df, events)