R 将数据分配给事件间隔不均匀的日期向量
我很抱歉这个神秘的标题,但我不知道如何充分总结我的问题。这是我的问题。我有一个带有日期的数据框和几个实体的名称:R 将数据分配给事件间隔不均匀的日期向量,r,R,我很抱歉这个神秘的标题,但我不知道如何充分总结我的问题。这是我的问题。我有一个带有日期的数据框和几个实体的名称: df <- data.frame( time=rep(as.Date(seq(as.Date("2004/1/1"), as.Date("2005/12/1"), by = "1 month ")),2), name=c(rep("a",24),rep("b",24)) ) str(df) 'data.frame': 48 obs
df <- data.frame(
time=rep(as.Date(seq(as.Date("2004/1/1"), as.Date("2005/12/1"), by = "1 month ")),2),
name=c(rep("a",24),rep("b",24))
)
str(df)
'data.frame': 48 obs. of 2 variables:
$ time: Date, format: "2004-01-01" "2004-02-01" ...
$ name: Factor w/ 2 levels "a","b": 1 1 1 1 1 1 1 1 1 1 ...
我希望以一种方式合并这两个数据帧,事件
从数据集开始分配到事件,或从最后一个事件开始分配到下一个事件或数据集结束。这看起来像:
date name event
2004-01-01 a normal
2004-01-02 a normal
...
2004-12-01 a extraordinary
2005-01-01 a extraordinary
在
R
中是否有一种我看不到的简单方法,或者我是否手动合并这些方法?非常感谢你的帮助 我不知道有什么函数可以做到这一点,但这里有一些R代码可以自己做到:
# Needed type coercions (Date for comparisons, characters to avoid 'factor' problems)
events$time <- as.Date(events$time)
events$event <- as.character(events$event)
events$name <- as.character(events$name)
df$name <- as.character(df$name)
# Events ordering (needed to detect previous events as non NA)
events <- events[ order(events$time) ,]
# Updates
df$event = NA
for(i in 1:nrow(events)) {
# Update where time is lesser than the limit, if names correspond and if an event was not already assigned to the row
df[ df$time <= events[i,"time"] & df$name == events[i,"name"] & is.na(df$event) , "event" ] = events[i,"event"]
}
#需要类型强制(用于比较的日期、避免“因子”问题的字符)
events$time这里有一个函数可以执行您想要的操作:
event.aligning <- function(time.dataframe, events){
if(!class(events[["time"]]) == 'Date'){
events[["time"]] <- as.Date(events[["time"]])
}
## lets sort on time
events <- events[order(events[["time"]]),]
## setup event column
time.dataframe$event <- NA
time.dataframe$event <- as.factor(time.dataframe$event)
levels(time.dataframe$event) <- event.types
rownames.tdf <- rownames(time.dataframe)
res.time.dataframe <- NULL
for( i in 1:length(levels(events$name))){
i.name <- levels(events$name)[i]
i.name.events <- subset(events, name == i.name)
first.time <- time.dataframe$time[time.dataframe$name == i.name][1]
first.event <- i.name.events$time[1]
## assume 2 events
first.event.type <- i.name.events$event[1]
second.event.type <- unique(i.name.events$event[i.name.events$event != first.event.type])
event.types <- levels(i.name.events$event)
sub.time.df <- time.dataframe[time.dataframe$name == i.name,]
rownames(sub.time.df) <- 1:length(sub.time.df[,1])
sub.time.df[1:(as.numeric(rownames(sub.time.df[sub.time.df$time == first.event,])) - 1),]$event <- second.event.type
cur.event <- first.event
for( j in 2:length(i.name.events[,1])){
next.event <- i.name.events$time[j]
sub.time.df[rownames( sub.time.df[ sub.time.df[["time"]] == cur.event,]) :
(as.numeric(rownames( sub.time.df[sub.time.df[["time"]] == next.event,])) - 1),]$event <- i.name.events$event[j-1]
cur.event <- next.event
next.event.type = i.name.events$event[j]
}
last.time <- sub.time.df$time[length(sub.time.df$time)]
last.event <- i.name.events$time[length(i.name.events$time)]
sub.time.df[rownames( sub.time.df[sub.time.df$time == last.event,]):length(sub.time.df$time),]$event <- next.event.type
res.time.dataframe <- rbind(res.time.dataframe, sub.time.df)
}
rownames(res.time.dataframe) <- rownames.tdf
return(res.time.dataframe)
}
df2 <- event.aligning(df, events)
event.aligning感谢您对我非常具体的问题的回答。不幸的是,它在event.aligning(df,events)中产生了错误:找不到对象“event.types”
。然而,第二个答案产生了预期的结果。
event.aligning <- function(time.dataframe, events){
if(!class(events[["time"]]) == 'Date'){
events[["time"]] <- as.Date(events[["time"]])
}
## lets sort on time
events <- events[order(events[["time"]]),]
## setup event column
time.dataframe$event <- NA
time.dataframe$event <- as.factor(time.dataframe$event)
levels(time.dataframe$event) <- event.types
rownames.tdf <- rownames(time.dataframe)
res.time.dataframe <- NULL
for( i in 1:length(levels(events$name))){
i.name <- levels(events$name)[i]
i.name.events <- subset(events, name == i.name)
first.time <- time.dataframe$time[time.dataframe$name == i.name][1]
first.event <- i.name.events$time[1]
## assume 2 events
first.event.type <- i.name.events$event[1]
second.event.type <- unique(i.name.events$event[i.name.events$event != first.event.type])
event.types <- levels(i.name.events$event)
sub.time.df <- time.dataframe[time.dataframe$name == i.name,]
rownames(sub.time.df) <- 1:length(sub.time.df[,1])
sub.time.df[1:(as.numeric(rownames(sub.time.df[sub.time.df$time == first.event,])) - 1),]$event <- second.event.type
cur.event <- first.event
for( j in 2:length(i.name.events[,1])){
next.event <- i.name.events$time[j]
sub.time.df[rownames( sub.time.df[ sub.time.df[["time"]] == cur.event,]) :
(as.numeric(rownames( sub.time.df[sub.time.df[["time"]] == next.event,])) - 1),]$event <- i.name.events$event[j-1]
cur.event <- next.event
next.event.type = i.name.events$event[j]
}
last.time <- sub.time.df$time[length(sub.time.df$time)]
last.event <- i.name.events$time[length(i.name.events$time)]
sub.time.df[rownames( sub.time.df[sub.time.df$time == last.event,]):length(sub.time.df$time),]$event <- next.event.type
res.time.dataframe <- rbind(res.time.dataframe, sub.time.df)
}
rownames(res.time.dataframe) <- rownames.tdf
return(res.time.dataframe)
}
df2 <- event.aligning(df, events)