R 为大型数据库中的每个标识符添加一行0_R_Merge_Dataframe_Rbind

R 为大型数据库中的每个标识符添加一行0

r merge dataframe

R 为大型数据库中的每个标识符添加一行0,r,merge,dataframe,rbind,R,Merge,Dataframe,Rbind,我有一个数据库，其中缺少每个唯一标识符的第一行。基本上，我需要为每个唯一id添加一个由0组成的新行我的数据库是这样的（我有超过一百万行，所以循环基本上是不可能的）对于每个id，我需要在时间0处添加一行0 下面的代码行可以工作，但是，我的数据库占用了太多的时间 IdUnique = length(unique(dt$id)) VeK = vector('list', IdUnique) for(i in 1:IdUnique){ row0 = matrix(0, nrow = 1,

我有一个数据库，其中缺少每个唯一标识符的第一行。基本上，我需要为每个唯一id添加一个由0组成的新行

我的数据库是这样的（我有超过一百万行，所以循环基本上是不可能的）

对于每个id，我需要在时间0处添加一行0

下面的代码可以工作，但是对我的数据库来说耗时太长

# Prepend an all-zero row to the rows of every id in `dt`, then stack the
# per-id pieces into `dt2`.
#
# Assumes `dt` is an existing data frame with an `id` column (it is not
# defined in this snippet).
#
# The distinct ids are computed once instead of on every iteration, and
# seq_along() is used so that an empty `dt` runs zero iterations (1:0 would
# iterate over c(1, 0)). With no ids, do.call("rbind", ...) yields NULL.
ids <- unique(dt$id)
IdUnique <- length(ids)
VeK <- vector("list", IdUnique)
for (i in seq_along(ids)) {
  # One row of zeros with the same columns as `dt`, labelled with the id.
  row0 <- matrix(0, nrow = 1, ncol = ncol(dt),
                 dimnames = list(ids[i], colnames(dt)))
  # which() drops NA comparisons, matching what subset() does.
  VeK[[i]] <- rbind(row0, dt[which(dt$id == ids[i]), , drop = FALSE])
  # The zero row carries 0 in the id column; overwrite the column with the id.
  VeK[[i]][, "id"] <- ids[i]
}

dt2 <- do.call("rbind", VeK)

IdUnique=长度（唯一（dt$id））
VeK=向量（'list'，IdUnique）
对于（i in 1:IdUnique）{
row0=矩阵（0，nrow=1，ncol=ncol（dt），dimnames=list（唯一（dt$id）[i]，colnames（dt）））
VeK[[i]]=rbind（行0，子集（dt，id==unique（dt$id）[i]））
VeK[[i]][，'id']试试：
或：
其中：
#  id activity time
#1 A1        0    0
#2 A1       15    1
#3 A1       17    2
#4 A1       12    3
#5 B1        0    0
#6 B1        3    1
#7 B1        4    2
#8 B1       15    3

之后，如果要将 `activity` 和 `time` 列转换为数字，可以添加：
# Re-parse every column except `id` from its character form. across() replaces
# the deprecated mutate_each()/funs(); as.is = TRUE keeps text as character.
... %>% mutate(across(-id, ~ type.convert(as.character(.x), as.is = TRUE)))

更新
如果您原始 `dt` 中各列的类型（class）差异并非有意为之，那么会更容易一些：
# Example data with numeric `activity` and integer `time` columns.
dt <- data.frame(id = c(rep("A1", 3), rep("B1", 3)),
                 activity = c(15, 17, 12, 3, 4, 15),
                 time = rep(1:3, 2))

library(dplyr)

# Build one (activity = 0, time = 0) row per id, merge those rows with the
# original data, and sort so the zero row comes first within each id.
# The join keys are spelled out so the join does not silently depend on
# whichever columns the two tables happen to share.
dt %>%
  group_by(id) %>%
  summarise(activity = 0, time = 0) %>%
  full_join(dt, by = c("id", "activity", "time")) %>%
  arrange(id, time)

dt%
分组依据（id）%>%
总结（活动=0，时间=0）%>%
全联接（，dt）%>%
安排（id、时间）
首先，我想你必须转换你的 `dt`，使 `activity` 和 `time` 都属于 `int` 类，而不是 `factor`：
# Re-parse every column of `dt` from its character form so numeric-looking
# columns become integer/double. `dt[] <-` keeps the data-frame structure.
# as.is is passed explicitly (recent R versions warn when it is omitted);
# TRUE leaves non-numeric text such as `id` as character instead of factor.
dt[] <- lapply(dt, function(x) type.convert(as.character(x), as.is = TRUE))

dt=as.data.frame（rbind（c（'A1'，'15'，'1'），
c（'A1'，'17'，'2'），
c（'A1'，'12'，'3'），
c（'B1'，'3'，'1'），
c（'B1'，'4'，'2'），
c（'B1'，'15'，'3'）
))
colnames（dt）=c（'id'，'activity'，'time'））
#只是我们需要得到要绑定'zero'的'id'的级别
级别
我没有使用 OP 中提供的 `dt`，而是使用了我在上面发布的那一个。OP 的 `dt` 非常奇怪，因为每一列都是 `factor` 类型。我将添加几行来说明如何把 `dt` 转换为“适当”的格式。
是的，我之后也编辑并添加了一个转换步骤。
另一个选项是 `setDT(dt)[, .SD[c(.N + 1, 1:.N)], id][is.na(activity), 2:3 := 0L]`（假设各列的类是数字）。
谢谢你的回答。你能解释一下 `%>%` 符号吗？
@giacomoV 很高兴它有帮助！看看
#  id activity time
#1 A1        0    0
#2 A1       15    1
#3 A1       17    2
#4 A1       12    3
#5 B1        0    0
#6 B1        3    1
#7 B1        4    2
#8 B1       15    3

# Re-parse every column except `id` from its character form. across() replaces
# the deprecated mutate_each()/funs(); as.is = TRUE keeps text as character.
... %>% mutate(across(-id, ~ type.convert(as.character(.x), as.is = TRUE)))

# Example data with numeric `activity` and integer `time` columns.
dt <- data.frame(id = c(rep("A1", 3), rep("B1", 3)),
                 activity = c(15, 17, 12, 3, 4, 15),
                 time = rep(1:3, 2))

library(dplyr)

# Build one (activity = 0, time = 0) row per id, merge those rows with the
# original data, and sort so the zero row comes first within each id.
# The join keys are spelled out so the join does not silently depend on
# whichever columns the two tables happen to share.
dt %>%
  group_by(id) %>%
  summarise(activity = 0, time = 0) %>%
  full_join(dt, by = c("id", "activity", "time")) %>%
  arrange(id, time)

# library() stops with an error when data.table is missing; require() would
# only return FALSE and let the script fail later.
library(data.table)

# Re-parse every column of `dt` from its character form so numeric-looking
# columns become integer/double. as.is is passed explicitly (recent R
# versions warn when it is omitted); TRUE leaves `id` as character.
dt[] <- lapply(dt, function(x) type.convert(as.character(x), as.is = TRUE))

# For each id, put a 0 in front of every non-grouping column, which adds one
# leading zero row per id.
DT <- as.data.table(dt)
DT[, lapply(.SD, function(x) c(0, x)), by = id]

# Example data. It is built from character vectors, so every column holds
# strings (character in R >= 4.0, factor in older versions).
dt <- as.data.frame(rbind(c("A1", "15", "1"),
                          c("A1", "17", "2"),
                          c("A1", "12", "3"),
                          c("B1", "3", "1"),
                          c("B1", "4", "2"),
                          c("B1", "15", "3")))

colnames(dt) <- c("id", "activity", "time")

# Distinct ids that need a zero row. unique() works for both factor and
# character columns, whereas levels() returns NULL for character columns.
id_levels <- unique(as.character(dt$id))

# New rows to append to `dt`: one per id, with activity and time set to "0"
# (strings, to match the existing columns). rep() keeps this valid even when
# there are no ids at all.
levels_M <- data.frame(id = id_levels,
                       activity = rep("0", length(id_levels)),
                       time = rep("0", length(id_levels)),
                       stringsAsFactors = FALSE)

# Bind the new rows to the data frame and keep the result; without the
# assignment the zero rows would be dropped again.
dt <- rbind(dt, levels_M)

# Order by id and then by numeric time, so each id's zero row comes first
# and "10" sorts after "2".
dt <- dt[order(dt$id, as.numeric(as.character(dt$time))), ]
rownames(dt) <- NULL