基于因子条件在R中插入行
我正在尝试对 Value 列计算变化率,但由于以下原因无法进行:(1)由于“重置”,每次换油之后的数据都会缺失一行;(2)我缺乏根据条件插入行的 R 知识。(标签:r, insert, row, conditional-statements)这是我的实际数据框:
# Example engine log: seven consecutive readings for a single engine.
# Engine_ID and Oil_Change are stored as factors; Oil_Change uses the labels
# "0" and "1", and Value is the numeric reading.
Before <- data.frame(
  Engine_ID = factor(rep(1006, times = 7)),
  Oil_Change = factor(c(1, 0, 1, 1, 0, 0, 0)),
  Value = c(5, 6, 3, 7, 9, 11, 12)
)
我认为有很多方法可以实现你的需求。下面是我根据自己对需求的理解给出的一种做法,它可能是完成这项任务效率最低的方式:
library(dplyr)
library(reshape2)

# Insert a zero-valued row directly after every oil-change row of `Before`.
#
# Approach: number the original rows (`no`) and add a helper column
# `changeRate` that is 0 on oil-change rows and NA elsewhere. After melting to
# long form, the `changeRate` entries are relabelled as extra `Value` entries
# placed at `no + 0.5`, i.e. between the row that produced them and the next
# original row. Reshaping back to wide form and sorting by `no` interleaves
# them with the original rows.
#
# NOTE(review): melt() stacks the factor `Oil_Change` and the numeric `Value`
# into one `value` column, so the column types of the result may differ from
# those of `Before` -- TODO confirm before doing arithmetic on `Value`.
newChange <- Before %>%
  mutate(
    no = seq_len(nrow(Before)),
    # factor -> character -> numeric recovers the 0/1 labels rather than the
    # internal factor codes
    changeRate = ifelse(as.numeric(as.character(Oil_Change)) > 0, 0, NA)
  ) %>%
  # `id.vars` is spelled out in full instead of relying on partial matching
  melt(id.vars = c('no', 'Engine_ID')) %>%
  mutate(
    # `no` must be shifted before `variable` is relabelled, because the
    # second expression overwrites the marker the first one tests
    no = ifelse(variable == 'changeRate', no + 0.5, no),
    variable = ifelse(variable == 'changeRate', 'Value', as.character(variable))
  ) %>%
  reshape(
    direction = 'wide',
    idvar = c('no', 'Engine_ID'),
    timevar = 'variable'
  ) %>%
  arrange(no) %>%
  # half-step rows that did not come from an oil change hold no data at all
  subset(!(is.na(value.Oil_Change) & is.na(value.Value)))

# reshape() prefixes the widened columns with "value."; strip only that
# literal, leading prefix so no other column name can be altered.
names(newChange) <- sub('^value\\.', '', names(newChange))
# the ordering key is no longer needed
newChange$no <- NULL
库(dplyr);图书馆(E2)
新更改(0,0,NA))%>%
熔体(,id=c('no','Engine_id'))%>%
变异(,no=ifelse(变量=='changeRate',no+0.5,no),
变量=ifelse(变量=='changeRate','Value',as.character(变量)))%>%
重塑(,direction='wide',idvar=c('no','Engine_ID'),timevar='variable')%>%
排列(否)%>%子集(,!(is.na(value.Oil_Change)和is.na(value.value)))
我不明白你是如何从 Before 得到 After 的,你是用什么条件来决定在哪里插入这些新行的?条件是:Oil_Change 为 1 表示注入了新油。所以数据中显示该值从 3 变为 7,而实际上是从 3 到 0 再到 7,只是数据没有正确地体现出来。因此,我想在每次换油之后直接插入一行值为零的额外记录。
# Append a zero-valued row after every oil-change row of `Before`.
df <- Before
# create a helper column which gives the number of Oil_Change occurrences
# strictly BEFORE each row: prepending 0 to the running count and dropping its
# last element shifts the count down by one row, so the number changes on the
# row AFTER an oil change (this form also works for a single-row data frame)
oil.changed <- as.integer(as.character(df$Oil_Change))
df$helper <- head(c(0, cumsum(oil.changed)), -1)
# split data frame by the helper column; every piece except possibly the last
# one ends with an oil-change row
dfl <- split(df, df$helper) # look at `dfl` content!
# add a filler row at the end of each sub-data frame; the filler is a copy of
# the piece's last row, so Engine_ID and the column layout are inherited from
# the data instead of being hard-coded
dfl.added <- lapply(dfl, function(piece) {
  filler <- piece[nrow(piece), , drop = FALSE]
  filler$Oil_Change <- NA
  filler$Value <- 0
  rbind(piece, filler)
})
# join the sub data frames by rowbinding
res <- do.call(rbind, unname(dfl.added))
# properly name the rows
rownames(res) <- seq_len(nrow(res))
# remove helper column
res$helper <- NULL
# voila!
res # a filler row also follows the final piece; remove the last row of `res` if you don't want it
Engine_ID Oil_Change Value
1 1006 1 5
2 1006 <NA> 0
3 1006 0 6
4 1006 1 3
5 1006 <NA> 0
6 1006 1 7
7 1006 <NA> 0
8 1006 0 9
9 1006 0 11
10 1006 0 12
11 1006 <NA> 0
# Insert a zero-valued row after every oil-change row of `Before`.
# Remember the original position of every row so that the inserted rows can
# be sorted back into place. Note that this adds an `order` column to `Before`.
Before$order <- seq_len(nrow(Before))
# Copy each oil-change row and turn the copy into a zero row. which() skips
# rows whose Oil_Change is NA instead of producing all-NA rows for them.
new <- Before[which(Before$Oil_Change == 1), ]
# rep() keeps these assignments valid even when there are no oil changes
new$Oil_Change <- rep(NA, nrow(new))
new$Value <- rep(0, nrow(new))
After <- rbind(Before, new)
# The copies share the `order` value of their source row and sit after all
# original rows, so sorting by `order` (ties keep their existing order) puts
# each copy right behind its source row. The helper column is dropped by name
# rather than by position.
After[order(After$order), names(After) != "order", drop = FALSE]
Engine_ID Oil_Change Value
1 1006 1 5
11 1006 <NA> 0
2 1006 0 6
3 1006 1 3
31 1006 <NA> 0
4 1006 1 7
41 1006 <NA> 0
5 1006 0 9
6 1006 0 11
7 1006 0 12
library(dplyr)
library(reshape2)

# Insert a zero-valued row directly after every oil-change row of `Before`.
#
# Approach: number the original rows (`no`) and add a helper column
# `changeRate` that is 0 on oil-change rows and NA elsewhere. After melting to
# long form, the `changeRate` entries are relabelled as extra `Value` entries
# placed at `no + 0.5`, i.e. between the row that produced them and the next
# original row. Reshaping back to wide form and sorting by `no` interleaves
# them with the original rows.
#
# NOTE(review): melt() stacks the factor `Oil_Change` and the numeric `Value`
# into one `value` column, so the column types of the result may differ from
# those of `Before` -- TODO confirm before doing arithmetic on `Value`.
newChange <- Before %>%
  mutate(
    no = seq_len(nrow(Before)),
    # factor -> character -> numeric recovers the 0/1 labels rather than the
    # internal factor codes
    changeRate = ifelse(as.numeric(as.character(Oil_Change)) > 0, 0, NA)
  ) %>%
  # `id.vars` is spelled out in full instead of relying on partial matching
  melt(id.vars = c('no', 'Engine_ID')) %>%
  mutate(
    # `no` must be shifted before `variable` is relabelled, because the
    # second expression overwrites the marker the first one tests
    no = ifelse(variable == 'changeRate', no + 0.5, no),
    variable = ifelse(variable == 'changeRate', 'Value', as.character(variable))
  ) %>%
  reshape(
    direction = 'wide',
    idvar = c('no', 'Engine_ID'),
    timevar = 'variable'
  ) %>%
  arrange(no) %>%
  # half-step rows that did not come from an oil change hold no data at all
  subset(!(is.na(value.Oil_Change) & is.na(value.Value)))

# reshape() prefixes the widened columns with "value."; strip only that
# literal, leading prefix so no other column name can be altered.
names(newChange) <- sub('^value\\.', '', names(newChange))
# the ordering key is no longer needed
newChange$no <- NULL