R 基于条件或序列合并行中的数据
我有以下数据:R 基于条件或序列合并行中的数据,r,merge,conditional,sequence,R,Merge,Conditional,Sequence,我有以下数据: Data <- data.frame(Project=c(123,123,123,123,123,123,124,124,124,124,124,125,125,125,126,126), Value=c(1,4,7,3,8,9,8,3,2,5,6,2,2,1,8,3), OldValue=c("","Open","In Progress","Complete","Open","In Pro
Data <- data.frame(Project=c(123,123,123,123,123,123,124,124,124,124,124,125,125,125,126,126),
Value=c(1,4,7,3,8,9,8,3,2,5,6,2,2,1,8,3),
OldValue=c("","Open","In Progress","Complete","Open","In Progress","Complete","Open","In Progress","System Declined","In Progress","","Open","In Progress","In Progress",""),
NewValue=c("Open","In Progress","Complete","Open","In Progress","Complete","Open","In Progress","System Declined","In Progress","Complete","Open","In Progress","Complete","","In Progress"))
Data$First <- ifelse(((Data$OldValue==""|Data$OldValue=="Complete"|Data$OldValue=="System Declined")&Data$NewValue=="Open"),Data$Value,NA)
Data$Second <- ifelse(((Data$OldValue=="Open"|Data$OldValue=="Complete"|Data$OldValue=="System Declined")&Data$NewValue=="In Progress"),Data$Value,NA)
Data$Third <- ifelse(((Data$NewValue=="Complete"|Data$NewValue=="System Declined")&Data$OldValue=="In Progress"),Data$Value,NA)
数据使用dplyr的解决方案
:
library(dplyr)
Data %>% group_by(Project) %>%
mutate(
fl = as.numeric(NewValue),
flag = paste(lag(fl, 2, default = 0),
lag(fl, 1, default = 0),
fl, sep = ''),
merge = paste(lag(Value, 2, default = 0),
lag(Value, 1, default = 0),
Value, sep = ',')
) %>%
filter(flag == '321' | flag == '324') %>%
select(Project, merge)
# Project merge
# <dbl> <chr>
# 1 123 1,4,7
# 2 123 3,8,9
# 3 124 8,3,2
# 4 125 2,2,1
库(dplyr)
数据%>%按(项目)分组%>%
变异(
fl=作为数值(新值),
标志=粘贴(滞后(fl,2,默认值=0),
滞后(fl,1,默认值=0),
佛罗里达州,九月=“”),
合并=粘贴(滞后(值,2,默认值=0),
滞后(值为1,默认值为0),
值,sep=',')
) %>%
过滤器(标志=='321'|标志=='324')%>%
选择(项目、合并)
#项目合并
#
# 1 123 1,4,7
# 2 123 3,8,9
# 3 124 8,3,2
# 4 125 2,2,1
使用dplyr的解决方案
:
library(dplyr)
Data %>% group_by(Project) %>%
mutate(
fl = as.numeric(NewValue),
flag = paste(lag(fl, 2, default = 0),
lag(fl, 1, default = 0),
fl, sep = ''),
merge = paste(lag(Value, 2, default = 0),
lag(Value, 1, default = 0),
Value, sep = ',')
) %>%
filter(flag == '321' | flag == '324') %>%
select(Project, merge)
# Project merge
# <dbl> <chr>
# 1 123 1,4,7
# 2 123 3,8,9
# 3 124 8,3,2
# 4 125 2,2,1
库(dplyr)
数据%>%按(项目)分组%>%
变异(
fl=作为数值(新值),
标志=粘贴(滞后(fl,2,默认值=0),
滞后(fl,1,默认值=0),
佛罗里达州,九月=“”),
合并=粘贴(滞后(值,2,默认值=0),
滞后(值为1,默认值为0),
值,sep=',')
) %>%
过滤器(标志=='321'|标志=='324')%>%
选择(项目、合并)
#项目合并
#
# 1 123 1,4,7
# 2 123 3,8,9
# 3 124 8,3,2
# 4 125 2,2,1
这是实现目标的一种方法。我想创建一个数字序列,它可以表示您指定的两种模式(即,打开进行中完成模式和打开进行中系统拒绝模式)。因此,我使用fct\u collapse()
将因子级别压缩为三个。然后,我将新的因子级别转换为数值。然后,我想在每个项目中创建子组,这是我在第二个mutate()
中完成的。下一个任务是更改第一个
、第二个
和第三个
中元素的顺序。你想把数字排成一行。所以我使用了sort()
。应用此操作有一个条件,即相同(检查[1:3],与.numeric(1:3)]
。如果您有这两种模式中的任何一种,则在检查中应该有1、2、3的序列。对每个组使用此逻辑检查。只要满足此逻辑条件,sort()
将应用于由Project
和group
定义的每组中的三列。最后,我删除了整个操作中使用的检查
和组
library(dplyr)
library(forcats)
Data %>%
mutate(check = as.numeric(
as.character(fct_collapse(NewValue,
`1` = "Open",
`2` = "In Progress",
`3` = c("Complete", "System Declined"))))) %>%
group_by(Project) %>%
mutate(group = cumsum(c(TRUE, diff(check) != 1))) %>%
group_by(Project, group) %>%
mutate_at(vars(First:Third),
funs(if(identical(check[1:3], as.numeric(1:3))){
sort(., na.last = TRUE)} else{.}
)) %>%
select(-check, -group)
# group Project Value OldValue NewValue First Second Third
# <int> <dbl> <dbl> <fctr> <fctr> <dbl> <dbl> <dbl>
#1 1 123 1 Open 1 4 7
#2 1 123 4 Open In Progress NA NA NA
#3 1 123 7 In Progress Complete NA NA NA
#4 2 123 3 Complete Open 3 8 9
#5 2 123 8 Open In Progress NA NA NA
#6 2 123 9 In Progress Complete NA NA NA
#7 1 124 8 Complete Open 8 3 2
#8 1 124 3 Open In Progress NA NA NA
#9 1 124 2 In Progress System Declined NA NA NA
#10 2 124 5 System Declined In Progress NA 5 NA
#11 2 124 6 In Progress Complete NA NA 6
#12 1 125 2 Open 2 2 1
#13 1 125 2 Open In Progress NA NA NA
#14 1 125 1 In Progress Complete NA NA NA
库(dplyr)
图书馆(供猫用)
数据%>%
变异(检查=as.numeric(
as.character(fct_)折叠(NewValue,
`1`=“打开”,
`2“正在进行中”,
`3`=c(“完成”、“系统已谢绝”)%>%
分组单位(项目)%>%
变异(组=总和(c(真,差异(检查)!=1))%>%
组别(项目、组别)%>%
变异_at(变量(第一:第三),
funs(如果相同(检查[1:3],作为数字(1:3))){
排序(,na.last=TRUE)}else{.}
)) %>%
选择(-check,-group)
#组项目值旧值新值第一第二第三
#
#111231打开1417
#2 1 123 4未结未结未结未结未结未结
#3 1 123 7进行中完成不适用不适用不适用
#4 2 123 3完全打开3 8 9
#5 2 123 8未结未结未结未结未结未结
#6 2 123 9正在进行中完成不适用不适用不适用
#7 1 124 8完全打开8 3 2
#8 1 124 3未结未结未结未结未结未结
#9 1 124 2在建系统拒绝不适用
#10 2 124 5系统正在下降NA 5 NA
#11 2 124 6正在进行中完成NA 6
#12 1 125 2打开2 2 1
#13 1 125 2未结未结未结未结未结未结
#14 1 125 1正在进行中完成不适用不适用不适用
这是实现目标的一种方法。我想创建一个数字序列,它可以表示您指定的两种模式(即,打开进行中完成模式和打开进行中系统拒绝模式)。因此,我使用fct\u collapse()
将因子级别压缩为三个。然后,我将新的因子级别转换为数值。然后,我想在每个项目中创建子组,这是我在第二个mutate()
中完成的。下一个任务是更改第一个
、第二个
和第三个
中元素的顺序。你想把数字排成一行。所以我使用了sort()
。应用此操作有一个条件,即相同(检查[1:3],与.numeric(1:3)]
。如果您有这两种模式中的任何一种,则在检查中应该有1、2、3的序列。对每个组使用此逻辑检查。只要满足此逻辑条件,sort()
将应用于由Project
和group
定义的每组中的三列。最后,我删除了整个操作中使用的检查
和组
library(dplyr)
library(forcats)
Data %>%
mutate(check = as.numeric(
as.character(fct_collapse(NewValue,
`1` = "Open",
`2` = "In Progress",
`3` = c("Complete", "System Declined"))))) %>%
group_by(Project) %>%
mutate(group = cumsum(c(TRUE, diff(check) != 1))) %>%
group_by(Project, group) %>%
mutate_at(vars(First:Third),
funs(if(identical(check[1:3], as.numeric(1:3))){
sort(., na.last = TRUE)} else{.}
)) %>%
select(-check, -group)
# group Project Value OldValue NewValue First Second Third
# <int> <dbl> <dbl> <fctr> <fctr> <dbl> <dbl> <dbl>
#1 1 123 1 Open 1 4 7
#2 1 123 4 Open In Progress NA NA NA
#3 1 123 7 In Progress Complete NA NA NA
#4 2 123 3 Complete Open 3 8 9
#5 2 123 8 Open In Progress NA NA NA
#6 2 123 9 In Progress Complete NA NA NA
#7 1 124 8 Complete Open 8 3 2
#8 1 124 3 Open In Progress NA NA NA
#9 1 124 2 In Progress System Declined NA NA NA
#10 2 124 5 System Declined In Progress NA 5 NA
#11 2 124 6 In Progress Complete NA NA 6
#12 1 125 2 Open 2 2 1
#13 1 125 2 Open In Progress NA NA NA
#14 1 125 1 In Progress Complete NA NA NA
库(dplyr)
图书馆(供猫用)
数据%>%
变异(检查=as.numeric(
as.character(fct_)折叠(NewValue,
`1`=“打开”,
`2“正在进行中”,
`3`=c(“完成”、“系统已谢绝”)%>%
分组单位(项目)%>%
变异(组=总和(c(真,差异(检查)!=1))%>%
组别(项目、组别)%>%
变异_at(变量(第一:第三),
funs(如果相同)(检查