在R中是否有方法在定义的连续行上生成ifelse?
如果我有:在R中是否有方法在定义的连续行上生成ifelse?,r,R,如果我有: df<-data.frame(group=c(1, 1,1, 1,1, 2, 2, 2, 4,4,4,4), value=c("A","B","C","B","A","A","A","B","D","A","A","B")) 跟进:我还想看看3的每次扫描是否都包含一个值,从组中的第一行开始,然后是第二组等(即第1组扫描ABC、BCB、CBA、第2组扫描AAB和第4组扫描DAA、AAB)(ty akrun): 我们可以在% library(dply
# Example data: three groups (1, 2, 4) holding 5, 3 and 4 rows respectively,
# each row carrying a single-letter value.
df <- data.frame(
  group = rep(c(1, 2, 4), times = c(5, 3, 4)),
  value = strsplit("ABCBAAABDAAB", "")[[1]]
)
跟进:我还想看看3的每次扫描是否都包含一个值,从组中的第一行开始,然后是第二组等(即第1组扫描ABC、BCB、CBA、第2组扫描AAB和第4组扫描DAA、AAB)(ty akrun):
我们可以使用 `%in%`:
library(dplyr)
# For every group, label all of its rows 'yes' when the group contains the
# letter at least once, otherwise 'no'. The scalar result is recycled across
# the rows of the group by mutate().
df %>%
  group_by(group) %>%
  mutate(
    want_any_c = if ('C' %in% value) 'yes' else 'no',
    want_any_b = if ('B' %in% value) 'yes' else 'no'
  )
# A tibble: 12 x 4
# Groups: group [3]
# group value want_any_c want_any_b
# <dbl> <fct> <chr> <chr>
# 1 1 A yes yes
# 2 1 B yes yes
# 3 1 C yes yes
# 4 1 B yes yes
# 5 1 A yes yes
# 6 2 A no yes
# 7 2 A no yes
# 8 2 B no yes
# 9 4 D no yes
#10 4 A no yes
#11 4 A no yes
#12 4 B no yes
因为它是在多个值上完成的,所以函数会更有用
# Report whether `val` occurs anywhere in `colNm`.
#
# colNm: vector to search (e.g. one group's column values).
# val:   value(s) to look for.
# Returns 'yes' / 'no', one label per element of `val`.
f1 <- function(colNm, val) {
  labels <- c('no', 'yes')
  found <- val %in% colNm
  # FALSE/TRUE become 0/1; adding 1 turns them into positions in `labels`.
  labels[found + 1]
}
# Report whether `val` occurs in every rolling window of `width` consecutive
# elements of `colNm`.
#
# colNm: vector to scan (e.g. one group's column values).
# val:   value to look for in each window.
# width: window size; defaults to 3, the size used throughout this example.
# Returns a single 'yes' or 'no'.
f2 <- function(colNm, val, width = 3) {
  # A vector shorter than the window has no complete window. Without this
  # guard all() over zero windows is vacuously TRUE and the group would be
  # labelled 'yes'.
  if (length(colNm) < width) {
    return('no')
  }
  # Namespaced so the function also works when zoo is installed but not
  # attached.
  in_window <- zoo::rollapply(colNm, width, FUN = function(x) val %in% x)
  c('no', 'yes')[all(in_window) + 1]
}
# Per group, add four label columns: whether "C" / "B" occur at all (f1) and
# whether they occur in every rolling window (f2).
df %>%
  group_by(group) %>%
  mutate(
    want_any_c = f1(colNm = value, val = "C"),
    want_any_b = f1(colNm = value, val = "B"),
    want_every_c = f2(colNm = value, val = "C"),
    want_every_b = f2(colNm = value, val = "B")
  )
f1%
突变(想要任何c=f1(值,“c”),
想要_any_b=f1(值,“b”),
希望每一个c=f2(值,“c”),
想要每一个_b=f2(值“b”))
我们可以使用 `%in%`:
library(dplyr)
# For every group, label all of its rows 'yes' when the group contains the
# letter at least once, otherwise 'no'. The scalar result is recycled across
# the rows of the group by mutate().
df %>%
  group_by(group) %>%
  mutate(
    want_any_c = if ('C' %in% value) 'yes' else 'no',
    want_any_b = if ('B' %in% value) 'yes' else 'no'
  )
# A tibble: 12 x 4
# Groups: group [3]
# group value want_any_c want_any_b
# <dbl> <fct> <chr> <chr>
# 1 1 A yes yes
# 2 1 B yes yes
# 3 1 C yes yes
# 4 1 B yes yes
# 5 1 A yes yes
# 6 2 A no yes
# 7 2 A no yes
# 8 2 B no yes
# 9 4 D no yes
#10 4 A no yes
#11 4 A no yes
#12 4 B no yes
因为它是在多个值上完成的,所以函数会更有用
# Report whether `val` occurs anywhere in `colNm`.
#
# colNm: vector to search (e.g. one group's column values).
# val:   value(s) to look for.
# Returns 'yes' / 'no', one label per element of `val`.
f1 <- function(colNm, val) {
  labels <- c('no', 'yes')
  found <- val %in% colNm
  # FALSE/TRUE become 0/1; adding 1 turns them into positions in `labels`.
  labels[found + 1]
}
# Report whether `val` occurs in every rolling window of `width` consecutive
# elements of `colNm`.
#
# colNm: vector to scan (e.g. one group's column values).
# val:   value to look for in each window.
# width: window size; defaults to 3, the size used throughout this example.
# Returns a single 'yes' or 'no'.
f2 <- function(colNm, val, width = 3) {
  # A vector shorter than the window has no complete window. Without this
  # guard all() over zero windows is vacuously TRUE and the group would be
  # labelled 'yes'.
  if (length(colNm) < width) {
    return('no')
  }
  # Namespaced so the function also works when zoo is installed but not
  # attached.
  in_window <- zoo::rollapply(colNm, width, FUN = function(x) val %in% x)
  c('no', 'yes')[all(in_window) + 1]
}
# Per group, add four label columns: whether "C" / "B" occur at all (f1) and
# whether they occur in every rolling window (f2).
df %>%
  group_by(group) %>%
  mutate(
    want_any_c = f1(colNm = value, val = "C"),
    want_any_b = f1(colNm = value, val = "B"),
    want_every_c = f2(colNm = value, val = "C"),
    want_every_b = f2(colNm = value, val = "B")
  )
f1%
突变(想要任何c=f1(值,“c”),
想要_any_b=f1(值,“b”),
希望每一个c=f2(值,“c”),
想要每一个_b=f2(值“b”))
这是一个data.table解决方案
library(zoo)
library(data.table)
# Convert df to a data.table by reference so columns can be added with `:=`.
setDT(df)

# Letters to test for; one want_any_* and one want_every_* column per letter.
to_check <- c('C', 'B')

# want_any_<letter>: TRUE when the letter occurs anywhere in the group.
df[, paste0('want_any_', to_check) :=
     lapply(to_check, function(letter) letter %in% value),
   by = group]

# want_every_<letter>: TRUE when the letter occurs in every rolling window of
# three consecutive rows of the group.
df[, paste0('want_every_', to_check) :=
     lapply(to_check, function(letter) {
       all(rollapply(value, 3, function(window) letter %in% window))
     }),
   by = group]

df
# group value want_any_C want_any_B want_every_C want_every_B
# 1: 1 A TRUE TRUE TRUE TRUE
# 2: 1 B TRUE TRUE TRUE TRUE
# 3: 1 C TRUE TRUE TRUE TRUE
# 4: 1 B TRUE TRUE TRUE TRUE
# 5: 1 A TRUE TRUE TRUE TRUE
# 6: 2 A FALSE TRUE FALSE TRUE
# 7: 2 A FALSE TRUE FALSE TRUE
# 8: 2 B FALSE TRUE FALSE TRUE
# 9: 4 D FALSE TRUE FALSE FALSE
# 10: 4 A FALSE TRUE FALSE FALSE
# 11: 4 A FALSE TRUE FALSE FALSE
# 12: 4 B FALSE TRUE FALSE FALSE
图书馆(动物园)
库(数据表)
setDT(df)
要检查这里有一个data.table解决方案
library(zoo)
library(data.table)
# Convert df to a data.table by reference so columns can be added with `:=`.
setDT(df)

# Letters to test for; one want_any_* and one want_every_* column per letter.
to_check <- c('C', 'B')

# want_any_<letter>: TRUE when the letter occurs anywhere in the group.
df[, paste0('want_any_', to_check) :=
     lapply(to_check, function(letter) letter %in% value),
   by = group]

# want_every_<letter>: TRUE when the letter occurs in every rolling window of
# three consecutive rows of the group.
df[, paste0('want_every_', to_check) :=
     lapply(to_check, function(letter) {
       all(rollapply(value, 3, function(window) letter %in% window))
     }),
   by = group]

df
# group value want_any_C want_any_B want_every_C want_every_B
# 1: 1 A TRUE TRUE TRUE TRUE
# 2: 1 B TRUE TRUE TRUE TRUE
# 3: 1 C TRUE TRUE TRUE TRUE
# 4: 1 B TRUE TRUE TRUE TRUE
# 5: 1 A TRUE TRUE TRUE TRUE
# 6: 2 A FALSE TRUE FALSE TRUE
# 7: 2 A FALSE TRUE FALSE TRUE
# 8: 2 B FALSE TRUE FALSE TRUE
# 9: 4 D FALSE TRUE FALSE FALSE
# 10: 4 A FALSE TRUE FALSE FALSE
# 11: 4 A FALSE TRUE FALSE FALSE
# 12: 4 B FALSE TRUE FALSE FALSE
图书馆(动物园)
库(数据表)
setDT(df)
要检查这里有一个基本的R解决方案,您首先定义函数want
,如下所示
# Label a vector by whether `key` occurs in its rolling windows of three
# consecutive elements.
#
# v:   vector of values (e.g. one group's `value` column).
# key: value to look for.
# f:   "any"   -> "Yes" when at least one window contains `key`;
#      "every" -> "Yes" only when all windows contain `key`.
# Returns "Yes" or "No" repeated length(v) times (NULL for any other `f`).
want <- function(v, key, f) {
  n <- length(v)
  # seq(length(v) - 2) counts downwards (1, 0, -1, ...) for vectors shorter
  # than three elements, producing invalid subscripts. A short, non-empty
  # vector is treated as a single truncated window instead.
  starts <- if (n > 0L) seq_len(max(n - 2L, 1L)) else integer(0)
  u <- vapply(starts,
              function(k) key %in% v[k:min(k + 2L, n)],
              logical(length(key)))
  switch(f,
         "any" = rep(if (any(u)) "Yes" else "No", n),
         "every" = rep(if (all(u)) "Yes" else "No", n))
}
下面是一个基础 R(base R)解决方案,首先定义函数 `want`,如下所示:
# Label a vector by whether `key` occurs in its rolling windows of three
# consecutive elements.
#
# v:   vector of values (e.g. one group's `value` column).
# key: value to look for.
# f:   "any"   -> "Yes" when at least one window contains `key`;
#      "every" -> "Yes" only when all windows contain `key`.
# Returns "Yes" or "No" repeated length(v) times (NULL for any other `f`).
want <- function(v, key, f) {
  n <- length(v)
  # seq(length(v) - 2) counts downwards (1, 0, -1, ...) for vectors shorter
  # than three elements, producing invalid subscripts. A short, non-empty
  # vector is treated as a single truncated window instead.
  starts <- if (n > 0L) seq_len(max(n - 2L, 1L)) else integer(0)
  u <- vapply(starts,
              function(k) key %in% v[k:min(k + 2L, n)],
              logical(length(key)))
  switch(f,
         "any" = rep(if (any(u)) "Yes" else "No", n),
         "every" = rep(if (all(u)) "Yes" else "No", n))
}
使用基础 R(base R),但不需要把各个取值硬编码成向量再逐一匹配:
# For every block of `n` consecutive rows of `df`, record which of the
# distinct `value`s occur in that block, then attach those flags to the rows.
# Reads `df` (columns `group`, `value`); writes `df$group2` and `final_df`.

# Number of consecutive rows per block.
n <- 3L

# Create a group of each grouping var every three rows. The block number is
# computed arithmetically: ave(..., FUN = seq.int) relies on seq.int() acting
# like seq_along(), but for a length-one input seq.int(3) is 1:3, and
# 1:nrow(df) is c(1, 0) when there are no rows.
df$group2 <- paste0(df$group,
                    " - ",
                    (seq_len(nrow(df)) - 1L) %/% n + 1L)

# Row-wise concatenate the unique values per group:
values_by_group <- aggregate(value ~ group2, df,
                             FUN = function(x) {
                               paste0(unique(sort(x)), collapse = ", ")
                             })

# Distinct values as character, computed once and reused below.
value_levels <- unique(as.character(df$value))

# Add a vector per each unique value in df's value vector:
values_by_group <- cbind(values_by_group,
                         setNames(data.frame(matrix(NA,
                                                    nrow = nrow(values_by_group),
                                                    ncol = length(value_levels))),
                                  value_levels))

# Store a vector of indices of values_by_group table
# matching the values in the original dataframe:
vec_idx <- names(values_by_group) %in% value_levels

# Match vector names with values in value vector. vapply() yields one column
# per block, so the result is transposed to one row per block.
values_by_group[, vec_idx] <-
  t(vapply(strsplit(as.character(values_by_group$value), ', '),
           function(x) {
             names(values_by_group)[vec_idx] %in% x
           },
           logical(sum(vec_idx))))

# Merge with the original dataframe, drop unwanted grouping vec:
final_df <- within(merge(df,
                         values_by_group[, names(values_by_group) != "value"],
                         by = "group2",
                         all.x = TRUE), rm("group2"))
#每三行为每个分组变量创建一个组:
n=3
df$group2Base R,但不要求将单个值硬编码为向量,并匹配它们,等等:
# For every block of `n` consecutive rows of `df`, record which of the
# distinct `value`s occur in that block, then attach those flags to the rows.
# Reads `df` (columns `group`, `value`); writes `df$group2` and `final_df`.

# Number of consecutive rows per block.
n <- 3L

# Create a group of each grouping var every three rows. The block number is
# computed arithmetically: ave(..., FUN = seq.int) relies on seq.int() acting
# like seq_along(), but for a length-one input seq.int(3) is 1:3, and
# 1:nrow(df) is c(1, 0) when there are no rows.
df$group2 <- paste0(df$group,
                    " - ",
                    (seq_len(nrow(df)) - 1L) %/% n + 1L)

# Row-wise concatenate the unique values per group:
values_by_group <- aggregate(value ~ group2, df,
                             FUN = function(x) {
                               paste0(unique(sort(x)), collapse = ", ")
                             })

# Distinct values as character, computed once and reused below.
value_levels <- unique(as.character(df$value))

# Add a vector per each unique value in df's value vector:
values_by_group <- cbind(values_by_group,
                         setNames(data.frame(matrix(NA,
                                                    nrow = nrow(values_by_group),
                                                    ncol = length(value_levels))),
                                  value_levels))

# Store a vector of indices of values_by_group table
# matching the values in the original dataframe:
vec_idx <- names(values_by_group) %in% value_levels

# Match vector names with values in value vector. vapply() yields one column
# per block, so the result is transposed to one row per block.
values_by_group[, vec_idx] <-
  t(vapply(strsplit(as.character(values_by_group$value), ', '),
           function(x) {
             names(values_by_group)[vec_idx] %in% x
           },
           logical(sum(vec_idx))))

# Merge with the original dataframe, drop unwanted grouping vec:
final_df <- within(merge(df,
                         values_by_group[, names(values_by_group) != "value"],
                         by = "group2",
                         all.x = TRUE), rm("group2"))
#每三行为每个分组变量创建一个组:
n=3
df$group 2你能检查“want_every_c”和“want_every_b”列的值是否正确吗是的,第4组want_every_b是DAA然后AAB所以不是每个扫描都有b。与第2组中的AAB扫描相同,(没有c)如果我做滚动值,它将是a b c
,b
,c b a
,b a
,b a,A
,因此只有第1组的前3行具有“C”@akrun ABC、BCB、CBA,但不需要BAA(A是第2组的一部分)和AAA(AA是第2组的一部分),因为这些值开始进入第2组2@akrun是的,只是因为我想说“第一组有三个带C的扫描”。所以我希望want标签始终是yes或no(组内所有需要的扫描都有C吗?如果是,则组=yes,如果否,则组=no)您可以检查“want_every_C”和“want_every_b”列值是否正确是的,组4 want_every__b是DAA然后AAB,因此不是每个扫描都有b。与组2中的AAB扫描相同,(没有C)如果我做一个滚动值,它将是ABC
,BCB
,CBA
,aba
,AAA
,因此只有组1的前3行有“C”@akrunabc,BCB,CBA是需要的,而不是BAA(a是组2的一部分)和AAA(AA是组2的一部分)因为这些值开始进入组中2@akrun是的,只是因为我想说“第一组有三个带C的扫描”。因此,我希望标签始终为“是”或“否”(组内所有需要的扫描都有C吗?如果是,则组=是,如果否,则组=否)@biostatguy12。您可以在rollappy
中添加partial=TRUE
,而不是ifelse
,只需if/else
,因为我们只检查行数。我在上面评论了syntax@biostatguy12尝试df%>%groupby(group)%>%mutate(想要任何c=f1(值,“c”)、想要任何b=f1(值,“b”)、想要每c=if(n()>=3)f2(值,“c”)否则想要任何c、想要每b=f2(值,“b”))
同样适用于每种需求_b@biostatguy12%
中的%返回一个逻辑输出,即TRUE/FALSE。通过添加1,真/假被强制为整数,并将值更改为2/1,我们使用该值作为索引来替换c(“否”、“是”)
@biostatguy12中的值。“f2”函数对宽度为3的相关列执行rollapply
,创建一个逻辑向量(val%x
),检查all
是否为真,将其更改为数字索引(+1
),并将替换为c('no','yes')
。我们按“组”进行分组,如果行数大于或等于3,则通过条件创建列,然后应用“f2”,否则只返回“否”@biostatguy12。您可以在rollappy
中添加partial=TRUE
,而不是ifelse
,只需if/else
,因为我们只检查行数。我在上面评论了syntax@biostatguy12尝试df%>%groupby(group)%>%mutate(想要任何c=f1(值,“c”)、想要任何b=f1(值,“b”)、想要每c=if(n()>=3)f2(值,“c”)否则想要任何c、想要每b=f2(值,“b”))
同样适用于每种需求_b@biostatguy12%
中的%返回一个逻辑输出,即TRUE/FALSE。通过添加1,真/假被强制为整数,并将值更改为2/1,我们使用该值作为索引来替换c(“否”、“是”)
@biostatguy12中的值。“f2”函数对宽度为3的相关列执行rollapply
,创建一个逻辑向量(val%x
),检查all
是否为真,将其更改为数字索引(+1
),并将替换为c('no','yes')
。我们按“group”进行分组,并通过条件创建一列,如果行数大于或等于3,则应用“f2”,否则只返回“no”
> dfout
group value want_any_c want_any_b want_every_c want_every_b
1 1 A Yes Yes Yes Yes
2 1 B Yes Yes Yes Yes
3 1 C Yes Yes Yes Yes
4 1 B Yes Yes Yes Yes
5 1 A Yes Yes Yes Yes
6 2 A No Yes No Yes
7 2 A No Yes No Yes
8 2 B No Yes No Yes
9 4 D No Yes No No
10 4 A No Yes No No
11 4 A No Yes No No
12 4 B No Yes No No
# For every block of `n` consecutive rows of `df`, record which of the
# distinct `value`s occur in that block, then attach those flags to the rows.
# Reads `df` (columns `group`, `value`); writes `df$group2` and `final_df`.

# Number of consecutive rows per block.
n <- 3L

# Create a group of each grouping var every three rows. The block number is
# computed arithmetically: ave(..., FUN = seq.int) relies on seq.int() acting
# like seq_along(), but for a length-one input seq.int(3) is 1:3, and
# 1:nrow(df) is c(1, 0) when there are no rows.
df$group2 <- paste0(df$group,
                    " - ",
                    (seq_len(nrow(df)) - 1L) %/% n + 1L)

# Row-wise concatenate the unique values per group:
values_by_group <- aggregate(value ~ group2, df,
                             FUN = function(x) {
                               paste0(unique(sort(x)), collapse = ", ")
                             })

# Distinct values as character, computed once and reused below.
value_levels <- unique(as.character(df$value))

# Add a vector per each unique value in df's value vector:
values_by_group <- cbind(values_by_group,
                         setNames(data.frame(matrix(NA,
                                                    nrow = nrow(values_by_group),
                                                    ncol = length(value_levels))),
                                  value_levels))

# Store a vector of indices of values_by_group table
# matching the values in the original dataframe:
vec_idx <- names(values_by_group) %in% value_levels

# Match vector names with values in value vector. vapply() yields one column
# per block, so the result is transposed to one row per block.
values_by_group[, vec_idx] <-
  t(vapply(strsplit(as.character(values_by_group$value), ', '),
           function(x) {
             names(values_by_group)[vec_idx] %in% x
           },
           logical(sum(vec_idx))))

# Merge with the original dataframe, drop unwanted grouping vec:
final_df <- within(merge(df,
                         values_by_group[, names(values_by_group) != "value"],
                         by = "group2",
                         all.x = TRUE), rm("group2"))
# Example data: three groups (1, 2, 4) holding 5, 3 and 4 rows respectively,
# each row carrying a single-letter value.
df <- data.frame(
  group = rep(c(1, 2, 4), times = c(5, 3, 4)),
  value = strsplit("ABCBAAABDAAB", "")[[1]]
)