R 检查超过特定值的表格,并按相应id和标签计数超过相应阈值的次数
我有一个数据帧dfR 检查超过特定值的表格,并按相应id和标签计数超过相应阈值的次数,r,dataframe,R,Dataframe,我有一个数据帧df df <- data.frame(id =c(1,2,1,4,1,5,6), label=c("a","b", "a", "a","a", "e", "a"), color = c("g","a","g","g","a","a","a"), threshold = c(12, 10, 12, 12, 12, 35, 40),
df <- data.frame(id =c(1,2,1,4,1,5,6),
label=c("a","b", "a", "a","a", "e", "a"),
color = c("g","a","g","g","a","a","a"),
threshold = c(12, 10, 12, 12, 12, 35, 40),
value =c(32.1,0,15.0,10,1,50,45),stringsAsFactors = F
)
df阈值,1,0))%>%
分组依据(id,标签)%>%
总结(超过=总和(检查))
但是我没有得到相应的id,而是得到了超过的总数
只需对代码稍作修改:
df %>%
group_by(id, label) %>%
mutate(check = if_else(value > threshold, 1, 0)) %>%
summarise(exceed = sum(check)) %>%
group_by(id, label)
id label exceed
<dbl> <chr> <dbl>
1 1 a 2
2 2 b 0
3 4 a 0
4 5 e 1
5 6 a 1
df%>%
分组依据(id,标签)%>%
变异(检查=if_else(值>阈值,1,0))%>%
汇总(超出=总和(检查))%>%
分组人(id、标签)
id标签超过
1 a 2
2B0
3 4 a 0
4 5 e 1
5 6 a 1
要更紧密地匹配预期输出,请执行以下操作:
df %>%
group_by(id, label) %>%
mutate(exceed = sum(if_else(value > threshold, 1, 0))) %>%
group_by(id, label, color) %>%
filter(row_number() == 1)
id label color threshold value exceed
<dbl> <chr> <chr> <dbl> <dbl> <dbl>
1 1 a g 12 32.1 2
2 2 b a 10 0 0
3 4 a g 12 10 0
4 1 a a 12 1 2
5 5 e a 35 50 1
6 6 a a 40 45 1
df%>%
分组依据(id,标签)%>%
变异(超过=总和(如果值>阈值,1,0))%>%
分组依据(id、标签、颜色)%>%
过滤器(行数()==1)
id标签颜色阈值超过
1 a g 12 32.1 2
2 b a 10 0 0
3 4 a g 12 10 0
41A1212
5 e 35 50 1
6 a 40 45 1
代码中的一点更改
final_df <- df %>% mutate(check = if_else(value > threshold, 1, 0)) %>% group_by(id, label) %>% filter(check==1)
unique(final_df$id)
final_df%mutate(check=if_else(值>阈值,1,0))%%>%groupby(id,label)%%>%filter(check==1)
唯一(最终的_df$id)
仅使用基数R时,使用聚合
aggregate(seq.int(nrow(df)) ~ id + label, df, function(i) sum(df[i, 4] < df[i, 5]))
# id label seq.int(nrow(df))
#1 1 a 2
#2 4 a 0
#3 6 a 1
#4 2 b 0
#5 5 e 1
聚合(seq.int(nrow(df))~id+标签,df,函数(i)和(df[i,4]exceed <- seq.int(nrow(df))
agg <- aggregate(exceed ~ id + label, df, function(i) sum(df[i, 4] < df[i, 5]))
res <- merge(df[1:3], agg)
unique(res)
# id label color exceed
#1 1 a g 2
#3 1 a a 2
#4 2 b a 0
#5 4 a g 0
#6 5 e a 1
#7 6 a a 1
超过我们可以使用表
和合并
:
table_ <- table(subset(df,value>threshold, c("id","label")))
df2 <- merge(unique(df[c("id","label","color")]),table_,all.x=TRUE)
df2$Freq[is.na(df2$Freq)] <- 0
# id label color Freq
# 1 1 a g 2
# 2 1 a a 2
# 3 2 b a 0
# 4 4 a g 0
# 5 5 e a 1
# 6 6 a a 1
table_uuu阈值,c(“id”,“label”))
是的,是的。我现在觉得有点傻。
aggregate(seq.int(nrow(df)) ~ id + label, df, function(i) sum(df[i, 4] < df[i, 5]))
# id label seq.int(nrow(df))
#1 1 a 2
#2 4 a 0
#3 6 a 1
#4 2 b 0
#5 5 e 1
exceed <- seq.int(nrow(df))
agg <- aggregate(exceed ~ id + label, df, function(i) sum(df[i, 4] < df[i, 5]))
res <- merge(df[1:3], agg)
unique(res)
# id label color exceed
#1 1 a g 2
#3 1 a a 2
#4 2 b a 0
#5 4 a g 0
#6 5 e a 1
#7 6 a a 1
table_ <- table(subset(df,value>threshold, c("id","label")))
df2 <- merge(unique(df[c("id","label","color")]),table_,all.x=TRUE)
df2$Freq[is.na(df2$Freq)] <- 0
# id label color Freq
# 1 1 a g 2
# 2 1 a a 2
# 3 2 b a 0
# 4 4 a g 0
# 5 5 e a 1
# 6 6 a a 1