R 按连续条件筛选行-按分组_R_Group By

R 按连续条件筛选行-按分组

R 按连续条件筛选行-按分组,r,group-by,R,Group By,我有以下数据框： df.temp <- tribble( ~ Case, ~ Operator, ~ Interval, ~ Value, "A","John","01-Blue",10, "A", "Cage","01-Blue",10, "A","John","02-Red",20, "A","Cage","03-Yellow",30,

我有以下数据框：

# Sample data: one measurement per row, described by Case, Operator,
# Interval and Value. Blank lines separate the (Case, Interval) groups.
df.temp <- tribble(
  ~Case, ~Operator, ~Interval,   ~Value,

  # ---- Case A ----
  "A",   "John",    "01-Blue",   10,
  "A",   "Cage",    "01-Blue",   10,

  "A",   "John",    "02-Red",    20,
  "A",   "Cage",    "03-Yellow", 30,

  "A",   "John",    "04-Grey",   37,
  "A",   "Cage",    "04-Grey",   40,

  "A",   "Cage",    "05-Brown",  55,
  "A",   "John",    "05-Brown",  60,

  "A",   "Cage",    "05-Black",  60,
  "A",   "Cage",    "05-Black",  65,

  # ---- Case B ----
  "B",   "John",    "01-Blue",   10,

  "B",   "Cage",    "02-Red",    10,
  "B",   "John",    "02-Red",    20,

  "B",   "John",    "03-Yellow", 30,

  "B",   "John",    "04-Grey",   44,
  "B",   "Cage",    "04-Grey",   40,

  "B",   "Cage",    "05-Brown",  55,
  "B",   "Cage",    "05-Brown",  60,

  "B",   "Cage",    "05-Black",  60,
  "B",   "Cage",    "05-Black",  63
)

我们可以按 `Case` 和 `Interval` 对 `df.temp` 进行分组，并在 `case_when()` 中依次写出各个判断条件：

library(dplyr)

# Flag every row of df.temp as "keep" or "remove" by comparing the rows that
# share the same Case and Interval. case_when() uses the first matching rule:
#   1. two operators, values differ by 4 or more -> keep both rows
#   2. two operators, values differ by 3 or less -> keep John, remove Cage
#   3. one operator, several rows                -> keep the smallest Value
#   4. anything else (e.g. a single row)         -> keep
#
# abs(diff(Value)) is used instead of diff(Value): diff() is signed, so a pair
# stored as (44, 40) gives -4 and would wrongly satisfy the "<= 3" rule, while
# the same pair stored as (40, 44) would not. The absolute difference makes
# the result independent of row order inside a group.
df.temp %>%
  group_by(Case, Interval) %>%
  mutate(
    Remove = case_when(
      # Rule 1: two operators and a gap of 4 or more -> keep both.
      n() > 1 & n_distinct(Operator) == 2 &
        any(abs(diff(Value)) >= 4) ~ "keep",
      # Rule 2: two operators and every gap is 3 or less -> keep John ...
      n_distinct(Operator) == 2 & all(abs(diff(Value)) <= 3) &
        Operator == "John" ~ "keep",
      # ... and remove Cage.
      n_distinct(Operator) == 2 & all(abs(diff(Value)) <= 3) &
        Operator == "Cage" ~ "remove",
      # Rule 3: duplicates from a single operator -> keep the smallest Value ...
      n() > 1 & n_distinct(Operator) == 1 & Value == min(Value) ~ "keep",
      # ... and remove the rows that are not the smallest.
      n() > 1 & n_distinct(Operator) == 1 & Value != min(Value) ~ "remove",
      # Rule 4: everything else is kept.
      TRUE ~ "keep"
    )
  ) %>%
  # Drop the grouping so later verbs do not silently operate per group.
  ungroup()


# Result for the sample df.temp:
#
# Case  Operator Interval  Value Remove
#   <chr> <chr>    <chr>     <dbl> <chr> 
# 1 A     John     01-Blue      10 keep  
# 2 A     Cage     01-Blue      10 remove
# 3 A     John     02-Red       20 keep  
# 4 A     Cage     03-Yellow    30 keep  
# 5 A     John     04-Grey      37 keep  
# 6 A     Cage     04-Grey      40 remove
# 7 A     Cage     05-Brown     55 keep  
# 8 A     John     05-Brown     60 keep  
# 9 A     Cage     05-Black     60 keep  
#10 A     Cage     05-Black     65 remove
#11 B     John     01-Blue      10 keep  
#12 B     Cage     02-Red       10 keep  
#13 B     John     02-Red       20 keep  
#14 B     John     03-Yellow    30 keep  
#15 B     John     04-Grey      44 keep  
#16 B     Cage     04-Grey      40 keep  
#17 B     Cage     05-Brown     55 keep  
#18 B     Cage     05-Brown     60 remove
#19 B     Cage     05-Black     60 keep  
#20 B     Cage     05-Black     63 remove

library(dplyr)

# Flag every row of df.temp as "keep" or "remove" by comparing the rows that
# share the same Case and Interval. case_when() uses the first matching rule:
#   1. two operators, values differ by 4 or more -> keep both rows
#   2. two operators, values differ by 3 or less -> keep John, remove Cage
#   3. one operator, several rows                -> keep the smallest Value
#   4. anything else (e.g. a single row)         -> keep
#
# abs(diff(Value)) is used because diff() is signed: a pair stored as (44, 40)
# gives -4 and would wrongly satisfy a plain "<= 3" test. The absolute
# difference makes the result independent of row order inside a group.
df.temp %>%
  group_by(Case, Interval) %>%
  mutate(
    Remove = case_when(
      # Rule 1: two operators and a gap of 4 or more -> keep both.
      n() > 1 & n_distinct(Operator) == 2 &
        any(abs(diff(Value)) >= 4) ~ "keep",
      # Rule 2: two operators and every gap is 3 or less -> keep John ...
      n_distinct(Operator) == 2 & all(abs(diff(Value)) <= 3) &
        Operator == "John" ~ "keep",
      # ... and remove Cage.
      n_distinct(Operator) == 2 & all(abs(diff(Value)) <= 3) &
        Operator == "Cage" ~ "remove",
      # Rule 3: duplicates from a single operator -> keep the smallest Value ...
      n() > 1 & n_distinct(Operator) == 1 & Value == min(Value) ~ "keep",
      # ... and remove the rows that are not the smallest.
      n() > 1 & n_distinct(Operator) == 1 & Value != min(Value) ~ "remove",
      # Rule 4: everything else is kept.
      TRUE ~ "keep"
    )
  ) %>%
  # Drop the grouping so later verbs do not silently operate per group.
  ungroup()

# Result for the sample df.temp:
#
# Case  Operator Interval  Value Remove
#   <chr> <chr>    <chr>     <dbl> <chr> 
# 1 A     John     01-Blue      10 keep  
# 2 A     Cage     01-Blue      10 remove
# 3 A     John     02-Red       20 keep  
# 4 A     Cage     03-Yellow    30 keep  
# 5 A     John     04-Grey      37 keep  
# 6 A     Cage     04-Grey      40 remove
# 7 A     Cage     05-Brown     55 keep  
# 8 A     John     05-Brown     60 keep  
# 9 A     Cage     05-Black     60 keep  
#10 A     Cage     05-Black     65 remove
#11 B     John     01-Blue      10 keep  
#12 B     Cage     02-Red       10 keep  
#13 B     John     02-Red       20 keep  
#14 B     John     03-Yellow    30 keep  
#15 B     John     04-Grey      44 keep  
#16 B     Cage     04-Grey      40 keep  
#17 B     Cage     05-Brown     55 keep  
#18 B     Cage     05-Brown     60 remove
#19 B     Cage     05-Black     60 keep  
#20 B     Cage     05-Black     63 remove

你好，又见面了——您能解释一下为什么要删除第 6 行（04-Grey）和第 12 行（02-Red）吗？在您的代码中，您似乎检查了前后行，以查看 Value 的变化是否大于/小于 3，但这在您的规则中没有描述。此外，这可能会与不同的 Interval 进行比较（这是需要的吗？）。我只是想了解其中的逻辑。

嗨 @Ben，是的，你是对的——我使用的不是精确匹配，而是大于 3 的差异。这就是为什么应该删除第 6 行、保留第 12 行。不，它不应该跨 Interval 比较，而应该在每个 Case 的每个 Interval 内比较 Value（重复项可能超过两个）。我将更正问题。

为什么在第 3 行和第 4 行中比较“02-Red”和“03-Yellow”？根据您的规则 1，如果 Interval 中有重复项（可能超过两个），则检查它们的 Value 是否有差异。Red 和 Yellow 不是重复项，那么它们的差异为什么重要？

@RonakShah 没错，它们并没有被比较，是我打错了，已更正。

谢谢，你可以查看我下面的答案。我认为第 15 行和第 16 行应该分别是“remove”和“keep”，因为该组同时包含 Operator “John”和“Cage”。

太棒了，谢谢！case_when() 看起来对我非常有用，我需要仔细了解一下它！
Case  Operator Interval  Value Remove
   <chr> <chr>    <chr>     <dbl> <chr> 
 1 A     John     01-Blue      10 keep  
 2 A     Cage     01-Blue      10 remove

 3 A     John     02-Red       20 keep  

 4 A     Cage     03-Yellow    30 keep  

 5 A     John     04-Grey      37 keep  
 6 A     Cage     04-Grey      40 remove 

 7 A     Cage     05-Brown     55 keep  
 8 A     John     05-Brown     60 keep  

 9 A     Cage     05-Black     60 keep
10 A     Cage     05-Black     65 remove (more than 3 points diff, same Operator)

11 B     John     01-Blue      10 keep  

12 B     Cage     02-Red       10 keep
13 B     John     02-Red       20 keep  

14 B     John     03-Yellow    30 keep  

15 B     Cage     04-Grey      40 keep
16 B     John     04-Grey      44 keep  

17 B     Cage     05-Brown     55 keep  
18 B     Cage     05-Brown     60 remove

19 B     Cage     05-Black     60 keep
20 B     Cage     05-Black     63 remove

library(dplyr)

# Flag every row of df.temp as "keep" or "remove" by comparing the rows that
# share the same Case and Interval. case_when() uses the first matching rule:
#   1. two operators, values differ by 4 or more -> keep both rows
#   2. two operators, values differ by 3 or less -> keep John, remove Cage
#   3. one operator, several rows                -> keep the smallest Value
#   4. anything else (e.g. a single row)         -> keep
#
# abs(diff(Value)) is used instead of diff(Value): diff() is signed, so a pair
# stored as (44, 40) gives -4 and would wrongly satisfy the "<= 3" rule, while
# the same pair stored as (40, 44) would not. The absolute difference makes
# the result independent of row order inside a group.
df.temp %>%
  group_by(Case, Interval) %>%
  mutate(
    Remove = case_when(
      # Rule 1: two operators and a gap of 4 or more -> keep both.
      n() > 1 & n_distinct(Operator) == 2 &
        any(abs(diff(Value)) >= 4) ~ "keep",
      # Rule 2: two operators and every gap is 3 or less -> keep John ...
      n_distinct(Operator) == 2 & all(abs(diff(Value)) <= 3) &
        Operator == "John" ~ "keep",
      # ... and remove Cage.
      n_distinct(Operator) == 2 & all(abs(diff(Value)) <= 3) &
        Operator == "Cage" ~ "remove",
      # Rule 3: duplicates from a single operator -> keep the smallest Value ...
      n() > 1 & n_distinct(Operator) == 1 & Value == min(Value) ~ "keep",
      # ... and remove the rows that are not the smallest.
      n() > 1 & n_distinct(Operator) == 1 & Value != min(Value) ~ "remove",
      # Rule 4: everything else is kept.
      TRUE ~ "keep"
    )
  ) %>%
  # Drop the grouping so later verbs do not silently operate per group.
  ungroup()


# Result for the sample df.temp:
#
# Case  Operator Interval  Value Remove
#   <chr> <chr>    <chr>     <dbl> <chr> 
# 1 A     John     01-Blue      10 keep  
# 2 A     Cage     01-Blue      10 remove
# 3 A     John     02-Red       20 keep  
# 4 A     Cage     03-Yellow    30 keep  
# 5 A     John     04-Grey      37 keep  
# 6 A     Cage     04-Grey      40 remove
# 7 A     Cage     05-Brown     55 keep  
# 8 A     John     05-Brown     60 keep  
# 9 A     Cage     05-Black     60 keep  
#10 A     Cage     05-Black     65 remove
#11 B     John     01-Blue      10 keep  
#12 B     Cage     02-Red       10 keep  
#13 B     John     02-Red       20 keep  
#14 B     John     03-Yellow    30 keep  
#15 B     John     04-Grey      44 keep  
#16 B     Cage     04-Grey      40 keep  
#17 B     Cage     05-Brown     55 keep  
#18 B     Cage     05-Brown     60 remove
#19 B     Cage     05-Black     60 keep  
#20 B     Cage     05-Black     63 remove