在 R 中根据组内的其他变量重新赋值

根据r中组内的其他变量重新赋值,r,dplyr,R,Dplyr,我有一个包含4列的数据框——ID、days、pod和value df <- data.frame(ID = rep(1:3, each = 4), days = c(1, 7, 12, 7, 10, 10, 1, 7, 14, 7, 7, 20), pod = factor(c("t1", "t2", "t3", "t2", "t2", "t2", "t1", "t2", "t3", "t2", "t2", "t3")),

我有一个包含4列的数据框——ID、days、pod和value

# Example data for the question: three IDs with four observations each.
#   ID    - integer subject identifier (1, 2, 3)
#   days  - day on which the observation was taken
#   pod   - factor with the labels t1 / t2 / t3
#   value - standard-normal noise; no seed is set, so it differs on every run
df <- data.frame(
  ID    = rep(seq_len(3), each = 4),
  days  = c(1, 7, 12, 7, 10, 10, 1, 7, 14, 7, 7, 20),
  pod   = factor(paste0("t", c(1, 2, 3, 2, 2, 2, 1, 2, 3, 2, 2, 3))),
  value = rnorm(12)
)

基本上,如果某一行不是其 ID 和 pod 分组内最接近的那一天,我想把该行的 value 替换为 NA。如果同一分组内有多行的天数相同,那么我只想保留第一行。(注:下面回答中的代码把 t1、t2、t3 的参照天数分别取为第 1、7、14 天,“最接近”即指离该参照天数最近。)

我发现,依靠行的顺序来处理天数相同的值是有问题的,很容易出错。有没有更……可靠的……办法,可以明确指定要忽略哪些值? 下面的代码与您想要的非常接近,但它目前“只是”把同一分组(ID 和 pod)内最接近的那些行都替换成该组第一行的值:

    set.seed(1)
library(dplyr)  # supplies %>%, mutate(), case_when(), group_by() and first()

# Example data: three IDs with four rows each. `value` is random, so the
# numbers are only reproducible when a seed is set before this line runs.
dat <- data.frame(
  ID = rep(1:3, each = 4),
  days = c(1, 7, 12, 7, 10, 10, 1, 7, 14, 7, 7, 20),
  pod = factor(c("t1", "t2", "t3", "t2", "t2", "t2",
                 "t1", "t2", "t3", "t2", "t2", "t3")),
  value = rnorm(12, mean = 0, sd = 1)
)

# helper: signed offset of `days` from the reference day used for each pod
#         (t1 -> day 1, t2 -> day 7, t3 -> day 14).
# min:    within each ID/pod group, every row at the smallest ABSOLUTE offset
#         receives the group's first value; all other rows become NA.
#         The absolute offset is used so that a day before the reference
#         (negative offset) does not beat an exact hit.
# Known limitation: the value written is always that of the group's first
# row, even when that first row is not itself one of the closest rows.
result <- dat %>%
  mutate(helper = case_when(
    pod == "t1" ~ days - 1,
    pod == "t2" ~ days - 7,
    pod == "t3" ~ days - 14
  )) %>%
  group_by(ID, pod) %>%
  mutate(min = ifelse(abs(helper) == min(abs(helper)), first(value), NA))

result

# A tibble: 12 x 6
# Groups:   ID, pod [7]
      ID  days pod    value helper     min
   <int> <dbl> <fct>  <dbl>  <dbl>   <dbl>
 1     1  1.00 t1    -0.626   0    - 0.626
 2     1  7.00 t2     0.184   0      0.184
 3     1 12.0  t3    -0.836  -2.00 - 0.836
 4     1  7.00 t2     1.60    0      0.184
 5     2 10.0  t2     0.330   3.00  NA    
 6     2 10.0  t2    -0.820   3.00  NA    
 7     2  1.00 t1     0.487   0      0.487
 8     2  7.00 t2     0.738   0      0.330
 9     3 14.0  t3     0.576   0      0.576
10     3  7.00 t2    -0.305   0    - 0.305
11     3  7.00 t2     1.51    0    - 0.305
12     3 20.0  t3     0.390   6.00  NA
library(dplyr)  # supplies %>%, mutate(), case_when(), group_by() and first()

set.seed(1)  # fixes the random `value` column so the printed numbers repeat

# Example data: three IDs with four rows each.
dat <- data.frame(
  ID = rep(1:3, each = 4),
  days = c(1, 7, 12, 7, 10, 10, 1, 7, 14, 7, 7, 20),
  pod = factor(c("t1", "t2", "t3", "t2", "t2", "t2",
                 "t1", "t2", "t3", "t2", "t2", "t3")),
  value = rnorm(12, mean = 0, sd = 1)
)

# helper: signed offset of `days` from the reference day used for each pod
#         (t1 -> day 1, t2 -> day 7, t3 -> day 14).
# min:    within each ID/pod group, every row at the smallest ABSOLUTE offset
#         receives the group's first value; all other rows become NA.
#         The absolute offset is used so that a day before the reference
#         (negative offset) does not beat an exact hit.
# Known limitation: the value written is always that of the group's first
# row, even when that first row is not itself one of the closest rows.
result <- dat %>%
  mutate(helper = case_when(
    pod == "t1" ~ days - 1,
    pod == "t2" ~ days - 7,
    pod == "t3" ~ days - 14
  )) %>%
  group_by(ID, pod) %>%
  mutate(min = ifelse(abs(helper) == min(abs(helper)), first(value), NA))

result
# A tibble: 12 x 6
# Groups:   ID, pod [7]
      ID  days pod    value helper     min
   <int> <dbl> <fct>  <dbl>  <dbl>   <dbl>
 1     1  1.00 t1    -0.626   0    - 0.626
 2     1  7.00 t2     0.184   0      0.184
 3     1 12.0  t3    -0.836  -2.00 - 0.836
 4     1  7.00 t2     1.60    0      0.184
 5     2 10.0  t2     0.330   3.00  NA
 6     2 10.0  t2    -0.820   3.00  NA
 7     2  1.00 t1     0.487   0      0.487
 8     2  7.00 t2     0.738   0      0.330
 9     3 14.0  t3     0.576   0      0.576
10     3  7.00 t2    -0.305   0    - 0.305
11     3  7.00 t2     1.51    0    - 0.305
12     3 20.0  t3     0.390   6.00  NA
现在又加了一个条件。这里用了嵌套的 ifelse,可能不是最优雅的写法,但我希望它能满足您的需求 :)

library(dplyr)  # supplies %>%, mutate(), case_when(), group_by(), row_number()

# Expects `dat` (columns ID, days, pod, value) to exist already.
#
# helper: signed offset of `days` from the reference day used for each pod
#         (t1 -> day 1, t2 -> day 7, t3 -> day 14).
# min:    keeps `value` only on the first row with the smallest absolute
#         offset inside each ID/pod group; every other row becomes NA.
#   - outer ifelse: is this row at the smallest absolute offset of its group?
#   - inner ifelse: among those, keep only the first one. which.min() returns
#     the position of the first minimum, so ties go to the earliest row.
#     Comparing positions rather than `value == first(value)` matters when
#     the closest row is not the first row of its group (ID 2 / t2, row 8).
dat %>%
  mutate(helper = case_when(
    pod == "t1" ~ days - 1,
    pod == "t2" ~ days - 7,
    pod == "t3" ~ days - 14
  )) %>%
  group_by(ID, pod) %>%
  mutate(min = ifelse(abs(helper) == min(abs(helper)),
                      ifelse(row_number() == which.min(abs(helper)), value, NA),
                      NA))

# Output when `dat` was built after set.seed(1):
#> # A tibble: 12 x 6
#> # Groups:   ID, pod [7]
#>       ID  days pod    value helper     min
#>    <int> <dbl> <fct>  <dbl>  <dbl>   <dbl>
#>  1     1  1.00 t1    -0.626   0    - 0.626
#>  2     1  7.00 t2     0.184   0      0.184
#>  3     1 12.0  t3    -0.836  -2.00 - 0.836
#>  4     1  7.00 t2     1.60    0     NA
#>  5     2 10.0  t2     0.330   3.00  NA
#>  6     2 10.0  t2    -0.820   3.00  NA
#>  7     2  1.00 t1     0.487   0      0.487
#>  8     2  7.00 t2     0.738   0      0.738
#>  9     3 14.0  t3     0.576   0      0.576
#> 10     3  7.00 t2    -0.305   0    - 0.305
#> 11     3  7.00 t2     1.51    0     NA
#> 12     3 20.0  t3     0.390   6.00  NA

示例中应该设置随机种子(set.seed)!是的,我同意,为此道歉。不过示例中 value 变量的具体取值并不影响我想做的事情。再次抱歉没有设置种子。这个方法对我很有用!非常感谢。你说得对,天数相同的情况处理起来有点笨拙。这种情况并没有出现在我的实际数据中,我只是担心它可能出现,所以希望示例能把它也涵盖进去。
    set.seed(1)
library(dplyr)  # supplies %>%, mutate(), case_when(), group_by() and first()

# Example data: three IDs with four rows each. `value` is random, so the
# numbers are only reproducible when a seed is set before this line runs.
dat <- data.frame(
  ID = rep(1:3, each = 4),
  days = c(1, 7, 12, 7, 10, 10, 1, 7, 14, 7, 7, 20),
  pod = factor(c("t1", "t2", "t3", "t2", "t2", "t2",
                 "t1", "t2", "t3", "t2", "t2", "t3")),
  value = rnorm(12, mean = 0, sd = 1)
)

# helper: signed offset of `days` from the reference day used for each pod
#         (t1 -> day 1, t2 -> day 7, t3 -> day 14).
# min:    within each ID/pod group, every row at the smallest ABSOLUTE offset
#         receives the group's first value; all other rows become NA.
#         The absolute offset is used so that a day before the reference
#         (negative offset) does not beat an exact hit.
# Known limitation: the value written is always that of the group's first
# row, even when that first row is not itself one of the closest rows.
result <- dat %>%
  mutate(helper = case_when(
    pod == "t1" ~ days - 1,
    pod == "t2" ~ days - 7,
    pod == "t3" ~ days - 14
  )) %>%
  group_by(ID, pod) %>%
  mutate(min = ifelse(abs(helper) == min(abs(helper)), first(value), NA))

result

# A tibble: 12 x 6
# Groups:   ID, pod [7]
      ID  days pod    value helper     min
   <int> <dbl> <fct>  <dbl>  <dbl>   <dbl>
 1     1  1.00 t1    -0.626   0    - 0.626
 2     1  7.00 t2     0.184   0      0.184
 3     1 12.0  t3    -0.836  -2.00 - 0.836
 4     1  7.00 t2     1.60    0      0.184
 5     2 10.0  t2     0.330   3.00  NA    
 6     2 10.0  t2    -0.820   3.00  NA    
 7     2  1.00 t1     0.487   0      0.487
 8     2  7.00 t2     0.738   0      0.330
 9     3 14.0  t3     0.576   0      0.576
10     3  7.00 t2    -0.305   0    - 0.305
11     3  7.00 t2     1.51    0    - 0.305
12     3 20.0  t3     0.390   6.00  NA
library(dplyr)  # supplies %>%, mutate(), case_when(), group_by(), row_number()

# Expects `dat` (columns ID, days, pod, value) to exist already.
#
# helper: signed offset of `days` from the reference day used for each pod
#         (t1 -> day 1, t2 -> day 7, t3 -> day 14).
# min:    keeps `value` only on the first row with the smallest absolute
#         offset inside each ID/pod group; every other row becomes NA.
#   - outer ifelse: is this row at the smallest absolute offset of its group?
#   - inner ifelse: among those, keep only the first one. which.min() returns
#     the position of the first minimum, so ties go to the earliest row.
#     Comparing positions rather than `value == first(value)` matters when
#     the closest row is not the first row of its group (ID 2 / t2, row 8).
dat %>%
  mutate(helper = case_when(
    pod == "t1" ~ days - 1,
    pod == "t2" ~ days - 7,
    pod == "t3" ~ days - 14
  )) %>%
  group_by(ID, pod) %>%
  mutate(min = ifelse(abs(helper) == min(abs(helper)),
                      ifelse(row_number() == which.min(abs(helper)), value, NA),
                      NA))

# Output when `dat` was built after set.seed(1):
#> # A tibble: 12 x 6
#> # Groups:   ID, pod [7]
#>       ID  days pod    value helper     min
#>    <int> <dbl> <fct>  <dbl>  <dbl>   <dbl>
#>  1     1  1.00 t1    -0.626   0    - 0.626
#>  2     1  7.00 t2     0.184   0      0.184
#>  3     1 12.0  t3    -0.836  -2.00 - 0.836
#>  4     1  7.00 t2     1.60    0     NA
#>  5     2 10.0  t2     0.330   3.00  NA
#>  6     2 10.0  t2    -0.820   3.00  NA
#>  7     2  1.00 t1     0.487   0      0.487
#>  8     2  7.00 t2     0.738   0      0.738
#>  9     3 14.0  t3     0.576   0      0.576
#> 10     3  7.00 t2    -0.305   0    - 0.305
#> 11     3  7.00 t2     1.51    0     NA
#> 12     3 20.0  t3     0.390   6.00  NA