R 基于多个条件选择行_R_If Statement_Dplyr_Data.table

R 基于多个条件选择行

r if-statement

R 基于多个条件选择行,r,if-statement,dplyr,data.table,R,If Statement,Dplyr,Data.table,我有一个df set.seed(123) df <- data.frame(loc.id = rep(1:9, each = 9), month = rep(1:9,times = 9), x = runif(81, min = 0, max = 5)) 第1、2、4、5、6、7、8个月的x>1，从这些月开始，第8个月与第9个月最接近。因此，将选择第8个月 2）如果没有一个月的x>1，只需选择x值最高的月份。例如：如果是一个位置，x是 0.8,

我有一个df

# Reproducible example data: 9 locations (loc.id) x 9 months, one x per row.
set.seed(123)
df <- data.frame(
  loc.id = rep(seq_len(9), each = 9),
  month = rep(seq_len(9), times = 9),
  x = runif(81, min = 0, max = 5)
)

对于每个loc.id，我想按以下规则选择一个月份：

1）在x>1的月份中（不包括第9个月），选择最接近第9个月的那个月。例如：如果第1、2、4、5、6、7、8个月的x>1，那么在这些月份中，第8个月最接近第9个月，因此将选择第8个月。

2）如果没有一个月的x>1，只需选择x值最高的月份。例如：

如果是一个位置，x是

  0.8, 0.6, 0.95, 0.4, 0.88, 0.7, 0.6, 0.45, 0.3

然后选择第3个月（x=0.95）

我试过这个：

  # Attempt at rule 1 only: among months 1-8 with x > 1, find the one closest
  # to month 9 for each loc.id. A loc.id with no x > 1 loses all of its rows
  # in the second filter() and so is absent from the result.
  library(dplyr)
  df %>% filter(month != 9) %>% # drop month 9 so only months 1-8 are evaluated
        group_by(loc.id) %>% 
        mutate(select.month = x > 1) %>% # flag the months where x > 1
        filter(select.month == TRUE) %>% # keep only the flagged months
        mutate(dif = 9 - month) %>%# distance of each remaining month from month 9
        summarise(month.id = min(dif)) # smallest distance per loc.id (9 - month, not the month itself)

但是，上述代码无法处理某个位置所有月份的x值都小于1的情况。我的问题是：当没有任何x大于1时，如何修改上述代码以同时检查条件2？

一个可能的解决方案是使用ifelse：排除第9行后，如果位置8的值大于1.0，则取位置8，否则取最大值（max）。

一个月的示例：


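> set.seed(123)
> df <- data.frame(loc.id = rep(1:9, each = 9), month = rep(1:9,times = 9), 
                  x = runif(81, min = 0, max = 5))
> set.seed(123)
> df=rbind(df,cbind(loc.id=10,month=1:9 , x=runif(9)))
> df%>%group_by(loc.id)%>%mutate(x=replace(x,9,0),y=cumsum(x>1))%>%
+     summarise(y=ifelse(all(!y),which.max(x),which.max(y)))
# A tibble: 10 x 2
   loc.id     y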
1      1     8
2      2     8
3      3     8
4      4     7
5      5     8
6      6     8
7      7     7
8      8     8
9      9     7
10     10     5
我稍微修改了您的数据框，因为原数据中没有任何一个loc.id的所有月份x都小于1：
# For each loc.id keep exactly one month (month 9 is never a candidate):
#   - if any month has x > 1, keep the latest such month;
#   - otherwise keep the month with the largest x.
df %>% 
  group_by(loc.id) %>%
  filter(month != 9) %>% 
  # Flag groups with no x > 1. !any(x > 1) is used instead of all(x < 1) so
  # that a group whose largest x is exactly 1 is still flagged; with
  # all(x < 1) such a group would lose every row in the next filter().
  mutate(all_x_less_1 = !any(x > 1)) %>% 
  # Flagged groups keep all rows; other groups keep only rows with x > 1.
  filter(all_x_less_1 | x > 1) %>% 
  # Flagged groups: row with the largest x. Others: latest remaining month.
  filter(
    month == if_else(all_x_less_1, month[which.max(x)], max(month))
  )

# A tibble: 9 x 4
# Groups:   loc.id [9]
#   loc.id month     x all_x_less_1
#    <int> <int> <dbl> <lgl>       
# 1      1     8 4.46  F           
# 2      2     7 2.25  F           
# 3      3     8 1.18  F           
# 4      4     5 1.13  F           
# 5      5     1 0.758 T           
# 6      6     5 0.715 T           
# 7      7     5 0.639 T           
# 8      8     2 0.509 T           
# 9      9     1 0.395 T         

library(data.table)
setDT(d)
d[ , {
  ix <- x > 1 & month != 9 
  .(month = if(any(ix)) last(month[ix]) else month[which.max(x)])
}, by = loc.id]


说明：
对于每个组（by = loc.id），获取x>1且不是第9个月的行的索引（x > 1 & month != 9）。如果其中任何一个索引为真（if (any(ix))），则选择其中的最后一个月（last(month[ix])）；否则选择x最大值对应的月份（else month[which.max(x)]）。

你能把它写成一个完整的解决方案吗？在我的脚本中应该把它放在哪里？谢谢。

OP的例子是可复现的；在这个网站上，通常期望答案包含能配合该例子运行的代码。
set.seed(123)
> df <- data.frame(loc.id = rep(1:9, each = 9), month = rep(1:9,times = 9), 
                  x = runif(81, min = 0, max = 5))
> set.seed(123)
> df=rbind(df,cbind(loc.id=10,month=1:9 , x=runif(9)))




> df%>%group_by(loc.id)%>%mutate(x=replace(x,9,0),y=cumsum(x>1))%>%
+     summarise(y=ifelse(all(!y),which.max(x),which.max(y)))
# A tibble: 10 x 2
   loc.id     y
    <dbl> <int>
 1      1     8
 2      2     8
 3      3     8
 4      4     7
 5      5     8
 6      6     8
 7      7     7
 8      8     8
 9      9     7
10     10     5

# For each loc.id keep exactly one month (month 9 is never a candidate):
#   - if any month has x > 1, keep the latest such month;
#   - otherwise keep the month with the largest x.
df %>% 
  group_by(loc.id) %>%
  filter(month != 9) %>% 
  # Flag groups with no x > 1. !any(x > 1) is used instead of all(x < 1) so
  # that a group whose largest x is exactly 1 is still flagged; with
  # all(x < 1) such a group would lose every row in the next filter().
  mutate(all_x_less_1 = !any(x > 1)) %>% 
  # Flagged groups keep all rows; other groups keep only rows with x > 1.
  filter(all_x_less_1 | x > 1) %>% 
  # Flagged groups: row with the largest x. Others: latest remaining month.
  filter(
    month == if_else(all_x_less_1, month[which.max(x)], max(month))
  )

# A tibble: 9 x 4
# Groups:   loc.id [9]
#   loc.id month     x all_x_less_1
#    <int> <int> <dbl> <lgl>       
# 1      1     8 4.46  F           
# 2      2     7 2.25  F           
# 3      3     8 1.18  F           
# 4      4     5 1.13  F           
# 5      5     1 0.758 T           
# 6      6     5 0.715 T           
# 7      7     5 0.639 T           
# 8      8     2 0.509 T           
# 9      9     1 0.395 T         

# Rebuild the example data, then divide x by the location id so that x
# shrinks as loc.id grows.
set.seed(123)
df <- data.frame(
  loc.id = rep(seq_len(9), each = 9),
  month = rep(seq_len(9), times = 9),
  x = runif(81, min = 0, max = 5)
)

df <- mutate(df, x = x / loc.id)

library(data.table)
# Convert the example data frame to a data.table by reference. The data
# object in this file is `df`; the previously used name `d` is never defined.
setDT(df)
# Per loc.id: take the last listed month with x > 1 (month 9 excluded); if
# there is none, fall back to the month with the largest x.
df[, {
  ix <- x > 1 & month != 9
  .(month = if (any(ix)) last(month[ix]) else month[which.max(x)])
}, by = loc.id]