dplyr突变的复杂条件_R_If Statement_Dplyr_Mutate

使用 dplyr mutate 处理复杂条件

r if-statement

dplyr突变的复杂条件,r,if-statement,dplyr,mutate,R,If Statement,Dplyr,Mutate,样本数据： library(dplyr) id <- rep(LETTERS[1:5], each = 10) x <- round(runif(50, -500, 200), digits = 0) y <- round(runif(50, -700, 700), digits = 0) z <- round(runif(50, 250, 300), digits = 0) df.1 <- data.frame(id = id, x = x, y = y, z

样本数据：

library(dplyr)

# Sample data: five ids (A-E) with ten observations each.
id <- rep(c("A", "B", "C", "D", "E"), each = 10)
# x, y and z are drawn in this order so the random stream is consumed exactly
# as before; round() defaults to zero decimal places.
x <- round(runif(n = 50, min = -500, max = 200))
y <- round(runif(n = 50, min = -700, max = 700))
z <- round(runif(n = 50, min = 250, max = 300))

# Column names are taken from the variable names.
df.1 <- data.frame(id, x, y, z)
> summary(df.1)
 id           x                y                 z        
 A:10   Min.   :-497.0   Min.   :-665.00   Min.   :251.0  
 B:10   1st Qu.:-283.2   1st Qu.:-349.50   1st Qu.:261.2  
 C:10   Median :-128.0   Median : -33.50   Median :274.5  
 D:10   Mean   :-145.4   Mean   : -39.58   Mean   :275.3  
 E:10   3rd Qu.: -15.0   3rd Qu.: 293.25   3rd Qu.:288.0  
        Max.   : 171.0   Max.   : 696.00   Max.   :299.0

在范围最大的那一列上计算方向非常重要。在示例数据中，y 列很可能始终是范围最大的列，但在我的实际数据中，它可以是任意一列。

我想这会用到 mutate 和 ifelse？！但不知道具体该怎么做……我以前通常会写很长的 for 循环，直到最近一两周才开始使用 dplyr……我想尽量避免使用凌乱的 for 循环和嵌套很深的代码。

非常感谢你的帮助！谢谢

    # Sketch from the question: build one data frame per id and store each one
    # in a variable named after that id.
    # NOTE: the direction rule is still a placeholder; ifelse() needs its
    # test/yes/no arguments before this loop can actually run.
    for (i in seq_along(unique(id))) {

      x <-
        df.1 %>%
        filter(id == unique(id)[i]) %>%
        mutate(direction = ifelse())

      # assign() needs a character name; id may be a factor.
      assign(as.character(unique(id)[i]), x)

    }

for (i in 1:length(unique(id))) {
x <- df.1 %>%
filter(id == unique(id)[i]) %>%
mutate(direction = ifelse())
assign(unique(id)[i], x)
}

将每个id放入其自己的数据帧中

创建一个名为“方向”的新列，这将是对以下条件的响应

确定x、y、z和b中范围最广的列
在标识的列中，通过下一行值是否大于当前行值来计算方向
返回 TRUE 或 FALSE

让我们编写一个函数，对一个数据帧执行此操作：

# Flag, row by row, whether the widest-ranging measurement column decreases.
#
# Args:
#   df: data frame containing numeric columns "x", "y" and "z"; any other
#       columns are carried through untouched.
#
# Returns:
#   `df` with an extra logical column `direction`. Row i is TRUE when the
#   value in row i + 1 of the widest-range column is smaller than the value
#   in row i, and FALSE otherwise. The last row is NA because it has no
#   successor. A zero-row input comes back with an empty `direction` column.
#
# NOTE(review): the question text asks whether the next value is *greater*;
# this flags decreases. Confirm the intended sign before reusing.
foo <- function(df) {
  # split() yields zero-row pieces for unused factor levels; the trailing NA
  # added below would not fit into a zero-row data frame, so return early.
  if (nrow(df) == 0L) {
    df$direction <- logical(0)
    return(df)
  }
  # identify column with widest range within x, y, z
  sub_df <- df[c("x", "y", "z")]
  # vapply guarantees exactly one number per column
  ranges <- vapply(sub_df, function(col) max(col) - min(col), numeric(1))
  widest <- which.max(ranges)
  # diff() is next-minus-current, so a negative step marks a decrease
  direction <- diff(sub_df[[widest]]) < 0
  # pad with NA so there is one entry per row of df
  df$direction <- c(direction, NA)
  df
}

# Apply foo to every element of df_list (expected to be a list of per-id data
# frames, e.g. the result of split()).
df_list <- lapply(df_list, foo)

在此完成演示。我将您的数据缩小了一点，以保持其紧凑性：

# Reproducible, smaller demo data: three ids with six rows each.
set.seed(47)
id <- rep(c("A", "B", "C"), each = 6)
# x, y and z are drawn in this order so the seeded random stream is unchanged;
# round() defaults to zero decimal places.
x <- round(runif(n = 18, min = -500, max = 200))
y <- round(runif(n = 18, min = -700, max = 700))
z <- round(runif(n = 18, min = 250, max = 300))
df.1 <- data.frame(id, x, y, z)

# One data frame per id, with list elements named by id.
df_list <- split(df.1, f = df.1[["id"]])

# Add the direction column to each per-id data frame, then show the list.
df_list <- lapply(X = df_list, FUN = foo)
df_list
# $A
# id    x    y   z direction
# 1  A  184 -600 262     FALSE
# 2  A -238  -44 299      TRUE
# 3  A   33 -451 274     FALSE
# 4  A   76   80 284      TRUE
# 5  A  -99   22 253      TRUE
# 6  A  -16 -513 269        NA
# 
# $B
# id    x    y   z direction
# 7   B -228  265 280      TRUE
# 8   B -172 -168 297      TRUE
# 9   B -120 -653 268     FALSE
# 10  B  147 -648 260     FALSE
# 11  B -403   51 283     FALSE
# 12  B   -9  419 298        NA
# 
# $C
# id    x    y   z direction
# 13  C -386  348 269      TRUE
# 14  C  -80 -183 293     FALSE
# 15  C -146  -45 259      TRUE
# 16  C  131 -429 289     FALSE
# 17  C -220  556 253      TRUE
# 18  C -478  -84 252        NA

如果你想把数据帧分开，最后仍然是

split

，但是

lapply

非常简单（而且不需要额外的包），而这似乎更复杂，没有好处。

按顺序命名的变量很糟糕。请改用一个列表，例如只需

df_list = split(df, df$id)

。谢谢！这太棒了！我应该多学学如何编写自己的函数 :D 再次感谢你！当我没有 id 列时，它工作得非常完美。但由于我的 id 列是因子（factor），会出现“max 对因子没有意义”的错误。不过我想我可以从这里开始 :D 谢谢！！

foo = function(df) {
  # identify column with widest range
  nums = sapply(df, is.numeric)
  ranges = sapply(df[, nums], max) - sapply(df[, nums], min)
  widest = which.max(ranges)
  # see which direction it goes
  direction = diff(df[[widest]]) < 0
  # add this as a column to whole df
  df$direction = c(direction, NA)
  return(df)
}

# Apply foo to every element of df_list (expected to be a list of per-id data
# frames, e.g. the result of split()).
df_list <- lapply(df_list, foo)

# Reproducible, smaller demo data: three ids with six rows each.
set.seed(47)
id <- rep(c("A", "B", "C"), each = 6)
# x, y and z are drawn in this order so the seeded random stream is unchanged;
# round() defaults to zero decimal places.
x <- round(runif(n = 18, min = -500, max = 200))
y <- round(runif(n = 18, min = -700, max = 700))
z <- round(runif(n = 18, min = 250, max = 300))
df.1 <- data.frame(id, x, y, z)

# One data frame per id, with list elements named by id.
df_list <- split(df.1, f = df.1[["id"]])

# Add the direction column to each per-id data frame, then show the list.
df_list <- lapply(X = df_list, FUN = foo)
df_list
# $A
# id    x    y   z direction
# 1  A  184 -600 262     FALSE
# 2  A -238  -44 299      TRUE
# 3  A   33 -451 274     FALSE
# 4  A   76   80 284      TRUE
# 5  A  -99   22 253      TRUE
# 6  A  -16 -513 269        NA
# 
# $B
# id    x    y   z direction
# 7   B -228  265 280      TRUE
# 8   B -172 -168 297      TRUE
# 9   B -120 -653 268     FALSE
# 10  B  147 -648 260     FALSE
# 11  B -403   51 283     FALSE
# 12  B   -9  419 298        NA
# 
# $C
# id    x    y   z direction
# 13  C -386  348 269      TRUE
# 14  C  -80 -183 293     FALSE
# 15  C -146  -45 259      TRUE
# 16  C  131 -429 289     FALSE
# 17  C -220  556 253      TRUE
# 18  C -478  -84 252        NA

library(dplyr)
library(tidyr)
# Same per-id computation with dplyr: apply foo to each id group and combine
# the results into a single data frame. An unnamed do() row-binds the
# per-group data frames directly, so no list-column has to be unnested
# afterwards (unnest() without `cols` is deprecated in current tidyr, and the
# nested frames would repeat the `id` column).
df.1 %>%
  group_by(id) %>%
  do(foo(.)) %>%
  ungroup()