如何在 R 中根据多个现有列的名称(部分匹配),由这些列生成新列?

如何根据r中现有的多个列的名称部分地从这些列生成新列?,r,R,我想从现有的多个列中根据它们的名称生成新列。 这是一些近似于我的表格的数据 id <- c("s001", "s002", "s003", "s004", "s005", "s006", "s007", "s008", "s009") group <- c(0,1,2,1,2,0,0,1,2) lh_app

我想从现有的多个列中根据它们的名称生成新列。 这是一些近似于我的表格的数据

# Example data: nine subjects, each with lh_/rh_ thickness and volume values
# for three fruits.
id <- sprintf("s%03d", 1:9)
group <- c(0, 1, 2, 1, 2, 0, 0, 1, 2)

# Thickness columns.
lh_apple_thickness <- c(1, 2, 3, 4, 5, 6, 7, 8, 9)
lh_banana_thickness <- c(1, 3, 5, 7, 9, 11, 13, 15, 17)
lh_orange_thickness <- c(2, 4, 6, 8, 10, 12, 14, 16, 18)
rh_apple_thickness <- c(3, 7, 2, 1, 5, 4, 2, 6, 11)
rh_banana_thickness <- c(2, 4, 5, 4, 2, 4, 3, 1, 5)
rh_orange_thickness <- c(3, 6, 2, 4, 5, 6, 2, 5, 4)

# Volume columns.
lh_apple_volume <- c(1, 2, 3, 4, 5, 6, 7, 8, 9)
lh_banana_volume <- c(1, 3, 5, 7, 9, 11, 13, 15, 17)
lh_orange_volume <- c(2, 4, 6, 8, 10, 12, 14, 16, 18)
rh_apple_volume <- c(3, 7, 2, 1, 5, 4, 2, 6, 11)
rh_banana_volume <- c(2, 4, 5, 4, 2, 4, 3, 1, 5)
rh_orange_volume <- c(3, 6, 2, 4, 5, 6, 2, 5, 4)

# Assemble the table. `group` becomes a factor inside the data frame only;
# the free-standing `group` vector stays numeric.
df <- data.frame(
  id,
  group = as.factor(group),
  lh_apple_thickness,
  lh_banana_thickness,
  lh_orange_thickness,
  rh_apple_thickness,
  rh_banana_thickness,
  rh_orange_thickness,
  lh_apple_volume,
  lh_banana_volume,
  lh_orange_volume,
  rh_apple_volume,
  rh_banana_volume,
  rh_orange_volume
)

我认为最简单的方法是先“整理”(tidy)数据,例如让每个水果组合占一行。

我只能分两步来做,一步是lh,一步是rh,我相信有更聪明的方法

# Long form of the lh_* columns: one row per (id, group, lh column name).
df_lh <- df %>%
  dplyr::select(id, group, grep("^lh", names(.))) %>%
  tidyr::pivot_longer(
    cols = -c(id, group),
    names_to = "lh",
    values_to = "lh_values"
  )

# Long form of the rh_* columns, built the same way.
df_rh <- df %>%
  dplyr::select(id, group, grep("^rh", names(.))) %>%
  tidyr::pivot_longer(
    cols = -c(id, group),
    names_to = "rh",
    values_to = "rh_values"
  )

# Joining on id/group alone pairs every lh row with every rh row of the same
# subject; `result` is the (lh - rh) / (lh + rh) index for each such pair.
df_result <- dplyr::left_join(df_lh, df_rh, by = c("id", "group")) %>%
  dplyr::mutate(result = (lh_values - rh_values) / (lh_values + rh_values))
编辑:将数据转换回宽格式

我假设您只想把同一种水果的左右两侧相互比较,并且只比较相同的属性(体积与体积、厚度与厚度等)

# NOTE(review): the original snippet was garbled beyond recovery (translated
# identifiers, curly quotes, lost regex characters). This is a reconstruction
# of its intent; verify against the original answer if available.
#
# Keep only the pairs where the lh and rh columns describe the same fruit and
# the same measure (the names are equal once the lh_/rh_ prefix is removed),
# then spread the index back to one column per pair.
df_result_wide <- df_result %>%
  dplyr::filter(sub("^lh_", "", lh) == sub("^rh_", "", rh)) %>%
  tidyr::pivot_wider(
    id_cols = c("id", "group"),
    names_from = c("lh", "rh"),
    values_from = c("result")
  )
最后将结果与原始数据放在一起

# Attach the wide-format index columns to the original data, matching rows on
# id and group.
df_final <- df %>%
  dplyr::left_join(df_result_wide, by = c("id", "group"))

下面的方法对我有效,但前提是每个类别的 lh_ 列始终位于对应的 rh_ 列之前(否则您必须对代码做一些修改):

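# NOTE: reconstructed from a garbled snippet; verify against the original answer.
categories <- names(df) %>%
  gsub("^.._", "", .) %>%
  unique() %>%
  {.[grepl("_", .)]}
df2 <- lapply(categories, function(x) df[, grepl(x, names(df))]) %>%
  sapply(function(x) rbind(apply(x, 1, function(y) (y[1] - y[2]) / (y[1] + y[2])))) %>%
  `colnames<-`(paste0(categories, "_index")) %>%
  cbind(df, .)

这里是一个 base R 选项,使用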
`lapply`。
`unique_fruits` 和 `unique_measure` 都是直接从数据的列名中提取的,
因此它可以应用于任意数量的水果以及任意数量的度量

# Only columns named <lh|rh>_<fruit>_<measure> take part, so unrelated columns
# whose names happen to contain "_" cannot corrupt the extracted labels.
paired_cols <- grep("^(lh|rh)_[^_]+_[^_]+$", names(df), value = TRUE)

unique_fruits <- unique(sub("^[^_]+_([^_]+)_[^_]+$", "\\1", paired_cols))
unique_fruits
#[1] "apple"  "banana" "orange"
unique_measure <- unique(sub("^.*_", "", paired_cols))
unique_measure
#[1] "thickness" "volume"   

# For every fruit, build one data frame holding the (lh - rh) / (lh + rh)
# index of each measure. The fruit is part of each column name
# (<fruit>_<measure>_index) so the combined result has no duplicated names.
index_cols <- lapply(unique_fruits, function(fruit) {
  per_measure <- lapply(unique_measure, function(measure) {
    lhs <- df[[sprintf("lh_%s_%s", fruit, measure)]]
    rhs <- df[[sprintf("rh_%s_%s", fruit, measure)]]
    (lhs - rhs) / (lhs + rhs)
  })
  setNames(
    do.call(cbind.data.frame, per_measure),
    paste0(fruit, "_", unique_measure, "_index")
  )
})

# Original columns first, followed by the new index columns.
result <- cbind(df, do.call(cbind, index_cols))

一个简单的解决方案是首先生成所有水果和度量的组合,然后为每个可能的组合创建所需的索引。考虑这个函数:

#' Add (lh - rh) / (lh + rh) index columns to a data frame
#'
#' Every combination of the label vectors passed in `...` is joined with "_"
#' to form a stem. For each stem the columns `lh_<stem>` and `rh_<stem>` are
#' read from `df` and a new column `<stem>_index` is appended.
#'
#' @param df A data frame containing the `lh_<stem>` / `rh_<stem>` columns.
#' @param ... Vectors of name parts (e.g. fruits, then measures). The first
#'   vector varies fastest when the combinations are enumerated.
#' @return `df` with one `<stem>_index` column appended per combination;
#'   `df` unchanged when no name parts are supplied.
gen_index <- function(df, ...) {
  parts <- list(...)
  if (length(parts) == 0L) {
    return(df)
  }

  # Base-R replacement for the deprecated purrr::cross(); expand.grid also
  # varies the first vector fastest, so the column order is unchanged.
  grid <- expand.grid(parts, stringsAsFactors = FALSE, KEEP.OUT.ATTRS = FALSE)
  # unname() keeps a part named `sep`/`collapse` from being taken as a paste()
  # argument.
  nms <- do.call(paste, c(unname(as.list(grid)), sep = "_"))

  lh <- paste0("lh_", nms)
  rh <- paste0("rh_", nms)

  # Fail with a readable message instead of an opaque replacement-length error.
  missing_cols <- setdiff(c(lh, rh), names(df))
  if (length(missing_cols) > 0L) {
    stop(
      "Columns not found in `df`: ", paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  for (i in seq_along(nms)) {
    left <- df[[lh[[i]]]]
    right <- df[[rh[[i]]]]
    df[[paste0(nms[[i]], "_index")]] <- (left - right) / (left + right)
  }
  df
}
调用 gen_index(df, c("apple", "banana", "orange"), c("thickness", "volume")) 会得到

    id group lh_apple_thickness lh_banana_thickness lh_orange_thickness rh_apple_thickness rh_banana_thickness rh_orange_thickness lh_apple_volume lh_banana_volume
1 s001     0                  1                   1                   2                  3                   2                   3               1                1
2 s002     1                  2                   3                   4                  7                   4                   6               2                3
3 s003     2                  3                   5                   6                  2                   5                   2               3                5
4 s004     1                  4                   7                   8                  1                   4                   4               4                7
5 s005     2                  5                   9                  10                  5                   2                   5               5                9
6 s006     0                  6                  11                  12                  4                   4                   6               6               11
7 s007     0                  7                  13                  14                  2                   3                   2               7               13
8 s008     1                  8                  15                  16                  6                   1                   5               8               15
9 s009     2                  9                  17                  18                 11                   5                   4               9               17
  lh_orange_volume rh_apple_volume rh_banana_volume rh_orange_volume apple_thickness_index banana_thickness_index orange_thickness_index apple_volume_index
1                2               3                2                3            -0.5000000             -0.3333333             -0.2000000         -0.5000000
2                4               7                4                6            -0.5555556             -0.1428571             -0.2000000         -0.5555556
3                6               2                5                2             0.2000000              0.0000000              0.5000000          0.2000000
4                8               1                4                4             0.6000000              0.2727273              0.3333333          0.6000000
5               10               5                2                5             0.0000000              0.6363636              0.3333333          0.0000000
6               12               4                4                6             0.2000000              0.4666667              0.3333333          0.2000000
7               14               2                3                2             0.5555556              0.6250000              0.7500000          0.5555556
8               16               6                1                5             0.1428571              0.8750000              0.5238095          0.1428571
9               18              11                5                4            -0.1000000              0.5454545              0.6363636         -0.1000000
  banana_volume_index orange_volume_index
1          -0.3333333          -0.2000000
2          -0.1428571          -0.2000000
3           0.0000000           0.5000000
4           0.2727273           0.3333333
5           0.6363636           0.3333333
6           0.4666667           0.3333333
7           0.6250000           0.7500000
8           0.8750000           0.5238095
9           0.5454545           0.6363636
如果只想计算数据帧子集的索引:

# Compute the index only for the apple and banana thickness columns; all
# original columns are kept and two *_index columns are appended.
gen_index(df, c("apple", "banana"), "thickness")
输出

    id group lh_apple_thickness lh_banana_thickness lh_orange_thickness rh_apple_thickness rh_banana_thickness rh_orange_thickness lh_apple_volume lh_banana_volume
1 s001     0                  1                   1                   2                  3                   2                   3               1                1
2 s002     1                  2                   3                   4                  7                   4                   6               2                3
3 s003     2                  3                   5                   6                  2                   5                   2               3                5
4 s004     1                  4                   7                   8                  1                   4                   4               4                7
5 s005     2                  5                   9                  10                  5                   2                   5               5                9
6 s006     0                  6                  11                  12                  4                   4                   6               6               11
7 s007     0                  7                  13                  14                  2                   3                   2               7               13
8 s008     1                  8                  15                  16                  6                   1                   5               8               15
9 s009     2                  9                  17                  18                 11                   5                   4               9               17
  lh_orange_volume rh_apple_volume rh_banana_volume rh_orange_volume apple_thickness_index banana_thickness_index
1                2               3                2                3            -0.5000000             -0.3333333
2                4               7                4                6            -0.5555556             -0.1428571
3                6               2                5                2             0.2000000              0.0000000
4                8               1                4                4             0.6000000              0.2727273
5               10               5                2                5             0.0000000              0.6363636
6               12               4                4                6             0.2000000              0.4666667
7               14               2                3                2             0.5555556              0.6250000
8               16               6                1                5             0.1428571              0.8750000
9               18              11                5                4            -0.1000000              0.5454545

非常感谢。对这个例子来说确实如此。事实上,我的真实数据中还有其他一些名称带有“_”的列,所以我在应用到真实数据时出错了。可能需要先排除名称中带有
`_`
的其他列。非常感谢,它确实有效。但是,我需要把这些新列添加到原始数据框中。我会编辑问题以使其更清楚。嗨,Ella_may,我添加了一些代码,将数据重新转换为宽格式,并与原始数据连接。让我知道它是否有效。另一件事:在我看来,以长格式工作更容易。从长远来看,如果你改用长格式,可能会省去不少麻烦。祝你好运:)嗨,贾格,非常感谢你。我还有一些其他列的名称中带有
`_`。这些列正是导致错误的主要原因。谢谢你帮我:)
# Compute the index for every fruit/measure combination (3 fruits x 2
# measures), appending six *_index columns to df.
gen_index(df, c("apple", "banana", "orange"), c("thickness", "volume"))
    id group lh_apple_thickness lh_banana_thickness lh_orange_thickness rh_apple_thickness rh_banana_thickness rh_orange_thickness lh_apple_volume lh_banana_volume
1 s001     0                  1                   1                   2                  3                   2                   3               1                1
2 s002     1                  2                   3                   4                  7                   4                   6               2                3
3 s003     2                  3                   5                   6                  2                   5                   2               3                5
4 s004     1                  4                   7                   8                  1                   4                   4               4                7
5 s005     2                  5                   9                  10                  5                   2                   5               5                9
6 s006     0                  6                  11                  12                  4                   4                   6               6               11
7 s007     0                  7                  13                  14                  2                   3                   2               7               13
8 s008     1                  8                  15                  16                  6                   1                   5               8               15
9 s009     2                  9                  17                  18                 11                   5                   4               9               17
  lh_orange_volume rh_apple_volume rh_banana_volume rh_orange_volume apple_thickness_index banana_thickness_index orange_thickness_index apple_volume_index
1                2               3                2                3            -0.5000000             -0.3333333             -0.2000000         -0.5000000
2                4               7                4                6            -0.5555556             -0.1428571             -0.2000000         -0.5555556
3                6               2                5                2             0.2000000              0.0000000              0.5000000          0.2000000
4                8               1                4                4             0.6000000              0.2727273              0.3333333          0.6000000
5               10               5                2                5             0.0000000              0.6363636              0.3333333          0.0000000
6               12               4                4                6             0.2000000              0.4666667              0.3333333          0.2000000
7               14               2                3                2             0.5555556              0.6250000              0.7500000          0.5555556
8               16               6                1                5             0.1428571              0.8750000              0.5238095          0.1428571
9               18              11                5                4            -0.1000000              0.5454545              0.6363636         -0.1000000
  banana_volume_index orange_volume_index
1          -0.3333333          -0.2000000
2          -0.1428571          -0.2000000
3           0.0000000           0.5000000
4           0.2727273           0.3333333
5           0.6363636           0.3333333
6           0.4666667           0.3333333
7           0.6250000           0.7500000
8           0.8750000           0.5238095
9           0.5454545           0.6363636
# Compute the index only for the apple and banana thickness columns; all
# original columns are kept and two *_index columns are appended.
gen_index(df, c("apple", "banana"), "thickness")
    id group lh_apple_thickness lh_banana_thickness lh_orange_thickness rh_apple_thickness rh_banana_thickness rh_orange_thickness lh_apple_volume lh_banana_volume
1 s001     0                  1                   1                   2                  3                   2                   3               1                1
2 s002     1                  2                   3                   4                  7                   4                   6               2                3
3 s003     2                  3                   5                   6                  2                   5                   2               3                5
4 s004     1                  4                   7                   8                  1                   4                   4               4                7
5 s005     2                  5                   9                  10                  5                   2                   5               5                9
6 s006     0                  6                  11                  12                  4                   4                   6               6               11
7 s007     0                  7                  13                  14                  2                   3                   2               7               13
8 s008     1                  8                  15                  16                  6                   1                   5               8               15
9 s009     2                  9                  17                  18                 11                   5                   4               9               17
  lh_orange_volume rh_apple_volume rh_banana_volume rh_orange_volume apple_thickness_index banana_thickness_index
1                2               3                2                3            -0.5000000             -0.3333333
2                4               7                4                6            -0.5555556             -0.1428571
3                6               2                5                2             0.2000000              0.0000000
4                8               1                4                4             0.6000000              0.2727273
5               10               5                2                5             0.0000000              0.6363636
6               12               4                4                6             0.2000000              0.4666667
7               14               2                3                2             0.5555556              0.6250000
8               16               6                1                5             0.1428571              0.8750000
9               18              11                5                4            -0.1000000              0.5454545