如何根据r中现有的多个列的名称部分地从这些列生成新列？_R

如何根据r中现有的多个列的名称部分地从这些列生成新列？

如何根据r中现有的多个列的名称部分地从这些列生成新列？,r,R,我想从现有的多个列中根据它们的名称生成新列。这是一些近似于我的表格的数据 id <- c("s001", "s002", "s003", "s004", "s005", "s006", "s007", "s008", "s009") group <- c(0,1,2,1,2,0,0,1,2) lh_app

我想从现有的多个列中根据它们的名称生成新列。这是一些近似于我的表格的数据

# Example data: nine ids, a group code, and lh_/rh_ prefixed thickness and
# volume columns for apple, banana and orange.
id <- sprintf("s%03d", 1:9)
group <- c(0, 1, 2, 1, 2, 0, 0, 1, 2)

# Thickness columns (kept as doubles, like the literal vectors they replace).
lh_apple_thickness <- as.numeric(1:9)
lh_banana_thickness <- 2 * seq_len(9) - 1
lh_orange_thickness <- 2 * seq_len(9)
rh_apple_thickness <- c(3, 7, 2, 1, 5, 4, 2, 6, 11)
rh_banana_thickness <- c(2, 4, 5, 4, 2, 4, 3, 1, 5)
rh_orange_thickness <- c(3, 6, 2, 4, 5, 6, 2, 5, 4)

# Volume columns hold exactly the same values as the thickness columns.
lh_apple_volume <- lh_apple_thickness
lh_banana_volume <- lh_banana_thickness
lh_orange_volume <- lh_orange_thickness
rh_apple_volume <- rh_apple_thickness
rh_banana_volume <- rh_banana_thickness
rh_orange_volume <- rh_orange_thickness

# Column names are taken from the variable names passed to data.frame().
df <- data.frame(
  id, group,
  lh_apple_thickness, lh_banana_thickness, lh_orange_thickness,
  rh_apple_thickness, rh_banana_thickness, rh_orange_thickness,
  lh_apple_volume, lh_banana_volume, lh_orange_volume,
  rh_apple_volume, rh_banana_volume, rh_orange_volume
)
# Treat the group code as a categorical variable.
df[["group"]] <- factor(df[["group"]])

我认为最简单的方法是先“整理”（tidy）数据，例如让每个 id 与每个水果/度量组合各占一行。
我只能分两步来做，一步是lh，一步是rh，我相信有更聪明的方法
# Long format for the lh_* columns: one row per id and lh column, with the
# column name in `lh` and its value in `lh_values`.
df_lh <- tidyr::pivot_longer(
  dplyr::select(df, id, group, dplyr::matches("^lh", ignore.case = FALSE)),
  cols = -c(id, group),
  names_to = "lh",
  values_to = "lh_values"
)

# Same reshaping for the rh_* columns.
df_rh <- tidyr::pivot_longer(
  dplyr::select(df, id, group, dplyr::matches("^rh", ignore.case = FALSE)),
  cols = -c(id, group),
  names_to = "rh",
  values_to = "rh_values"
)

# Joining on id and group pairs every lh row with every rh row of the same
# subject; the index is computed for each such pair.
df_result <- dplyr::mutate(
  dplyr::left_join(df_lh, df_rh, by = c("id", "group")),
  result = (lh_values - rh_values) / (lh_values + rh_values)
)

编辑
将数据放回宽格式
我假设您只想将同一种水果的左右两侧进行比较，并且只在相同属性（体积、厚度等）之间进行比较。
# Keep only the rows whose lh and rh columns refer to the same fruit (the
# middle name part) and the same measure (the last name part), then spread
# the results back to wide format: one column per lh/rh pair.
df_result_wide <- df_result %>%
  dplyr::filter(
    stringr::str_extract(lh, "(?<=_)[^_]+(?=_)") ==
      stringr::str_extract(rh, "(?<=_)[^_]+(?=_)"),
    stringr::str_extract(lh, "[^_]+$") == stringr::str_extract(rh, "[^_]+$")
  ) %>%
  tidyr::pivot_wider(
    id_cols = c("id", "group"),
    names_from = c("lh", "rh"),
    values_from = c("result")
  )

最后将结果与原始数据放在一起
# Attach the wide result columns to the original data, matching on id and group.
df_final <- dplyr::left_join(df, df_result_wide, by = c("id", "group"))

# Attach the wide result columns to the original data, matching on id and group.
df_final <- df %>% dplyr::left_join(df_result_wide, by = c("id", "group"))
这对我来说很有效，但前提是同一类别中 lh 列始终位于相应的 rh 列之前（否则您必须对代码做一些修改）：
# One category per <fruit>_<measure> pair: strip the lh_/rh_ prefix, drop
# duplicates, and discard names without an underscore (id, group).
categories <- names(df) %>%
  gsub("^[lr]h_", "", .) %>%
  unique() %>%
  {.[grepl("._", .)]}

# For each category pick its two columns (lh first, then rh -- this relies on
# the column order in df) and compute (lh - rh) / (lh + rh) row by row.
# The result is a matrix with one "<category>_index" column per category.
df2 <- categories %>%
  lapply(function(x) {
    df[, grepl(paste0("_", x, "$"), names(df)), drop = FALSE]
  }) %>%
  vapply(
    function(x) (x[[1]] - x[[2]]) / (x[[1]] + x[[2]]),
    numeric(nrow(df))
  ) %>%
  `colnames<-`(paste0(categories, "_index"))
这里是一个 base R 方案，使用 lapply。
先从数据的列名中提取 unique_fruits（不重复的水果）
和 unique_measure（不重复的度量），
因此它可以应用于任意数量的水果以及任意数量的度量。
# Only columns named <lh|rh>_<fruit>_<measure> take part; selecting them by
# pattern (instead of by position) keeps unrelated columns, including other
# columns whose names contain "_", out of the fruit/measure detection.
measure_cols <- grep('^(lh|rh)_[^_]+_[^_]+$', names(df), value = TRUE)

unique_fruits <- unique(sub('.*_(.*)_.*', '\\1', measure_cols))
unique_fruits
#[1] "apple"  "banana" "orange"
unique_measure <- unique(sub('.*_', '', measure_cols))
unique_measure
#[1] "thickness" "volume"   

# For every fruit build a data frame with one (lh - rh) / (lh + rh) column per
# measure. The fruit is part of the column name so that the combined result
# has unique names (index_<fruit>_<measure>) instead of repeated
# index_<measure> columns.
index_cols <- lapply(unique_fruits, function(fruit) {
  per_measure <- lapply(unique_measure, function(measure) {
    lhs <- df[[sprintf('lh_%s_%s', fruit, measure)]]
    rhs <- df[[sprintf('rh_%s_%s', fruit, measure)]]
    (lhs - rhs) / (lhs + rhs)
  })
  setNames(
    do.call(cbind.data.frame, per_measure),
    paste0('index_', fruit, '_', unique_measure)
  )
})

# Original columns first, followed by all index columns.
result <- cbind(df, do.call(cbind, index_cols))

一个简单的解决方案是首先生成所有水果和度量的组合，然后为每个可能的组合创建所需的索引。考虑这个函数：
#' Add (lh - rh) / (lh + rh) index columns to a data frame
#'
#' Builds every combination of the name parts supplied in `...`, joins the
#' parts of each combination with "_" to get a name `nm`, and stores
#' `(lh_nm - rh_nm) / (lh_nm + rh_nm)` in a new column called `nm_index`.
#'
#' @param df A data frame that contains the `lh_<nm>` and `rh_<nm>` columns
#'   for every requested combination.
#' @param ... Vectors of name parts, one vector per position in the column
#'   name (e.g. fruits, then measures).
#' @return `df` with one `<nm>_index` column appended per combination, in
#'   the order of the combinations (first vector varying fastest).
#' @examples
#' d <- data.frame(lh_apple_volume = c(1, 2), rh_apple_volume = c(3, 2))
#' gen_index(d, "apple", "volume")
gen_index <- function(df, ...) {
  # expand.grid() varies its first argument fastest, the same order that
  # purrr::cross() produced, without depending on that deprecated function.
  parts <- expand.grid(..., KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE)
  nms <- do.call(paste, c(unname(as.list(parts)), sep = "_"))

  lh <- paste0("lh_", nms)
  rh <- paste0("rh_", nms)
  res <- paste0(nms, "_index")

  # Fail early with a readable message instead of the obscure replacement
  # error that arithmetic on a missing (NULL) column would trigger.
  missing_cols <- setdiff(c(lh, rh), names(df))
  if (length(missing_cols) > 0L) {
    stop(
      "Columns not found in `df`: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  for (i in seq_along(nms)) {
    left <- df[[lh[[i]]]]
    right <- df[[rh[[i]]]]
    df[[res[[i]]]] <- (left - right) / (left + right)
  }
  df
}

调用 gen_index(df, c("apple", "banana", "orange"), c("thickness", "volume")) 会得到：
    id group lh_apple_thickness lh_banana_thickness lh_orange_thickness rh_apple_thickness rh_banana_thickness rh_orange_thickness lh_apple_volume lh_banana_volume
1 s001     0                  1                   1                   2                  3                   2                   3               1                1
2 s002     1                  2                   3                   4                  7                   4                   6               2                3
3 s003     2                  3                   5                   6                  2                   5                   2               3                5
4 s004     1                  4                   7                   8                  1                   4                   4               4                7
5 s005     2                  5                   9                  10                  5                   2                   5               5                9
6 s006     0                  6                  11                  12                  4                   4                   6               6               11
7 s007     0                  7                  13                  14                  2                   3                   2               7               13
8 s008     1                  8                  15                  16                  6                   1                   5               8               15
9 s009     2                  9                  17                  18                 11                   5                   4               9               17
  lh_orange_volume rh_apple_volume rh_banana_volume rh_orange_volume apple_thickness_index banana_thickness_index orange_thickness_index apple_volume_index
1                2               3                2                3            -0.5000000             -0.3333333             -0.2000000         -0.5000000
2                4               7                4                6            -0.5555556             -0.1428571             -0.2000000         -0.5555556
3                6               2                5                2             0.2000000              0.0000000              0.5000000          0.2000000
4                8               1                4                4             0.6000000              0.2727273              0.3333333          0.6000000
5               10               5                2                5             0.0000000              0.6363636              0.3333333          0.0000000
6               12               4                4                6             0.2000000              0.4666667              0.3333333          0.2000000
7               14               2                3                2             0.5555556              0.6250000              0.7500000          0.5555556
8               16               6                1                5             0.1428571              0.8750000              0.5238095          0.1428571
9               18              11                5                4            -0.1000000              0.5454545              0.6363636         -0.1000000
  banana_volume_index orange_volume_index
1          -0.3333333          -0.2000000
2          -0.1428571          -0.2000000
3           0.0000000           0.5000000
4           0.2727273           0.3333333
5           0.6363636           0.3333333
6           0.4666667           0.3333333
7           0.6250000           0.7500000
8           0.8750000           0.5238095
9           0.5454545           0.6363636

如果只想计算数据帧子集的索引：
# Add index columns only for the apple and banana thickness combinations.
gen_index(df, c("apple", "banana"), "thickness")

输出
    id group lh_apple_thickness lh_banana_thickness lh_orange_thickness rh_apple_thickness rh_banana_thickness rh_orange_thickness lh_apple_volume lh_banana_volume
1 s001     0                  1                   1                   2                  3                   2                   3               1                1
2 s002     1                  2                   3                   4                  7                   4                   6               2                3
3 s003     2                  3                   5                   6                  2                   5                   2               3                5
4 s004     1                  4                   7                   8                  1                   4                   4               4                7
5 s005     2                  5                   9                  10                  5                   2                   5               5                9
6 s006     0                  6                  11                  12                  4                   4                   6               6               11
7 s007     0                  7                  13                  14                  2                   3                   2               7               13
8 s008     1                  8                  15                  16                  6                   1                   5               8               15
9 s009     2                  9                  17                  18                 11                   5                   4               9               17
  lh_orange_volume rh_apple_volume rh_banana_volume rh_orange_volume apple_thickness_index banana_thickness_index
1                2               3                2                3            -0.5000000             -0.3333333
2                4               7                4                6            -0.5555556             -0.1428571
3                6               2                5                2             0.2000000              0.0000000
4                8               1                4                4             0.6000000              0.2727273
5               10               5                2                5             0.0000000              0.6363636
6               12               4                4                6             0.2000000              0.4666667
7               14               2                3                2             0.5555556              0.6250000
8               16               6                1                5             0.1428571              0.8750000
9               18              11                5                4            -0.1000000              0.5454545

非常感谢，这个示例可以运行。事实上，我的真实数据中还有其他一些列名带有“_”，所以我在应用到真实数据时出错了，可能需要先删除这些带“_”的其他列。
非常感谢，它确实有效。但是，我需要在原始数据框中添加这些新列，我会对问题进行编辑以使其更清楚。嗨，Ella_may，我添加了一些代码，将数据重新转换为宽格式，并与原始数据连接。让我知道它是否有效。另一件事：在我看来，以长格式工作更容易；从长远来看，如果你改用长格式，可能会省去不少麻烦。祝你好运 :) 嗨，贾格，非常感谢你。我还有一些其他列的名称中带有“_”，这些列正是导致错误的主要原因。谢谢你帮我 :)
# Add index columns for every combination of the three fruits and two measures.
gen_index(df, c("apple", "banana", "orange"), c("thickness", "volume"))

    id group lh_apple_thickness lh_banana_thickness lh_orange_thickness rh_apple_thickness rh_banana_thickness rh_orange_thickness lh_apple_volume lh_banana_volume
1 s001     0                  1                   1                   2                  3                   2                   3               1                1
2 s002     1                  2                   3                   4                  7                   4                   6               2                3
3 s003     2                  3                   5                   6                  2                   5                   2               3                5
4 s004     1                  4                   7                   8                  1                   4                   4               4                7
5 s005     2                  5                   9                  10                  5                   2                   5               5                9
6 s006     0                  6                  11                  12                  4                   4                   6               6               11
7 s007     0                  7                  13                  14                  2                   3                   2               7               13
8 s008     1                  8                  15                  16                  6                   1                   5               8               15
9 s009     2                  9                  17                  18                 11                   5                   4               9               17
  lh_orange_volume rh_apple_volume rh_banana_volume rh_orange_volume apple_thickness_index banana_thickness_index orange_thickness_index apple_volume_index
1                2               3                2                3            -0.5000000             -0.3333333             -0.2000000         -0.5000000
2                4               7                4                6            -0.5555556             -0.1428571             -0.2000000         -0.5555556
3                6               2                5                2             0.2000000              0.0000000              0.5000000          0.2000000
4                8               1                4                4             0.6000000              0.2727273              0.3333333          0.6000000
5               10               5                2                5             0.0000000              0.6363636              0.3333333          0.0000000
6               12               4                4                6             0.2000000              0.4666667              0.3333333          0.2000000
7               14               2                3                2             0.5555556              0.6250000              0.7500000          0.5555556
8               16               6                1                5             0.1428571              0.8750000              0.5238095          0.1428571
9               18              11                5                4            -0.1000000              0.5454545              0.6363636         -0.1000000
  banana_volume_index orange_volume_index
1          -0.3333333          -0.2000000
2          -0.1428571          -0.2000000
3           0.0000000           0.5000000
4           0.2727273           0.3333333
5           0.6363636           0.3333333
6           0.4666667           0.3333333
7           0.6250000           0.7500000
8           0.8750000           0.5238095
9           0.5454545           0.6363636

# Add index columns only for the apple and banana thickness combinations.
gen_index(df, c("apple", "banana"), "thickness")

    id group lh_apple_thickness lh_banana_thickness lh_orange_thickness rh_apple_thickness rh_banana_thickness rh_orange_thickness lh_apple_volume lh_banana_volume
1 s001     0                  1                   1                   2                  3                   2                   3               1                1
2 s002     1                  2                   3                   4                  7                   4                   6               2                3
3 s003     2                  3                   5                   6                  2                   5                   2               3                5
4 s004     1                  4                   7                   8                  1                   4                   4               4                7
5 s005     2                  5                   9                  10                  5                   2                   5               5                9
6 s006     0                  6                  11                  12                  4                   4                   6               6               11
7 s007     0                  7                  13                  14                  2                   3                   2               7               13
8 s008     1                  8                  15                  16                  6                   1                   5               8               15
9 s009     2                  9                  17                  18                 11                   5                   4               9               17
  lh_orange_volume rh_apple_volume rh_banana_volume rh_orange_volume apple_thickness_index banana_thickness_index
1                2               3                2                3            -0.5000000             -0.3333333
2                4               7                4                6            -0.5555556             -0.1428571
3                6               2                5                2             0.2000000              0.0000000
4                8               1                4                4             0.6000000              0.2727273
5               10               5                2                5             0.0000000              0.6363636
6               12               4                4                6             0.2000000              0.4666667
7               14               2                3                2             0.5555556              0.6250000
8               16               6                1                5             0.1428571              0.8750000
9               18              11                5                4            -0.1000000              0.5454545