R 根据不同的组条件创建列

R 根据不同的组条件创建列,r,if-statement,select,multiple-columns,R,If Statement,Select,Multiple Columns,我有一个不同列的数据集。看起来是这样 df <- data.frame(PatientID = c("0002" ,"0004", "0005", "0006" ,"0009" ,"0010" ,"0018", "0019" ,"0020" ,"0027", "0039"

我有一个不同列的数据集。看起来是这样

# Example data: one row per patient, six numeric measurements (A-E, G).
# Columns C and D hold identical values; B and G each contain one NA.
df <- data.frame(
  PatientID = c(
    "0002", "0004", "0005", "0006", "0009", "0010", "0018",
    "0019", "0020", "0027", "0039", "0041", "0042", "0043",
    "0044", "0045", "0046", "0047", "0048", "0049", "0055"
  ),
  A = c(
    987.805, 977.146, 790.809, 964.315, 1014.020, 952.311, 992.967,
    950.797, 958.975, 960.712, 958.117, 947.465, 902.852, 961.417,
    985.124, 994.178, 930.141, 1007.790, 948.848, 1027.110, 999.414
  ),
  B = c(
    998.988, 972.606, 998.680, 955.037, 972.941, 1020.560, 947.751,
    1029.560, 955.540, 911.606, 964.039, NA, 988.087, 902.367,
    959.338, 1029.050, 925.162, 987.374, 1066.400, 957.512, 917.597
  ),
  C = c(
    975.634, 987.140, 961.810, 929.466, 978.166, 1005.820, 925.752,
    969.469, 943.398, 936.034, 965.292, 996.404, 920.610, 967.047,
    986.565, 913.517, 893.428, 921.606, 976.192, 929.590, 950.493
  ),
  D = c(
    975.634, 987.140, 961.810, 929.466, 978.166, 1005.820, 925.752,
    969.469, 943.398, 936.034, 965.292, 996.404, 920.610, 967.047,
    986.565, 913.517, 893.428, 921.606, 976.192, 929.590, 950.493
  ),
  E = c(
    1006.330, 1028.070, 975.554, 954.274, 1005.910, 949.969, 992.820,
    977.048, 934.407, 948.913, 944.578, 917.564, 975.301, 961.375,
    955.296, 961.128, 998.119, 1009.110, 994.891, 1000.170, 982.763
  ),
  G = c(
    951.684, 958.990, 944.432, 944.654, 924.680, 955.927, 972.674,
    949.384, 973.348, 984.392, 943.894, 961.468, 995.368, 994.997,
    973.175, 979.454, 952.605, 930.744, NA, 1015.150, 956.507
  ),
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  stringsAsFactors = FALSE
)
对于数据框 df,我想新增一列(TRUE/FALSE),判断各列的值是否高于下列阈值:
  • 对于A和B->990
  • 对于C和D->1000
  • 对于E和G->1005
  • 否则为FALSE

    基本上,要使最后一列为真,需要有3列或更多列为真。输出如下所示(高于阈值==TRUE,以绿色绘制):

    我该怎么设置呢?我希望这是清楚的,但如果不清楚,请随时询问。


    非常感谢

    我们创建一个命名的列表(或一个命名的向量),在“PatientID”以外的列之间循环,用列名(cur_column())提取列表中对应的元素,通过在 .names 中添加后缀 _new 创建新的逻辑列,然后使用 rowSums 检查每行的 TRUE 数是否大于或等于3,以创建“above_threshold”列。

    library(dplyr)

    # Per-column cut-offs, keyed by column name so that each column can look up
    # its own threshold through cur_column().
    lst1 <- list(A = 990, B = 990, C = 1000, D = 1000, E = 1005, G = 1005)

    # For every column that has a cut-off, test whether the value is strictly
    # greater than it, then count the TRUEs per row (an NA comparison counts as
    # "not above"). A row is flagged when at least three columns are above.
    # The comparisons are fed straight into rowSums(), so no temporary *_new
    # columns are created and the deprecated cur_data() is not needed; the
    # columns are selected by the names in lst1 so every column compared is
    # guaranteed to have a threshold.
    df %>%
      mutate(
        above_threshold = rowSums(
          across(all_of(names(lst1)), ~ .x > lst1[[cur_column()]]),
          na.rm = TRUE
        ) >= 3
      )
    

    最后,在神奇的akrun的大力帮助下,以下是解决方案:

    # Flag rows where at least three of the six measurements reach their
    # cut-off (>=); NA comparisons are ignored by na.rm = TRUE.
    df %>%
      mutate(
        above_treshold = rowSums(
          cbind(
            A >= 990, B >= 990,
            C >= 1000, D >= 1000,
            E >= 1005, G >= 1005
          ),
          na.rm = TRUE
        ) >= 3
      )
    
    输出:

       PatientID        A        B        C        D        E        G above_treshold
    1       0002  987.805  998.988  975.634  975.634 1006.330  951.684          FALSE
    2       0004  977.146  972.606  987.140  987.140 1028.070  958.990          FALSE
    3       0005  790.809  998.680  961.810  961.810  975.554  944.432          FALSE
    4       0006  964.315  955.037  929.466  929.466  954.274  944.654          FALSE
    5       0009 1014.020  972.941  978.166  978.166 1005.910  924.680          FALSE
    6       0010  952.311 1020.560 1005.820 1005.820  949.969  955.927           TRUE
    7       0018  992.967  947.751  925.752  925.752  992.820  972.674          FALSE
    8       0019  950.797 1029.560  969.469  969.469  977.048  949.384          FALSE
    9       0020  958.975  955.540  943.398  943.398  934.407  973.348          FALSE
    10      0027  960.712  911.606  936.034  936.034  948.913  984.392          FALSE
    11      0039  958.117  964.039  965.292  965.292  944.578  943.894          FALSE
    12      0041  947.465       NA  996.404  996.404  917.564  961.468          FALSE
    13      0042  902.852  988.087  920.610  920.610  975.301  995.368          FALSE
    14      0043  961.417  902.367  967.047  967.047  961.375  994.997          FALSE
    15      0044  985.124  959.338  986.565  986.565  955.296  973.175          FALSE
    16      0045  994.178 1029.050  913.517  913.517  961.128  979.454          FALSE
    17      0046  930.141  925.162  893.428  893.428  998.119  952.605          FALSE
    18      0047 1007.790  987.374  921.606  921.606 1009.110  930.744          FALSE
    19      0048  948.848 1066.400  976.192  976.192  994.891       NA          FALSE
    20      0049 1027.110  957.512  929.590  929.590 1000.170 1015.150          FALSE
    21      0055  999.414  917.597  950.493  950.493  982.763  956.507          FALSE
    
    Base R(基础 R)解法

    使用@akrun思想

    # Per-column cut-offs, keyed by column name.
    standard <- c(A = 990, B = 990, C = 1000, D = 1000, E = 1005, G = 1005)

    # Pick the measurement columns by name rather than by position (df[, -1]):
    # once `res` (or any other column) has been added to df, a positional
    # selection no longer lines up with the six thresholds and sweep() would
    # recycle them against the wrong columns.
    tmp <- sweep(
      df[names(standard)],
      MARGIN = 2, STATS = standard, FUN = `>=`
    )

    # tmp is a logical matrix; count the TRUEs per row, ignoring NA comparisons,
    # and flag rows with at least three columns at or above their cut-off.
    df$res <- rowSums(tmp, na.rm = TRUE) >= 3
    
       PatientID        A        B        C        D        E        G   res
    1       0002  987.805  998.988  975.634  975.634 1006.330  951.684 FALSE
    2       0004  977.146  972.606  987.140  987.140 1028.070  958.990 FALSE
    3       0005  790.809  998.680  961.810  961.810  975.554  944.432 FALSE
    4       0006  964.315  955.037  929.466  929.466  954.274  944.654 FALSE
    5       0009 1014.020  972.941  978.166  978.166 1005.910  924.680 FALSE
    6       0010  952.311 1020.560 1005.820 1005.820  949.969  955.927  TRUE
    7       0018  992.967  947.751  925.752  925.752  992.820  972.674 FALSE
    8       0019  950.797 1029.560  969.469  969.469  977.048  949.384 FALSE
    9       0020  958.975  955.540  943.398  943.398  934.407  973.348 FALSE
    10      0027  960.712  911.606  936.034  936.034  948.913  984.392 FALSE
    11      0039  958.117  964.039  965.292  965.292  944.578  943.894 FALSE
    12      0041  947.465       NA  996.404  996.404  917.564  961.468 FALSE
    13      0042  902.852  988.087  920.610  920.610  975.301  995.368 FALSE
    14      0043  961.417  902.367  967.047  967.047  961.375  994.997 FALSE
    15      0044  985.124  959.338  986.565  986.565  955.296  973.175 FALSE
    16      0045  994.178 1029.050  913.517  913.517  961.128  979.454 FALSE
    17      0046  930.141  925.162  893.428  893.428  998.119  952.605 FALSE
    18      0047 1007.790  987.374  921.606  921.606 1009.110  930.744 FALSE
    19      0048  948.848 1066.400  976.192  976.192  994.891       NA FALSE
    20      0049 1027.110  957.512  929.590  929.590 1000.170 1015.150 FALSE
    21      0055  999.414  917.597  950.493  950.493  982.763  956.507 FALSE
    

    感谢Akrun!您能确认这考虑到了高于阈值的3列或更多列吗?我在任何地方都看不到>3。而且,第一个应该是A:B,对吗?谢谢
    @Lili 只需将 across 更改为 across(A:G
    @TarJae 您的代码在A:G上循环,然后单独提取A:F,这一点不太清楚。
    @TarJae 你是说 df %>% mutate(x = rowSums(cbind(A >= 900, B >= 900, C >= 1000, D >= 1000, E >= 1005, G >= 1005), na.rm = TRUE) >= 3)
    很高兴我的问题激发了更多的问题!!哇哦!:)
    # Per-column cut-offs, keyed by column name.
    standard <- c(A = 990, B = 990, C = 1000, D = 1000, E = 1005, G = 1005)

    # Pick the measurement columns by name rather than by position (df[, -1]):
    # once `res` (or any other column) has been added to df, a positional
    # selection no longer lines up with the six thresholds and sweep() would
    # recycle them against the wrong columns.
    tmp <- sweep(
      df[names(standard)],
      MARGIN = 2, STATS = standard, FUN = `>=`
    )

    # tmp is a logical matrix; count the TRUEs per row, ignoring NA comparisons,
    # and flag rows with at least three columns at or above their cut-off.
    df$res <- rowSums(tmp, na.rm = TRUE) >= 3
    
       PatientID        A        B        C        D        E        G   res
    1       0002  987.805  998.988  975.634  975.634 1006.330  951.684 FALSE
    2       0004  977.146  972.606  987.140  987.140 1028.070  958.990 FALSE
    3       0005  790.809  998.680  961.810  961.810  975.554  944.432 FALSE
    4       0006  964.315  955.037  929.466  929.466  954.274  944.654 FALSE
    5       0009 1014.020  972.941  978.166  978.166 1005.910  924.680 FALSE
    6       0010  952.311 1020.560 1005.820 1005.820  949.969  955.927  TRUE
    7       0018  992.967  947.751  925.752  925.752  992.820  972.674 FALSE
    8       0019  950.797 1029.560  969.469  969.469  977.048  949.384 FALSE
    9       0020  958.975  955.540  943.398  943.398  934.407  973.348 FALSE
    10      0027  960.712  911.606  936.034  936.034  948.913  984.392 FALSE
    11      0039  958.117  964.039  965.292  965.292  944.578  943.894 FALSE
    12      0041  947.465       NA  996.404  996.404  917.564  961.468 FALSE
    13      0042  902.852  988.087  920.610  920.610  975.301  995.368 FALSE
    14      0043  961.417  902.367  967.047  967.047  961.375  994.997 FALSE
    15      0044  985.124  959.338  986.565  986.565  955.296  973.175 FALSE
    16      0045  994.178 1029.050  913.517  913.517  961.128  979.454 FALSE
    17      0046  930.141  925.162  893.428  893.428  998.119  952.605 FALSE
    18      0047 1007.790  987.374  921.606  921.606 1009.110  930.744 FALSE
    19      0048  948.848 1066.400  976.192  976.192  994.891       NA FALSE
    20      0049 1027.110  957.512  929.590  929.590 1000.170 1015.150 FALSE
    21      0055  999.414  917.597  950.493  950.493  982.763  956.507 FALSE