R 基于标头将数据帧拆分为2个数据帧

R 基于标头将数据帧拆分为2个数据帧,r,R,我想根据标题中的T和C分割数据帧。我想把C放在T下面,并添加一列,指定此行是属于T还是C。请看我下面的例子 > head(Merged_Columns) SYMBOL T0h T0.25h T0.5h T1h T2h T3h T6h T12h T24h T48h C0h C0.25h 536 Mki67 51.8604 50.6255 48.0860 49.5517 40

我想根据列名(表头)中的 `T` 和 `C` 拆分数据帧。我想把 `C` 的各列放在 `T` 的各列下面,并添加一列,用来标明每一行属于 `T` 还是 `C`。请看下面的例子:

> head(Merged_Columns)
     SYMBOL      T0h   T0.25h    T0.5h      T1h     T2h      T3h      T6h     T12h     T24h     T48h      C0h   C0.25h
536   Mki67  51.8604  50.6255  48.0860  49.5517  40.273  39.5324  55.7545  54.3527  44.5083  42.9394  50.0313  50.1149
1210   Calr 245.4650 261.7930 258.8250 273.9030 357.174 436.1670 630.2560 667.4980 599.0280 539.8510 255.5500 300.9920
1512  Cd180 110.8700 128.8450 138.1540 149.1230 186.518 185.0130 163.5700 119.1490 124.0210  92.2006 118.5150 143.2500
1523   Mcl1 206.7630 228.0150 240.7600 248.2550 167.791 111.5800  68.5703  76.1921  90.7829 102.3770 196.3040 198.2640
1753   Scd1  79.5991  87.3635  84.2132  93.7678 136.711 138.4380  77.9293  49.1002  58.5614  71.3752  80.0855  87.3873
1754   Scd2  83.4522  86.2435  81.8103  80.8914 126.018 146.6090 176.7760 123.4130 243.7200 291.1610  80.3333  96.0904
        C0.5h      C1h      C2h      C3h      C6h     C12h     C24h     C48h
536   47.0657  45.9766  45.4482  44.7699  68.5744  66.6451  59.2015  59.6688
1210 259.5850 297.2010 375.9320 470.6640 606.1660 583.2740 568.1570 363.0990
1512 148.5380 182.7170 218.5140 202.3130 204.4120 194.4180 241.9570 141.9830
1523 211.4390 200.1030 138.0060 108.3210  97.3858 119.8560 119.1730  99.3792
1753  83.9854  97.6568 138.0320 139.2210 105.2110  93.9231 108.5130 147.4040
1754  79.3060  95.2863 124.3770 136.9830 165.4720  91.6879 196.9550 279.3140
输出:

> head(Merged_Columns)
 Cond SYMBOL      0h     0.25h     0.5h       1h      2h       3h       6h      12h      24h      48h 
  T    Mki67    51.8604  50.6255  48.0860  49.5517  40.273  39.5324  55.7545  54.3527  44.5083  42.9394  
  T    Calr     245.4650 261.7930 258.8250 273.9030 357.174 436.1670 630.2560 667.4980 599.0280 539.8510 
  T    Cd180    110.8700 128.8450 138.1540 149.1230 186.518 185.0130 163.5700 119.1490 124.0210  92.2006 
  T    Mcl1     206.7630 228.0150 240.7600 248.2550 167.791 111.5800  68.5703  76.1921  90.7829 102.3770 
  T    Scd1     79.5991  87.3635  84.2132  93.7678 136.711 138.4380  77.9293  49.1002  58.5614  71.3752  
  T    Scd2     83.4522  86.2435  81.8103  80.8914 126.018 146.6090 176.7760 123.4130 243.7200 291.1610       
  C    Mki67    50.0313  50.1149  47.0657  45.9766  45.4482  44.7699  68.5744  66.6451  59.2015  59.6688
  C    Calr     255.5500 300.9920 259.5850 297.2010 375.9320 470.6640 606.1660 583.2740 568.1570 363.0990
  C    Cd180    118.5150 143.2500  148.5380 182.7170 218.5140 202.3130 204.4120 194.4180 241.9570 141.9830
  C    Mcl1     196.3040 198.2640 211.4390 200.1030 138.0060 108.3210  97.3858 119.8560 119.1730  99.3792
  C    Scd1     80.0855 87.3873 83.9854  97.6568 138.0320 139.2210 105.2110  93.9231 108.5130 147.4040
  C    Scd2     80.3333 96.0904 79.3060  95.2863 124.3770 136.9830 165.4720  91.6879 196.9550 279.3140

这里有一个可能的解决方案,就像在评论中建议的那样

# Stack the "T" columns of `df` on top of its "C" columns and prepend a
# `Cond` column recording which group each row came from.

# First character of every column name.
id <- gsub("(.).*", "\\1", names(df))
id1 <- id == "T" # TRUE for columns whose name starts with "T"
id2 <- id == "C" # TRUE for columns whose name starts with "C"

n1 <- names(df[id1])
n2 <- names(df[id2])

# One label per group (first letters of the selected names, de-duplicated),
# each repeated once per row of `df`.
cond <- rep(unique(gsub("(.).*", "\\1", c(n1, n2))), each = nrow(df))

# Remove the uppercase letters (the T / C prefix) so that both halves carry
# the same column names and can be stacked by rbind().
t_part <- setNames(df[id1], gsub("[A-Z]", "", n1))
c_part <- setNames(df[id2], gsub("[A-Z]", "", n2))

# df[1] is the first column (SYMBOL in the example); cbind() repeats it so it
# covers both stacked halves. Left unassigned so the result is printed.
cbind(Cond = cond, df[1], rbind(t_part, c_part))

#    Cond SYMBOL       0h    0.25h     0.5h       1h       2h       3h       6h      12h      24h      48h
# 1     T  Mki67  51.8604  50.6255  48.0860  49.5517  40.2730  39.5324  55.7545  54.3527  44.5083  42.9394
# 2     T   Calr 245.4650 261.7930 258.8250 273.9030 357.1740 436.1670 630.2560 667.4980 599.0280 539.8510
# 3     T  Cd180 110.8700 128.8450 138.1540 149.1230 186.5180 185.0130 163.5700 119.1490 124.0210  92.2006
# 4     T   Mcl1 206.7630 228.0150 240.7600 248.2550 167.7910 111.5800  68.5703  76.1921  90.7829 102.3770
# 5     T   Scd1  79.5991  87.3635  84.2132  93.7678 136.7110 138.4380  77.9293  49.1002  58.5614  71.3752
# 6     T   Scd2  83.4522  86.2435  81.8103  80.8914 126.0180 146.6090 176.7760 123.4130 243.7200 291.1610
# 7     C  Mki67  50.0313  50.1149  47.0657  45.9766  45.4482  44.7699  68.5744  66.6451  59.2015  59.6688
# 8     C   Calr 255.5500 300.9920 259.5850 297.2010 375.9320 470.6640 606.1660 583.2740 568.1570 363.0990
# 9     C  Cd180 118.5150 143.2500 148.5380 182.7170 218.5140 202.3130 204.4120 194.4180 241.9570 141.9830
# 10    C   Mcl1 196.3040 198.2640 211.4390 200.1030 138.0060 108.3210  97.3858 119.8560 119.1730  99.3792
# 11    C   Scd1  80.0855  87.3873  83.9854  97.6568 138.0320 139.2210 105.2110  93.9231 108.5130 147.4040
# 12    C   Scd2  80.3333  96.0904  79.3060  95.2863 124.3770 136.9830 165.4720  91.6879 196.9550 279.3140

这里有一个可能的解决方案,就像在评论中建议的那样

# Stack the "T" columns of `df` on top of its "C" columns and prepend a
# `Cond` column recording which group each row came from.

# First character of every column name.
id <- gsub("(.).*", "\\1", names(df))
id1 <- id == "T" # TRUE for columns whose name starts with "T"
id2 <- id == "C" # TRUE for columns whose name starts with "C"

n1 <- names(df[id1])
n2 <- names(df[id2])

# One label per group (first letters of the selected names, de-duplicated),
# each repeated once per row of `df`.
cond <- rep(unique(gsub("(.).*", "\\1", c(n1, n2))), each = nrow(df))

# Remove the uppercase letters (the T / C prefix) so that both halves carry
# the same column names and can be stacked by rbind().
t_part <- setNames(df[id1], gsub("[A-Z]", "", n1))
c_part <- setNames(df[id2], gsub("[A-Z]", "", n2))

# df[1] is the first column (SYMBOL in the example); cbind() repeats it so it
# covers both stacked halves. Left unassigned so the result is printed.
cbind(Cond = cond, df[1], rbind(t_part, c_part))

#    Cond SYMBOL       0h    0.25h     0.5h       1h       2h       3h       6h      12h      24h      48h
# 1     T  Mki67  51.8604  50.6255  48.0860  49.5517  40.2730  39.5324  55.7545  54.3527  44.5083  42.9394
# 2     T   Calr 245.4650 261.7930 258.8250 273.9030 357.1740 436.1670 630.2560 667.4980 599.0280 539.8510
# 3     T  Cd180 110.8700 128.8450 138.1540 149.1230 186.5180 185.0130 163.5700 119.1490 124.0210  92.2006
# 4     T   Mcl1 206.7630 228.0150 240.7600 248.2550 167.7910 111.5800  68.5703  76.1921  90.7829 102.3770
# 5     T   Scd1  79.5991  87.3635  84.2132  93.7678 136.7110 138.4380  77.9293  49.1002  58.5614  71.3752
# 6     T   Scd2  83.4522  86.2435  81.8103  80.8914 126.0180 146.6090 176.7760 123.4130 243.7200 291.1610
# 7     C  Mki67  50.0313  50.1149  47.0657  45.9766  45.4482  44.7699  68.5744  66.6451  59.2015  59.6688
# 8     C   Calr 255.5500 300.9920 259.5850 297.2010 375.9320 470.6640 606.1660 583.2740 568.1570 363.0990
# 9     C  Cd180 118.5150 143.2500 148.5380 182.7170 218.5140 202.3130 204.4120 194.4180 241.9570 141.9830
# 10    C   Mcl1 196.3040 198.2640 211.4390 200.1030 138.0060 108.3210  97.3858 119.8560 119.1730  99.3792
# 11    C   Scd1  80.0855  87.3873  83.9854  97.6568 138.0320 139.2210 105.2110  93.9231 108.5130 147.4040
# 12    C   Scd2  80.3333  96.0904  79.3060  95.2863 124.3770 136.9830 165.4720  91.6879 196.9550 279.3140

也许不是最优雅的解决方案,但下面应该可以做到这一点

# Stack the treatment (T*) columns of `dat` on top of the control (C*) columns
# and tag every row with its condition.
#
# Expects `dat` to contain a SYMBOL column plus two sets of columns named "T"
# or "C" followed by the same time labels (e.g. T0h / C0h).

# Anchor the patterns so only names that *start* with C / T are picked up;
# drop = FALSE keeps a data frame even when there is a single time point.
cdat <- dat[, grep("^C", names(dat)), drop = FALSE]
tdat <- dat[, grep("^T", names(dat)), drop = FALSE]

# Strip the condition prefix so both halves share identical column names
# (rbind() on data frames requires matching names). Deriving the labels from
# the data avoids a hard-coded list of time points.
colnames(cdat) <- sub("^C", "", colnames(cdat))
colnames(tdat) <- sub("^T", "", colnames(tdat))

# T rows go first so the row order agrees with the Cond labels below
# (the previous rbind(cdat, tdat) labelled every row with the wrong condition).
df <- rbind(tdat, cdat)

df$SYMBOL <- rep(dat$SYMBOL, 2)
# One label per original row instead of a hard-coded 6.
df$Cond <- rep(c("T", "C"), each = nrow(dat))

也许不是最优雅的解决方案,但下面应该可以做到这一点

# Stack the treatment (T*) columns of `dat` on top of the control (C*) columns
# and tag every row with its condition.
#
# Expects `dat` to contain a SYMBOL column plus two sets of columns named "T"
# or "C" followed by the same time labels (e.g. T0h / C0h).

# Anchor the patterns so only names that *start* with C / T are picked up;
# drop = FALSE keeps a data frame even when there is a single time point.
cdat <- dat[, grep("^C", names(dat)), drop = FALSE]
tdat <- dat[, grep("^T", names(dat)), drop = FALSE]

# Strip the condition prefix so both halves share identical column names
# (rbind() on data frames requires matching names). Deriving the labels from
# the data avoids a hard-coded list of time points.
colnames(cdat) <- sub("^C", "", colnames(cdat))
colnames(tdat) <- sub("^T", "", colnames(tdat))

# T rows go first so the row order agrees with the Cond labels below
# (the previous rbind(cdat, tdat) labelled every row with the wrong condition).
df <- rbind(tdat, cdat)

df$SYMBOL <- rep(dat$SYMBOL, 2)
# One label per original row instead of a hard-coded 6.
df$Cond <- rep(c("T", "C"), each = nrow(dat))

评论:

- 您可以简单地对第 2:11 列和第 12:21 列执行 `rbind`,然后再添加第一列。
- 我得到了一个错误,可能是因为第一列有数字(原始错误输出 `> dft…` 在此处被截断)。
- 感谢您注意到这一点!原因是列名不同。我现在已将 `colnames` 步骤放在 `rbind` 之前,现在应该可以工作了!