R 在某些列上取平均值
我有这样的数据R 在某些列上取平均值,r,R,我有这样的数据 > dput(head(my_data)) structure(list(`Sample Name` = c("ACTB", "ATP5F1", "DDX5", "EEF1G", "GAPDH", "NCL"), `31-29TRG1R1_1` = c(4526, 884, 11044, 10661, 31760, 9417), `31
> dput(head(my_data))
structure(list(`Sample Name` = c("ACTB", "ATP5F1", "DDX5", "EEF1G",
"GAPDH", "NCL"), `31-29TRG1R1_1` = c(4526, 884, 11044, 10661,
31760, 9417), `31-25TRG2R1_1` = c(5311, 841, 6401, 16073, 20432,
4969), `31-18TRG1R1_1` = c(12487, 567, 13945, 16474, 43309, 11831
), `31-44TRG2R1_1` = c(9477, 1244, 7140, 6580, 12457, 5176),
`31-32TRG2R1_1` = c(13386, 1535, 14153, 11036, 26186, 8299
), `31-21TRG3R1_1` = c(8511, 1703, 8966, 14800, 57140, 7002
), `31-14TRG4R1_1` = c(9935, 1841, 19590, 16467, 34281, 12463
), `31-19TRG4R1_1` = c(5211, 917, 9144, 8024, 8200, 3935),
`31-29TRG1R2_1` = c(3181, 613, 7584, 7603, 19331, 6286),
`31-25TRG2R2_1` = c(115931, 11738, 88604, 255822, 447002,
70197), `31-23TRG1R1_1` = c(3156, 765, 18034, 18982, 17237,
18880), `31-RESECTION1_1` = c(3470, 506, 8858, 11829, 29934,
7857), `31-32TRG2R2_1` = c(241372, 21547, 165886, 138726,
442723, 94629), `31-21TRG3R2_1` = c(5102, 1217, 5977, 10041,
35615, 4740), `31-14TRG4R2_1` = c(5352, 1029, 14053, 10995,
18206, 8633), `31-24TRG4R1_1` = c(6990, 761, 4440, 2833,
8150, 1340), `31-46TRG1R1_1` = c(4290, 834, 8146, 22078,
13631, 7407), `31-27TRG2R1_1` = c(11262, 1153, 10063, 13690,
19430, 6470), `31-33TRG1R1_1` = c(4139, 338, 8260, 8650,
13916, 8000), `31-RESECTION1_2` = c(2812, 497, 8503, 10674,
22677, 7394), `31-53TRG2R1_1` = c(7438, 921, 6897, 10042,
15142, 8073), `31-22TRG3R1_1` = c(4784, 621, 9904, 7049,
13977, 4279), `31-6TRG4R1_1` = c(6749, 704, 9638, 12920,
14396, 8133), `31-17TRG3R1_1` = c(10074, 1056, 7966, 12489,
26819, 6404), `31-46TRG1R2_1` = c(6586, 1160, 10305, 30012,
21850, 9848), `31-27TRG2R2_1` = c(10368, 866, 7324, 10047,
17939, 4943), `31-5TRG2R1_1` = c(5479, 440, 8913, 7103, 15886,
5801), `31-RESECTION1_3` = c(3032, 474, 8484, 11152, 24847,
7741), `31-53TRG2R2_1` = c(10587, 1315, 8675, 12923, 23882,
10213), `31-22TRG3R2_1` = c(4885, 660, 9710, 7115, 15669,
4908), `31-6TRG4R2_1` = c(6683, 650, 7987, 11447, 14364,
7236), `31-16TRG4R1_1` = c(5428, 825, 11789, 12018, 6812,
5954), `31-9TRG1R1_1` = c(5723, 494, 7504, 5591, 9086, 2966
), `31-26TRG3R1_1` = c(2564, 465, 2115, 2810, 4022, 955),
`31-20TRG2R1_1` = c(1018, 491, 2325, 3460, 3469, 1863), `31-RESECTION2_1` = c(1011,
285, 2893, 4345, 6476, 1818), `31-52TRG2R1_1` = c(4128, 622,
5952, 5196, 7428, 4181), `31-38TRG3R1_1` = c(2281, 521, 12043,
8111, 16866, 5207), `31-49TRG4R1_1` = c(4566, 453, 8521,
9618, 26104, 8663), `31-15TRG4R1_1` = c(6501, 947, 10320,
7285, 10538, 4638), `31-9TRG1R2_1` = c(13212, 1131, 14045,
11418, 21843, 6245), `31-26TRG3R2_1` = c(5832, 972, 3012,
4851, 7559, 1560), `31-37TRG2R1_1` = c(7031, 690, 8208, 17495,
28514, 7058), `31-RESECTION2_2` = c(4095, 620, 6319, 10473,
25292, 4274), `31-52TRG2R1_2` = c(10240, 2169, 19618, 17052,
18671, 13688), `31-38TRG3R2_1` = c(5009, 874, 20191, 14132,
34612, 8855), `31-49TRG4R2_1` = c(10911, 1131, 19318, 21560,
58967, 19450), `31-48TRG2R1_1` = c(8758, 1002, 8044, 6677,
17354, 7355), `31-2TRG1R1_1` = c(10461, 1904, 15286, 16249,
15900, 14393), `31-36TRG3R1_1` = c(16430, 1401, 9522, 17646,
26764, 8063), `31-51TRG2R1_1` = c(151880, 16572, 93610, 110556,
303604, 57029), `31-RESECTION2_3` = c(9686, 1435, 14038,
23464, 61018, 8921), `31-42TRG2R1_1` = c(38649, 3795, 24468,
36134, 68794, 17827), `31-3TRG4R1_1` = c(14421, 1901, 34905,
21004, 47952, 12428), `31-35TRG4R1_1` = c(59268, 4752, 73216,
64854, 212997, 37064), `31-50TRG4R1_1` = c(6660, 701, 4092,
16796, 7958, 2408), `31-2TRG1R2_1` = c(4845, 1086, 9191,
8887, 6692, 8119), `31-36TRG3R2_1` = c(8418, 868, 6832, 11279,
13530, 5233), `31-28TRG4R1_1` = c(7980, 1343, 7342, 9874,
14286, 4255), `31-RESECTION3_1` = c(4330, 832, 4446, 6208,
13131, 8312), `31-42TRG2R2_1` = c(6745, 1007, 8185, 9790,
11478, 5764), `31-3TRG4R2_1` = c(4080, 769, 14648, 8343,
14580, 5035), `31-35TRG4R2_1` = c(5290, 684, 11139, 9057,
18928, 5325), `31-12TRG4R1_1` = c(7812, 1326, 5750, 9204,
12352, 5489), `31-40TRG1R1_1` = c(14399, 922, 13135, 20975,
50560, 9952), `31-30TRG5R1_1` = c(24261, 1986, 14361, 26032,
89023, 20479), `31-30TRG5R2_1` = c(24454, 2196, 16827, 27964,
93862, 21350), `31-34TRG3R1_1` = c(37303, 2998, 22122, 30431,
51981, 11737), `31-RESECTION3_2` = c(20019, 1158, 8349, 14393,
49299, 17807), `31-7TRG3R1_1` = c(22085, 1431, 12012, 15730,
40418, 10271), `31-43TRG3R1_1` = c(11531, 676, 13557, 6762,
33468, 5563), `31-11TRG4R1_1` = c(28845, 3840, 28234, 45017,
85101, 17277), `31-cells_1` = c(5252, 2077, 6668, 36973,
39909, 15035), `31-40TRG1R2_1` = c(43316, 3278, 40684, 65687,
157910, 29137), `31-39TRG3R1_1` = c(28273, 2169, 14697, 20890,
68353, 25293), `31-RESECTION3_3` = c(27828, 2144, 13246,
20487, 69043, 26125), `31-7TRG3R2_1` = c(10299, 879, 6798,
8131, 19650, 5182), `31-43TRG3R2_1` = c(16109, 918, 18933,
8892, 43115, 6850), `31-11TRG4R2_1` = c(20847, 2748, 22810,
32811, 60942, 11978), `31-cells_2` = c(8909, 3453, 10518,
49209, 46428, 19275), `31-41TRG1R1_1` = c(12732, 883, 11319,
12157, 40460, 10400), `31-8TRG1R1_1` = c(11408, 1007, 11396,
20424, 38188, 9570), `31-1TRG3R1_1` = c(9928, 1112, 5726,
6227, 12942, 3644), `31-RNA_1` = c(7453, 1324, 4461, 15932,
47845, 13711), `31-10TRG3R1_1` = c(13214, 1355, 13559, 14508,
31296, 9096), `31-31TRG4R1_1` = c(7230, 769, 11198, 11052,
39781, 12827), `31-45TRG3R1_1` = c(12182, 573, 2818, 3699,
4365, 1639), `31-cells_3` = c(6517, 1899, 5144, 23862, 30177,
8443), `31-41TRG1R2_1` = c(15229, 1485, 15090, 15091, 54369,
13341), `31-13TRG1R1_1` = c(15332, 1162, 18268, 20875, 62257,
10614), `31-4TRG3R1_1` = c(9947, 1115, 7267, 5957, 13831,
2793), `31-RNA_2` = c(10629, 2135, 8014, 27834, 92632, 25041
), `31-10TRG3R2_1` = c(18232, 2010, 19637, 19973, 46075,
12244), `31-31TRG4R2_1` = c(6362, 1110, 14307, 12233, 44153,
13017), `31-47TRG3R1_1` = c(14419, 1927, 7350, 10375, 15736,
3415), `31-RNA_3` = c(11938, 1911, 5565, 16496, 49358, 17899
)), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"
))
>
例如,有些列是重复的
31-30TRG5R1_1
和31-30TRG5R2_1
是单个样本的重复,我想对其取平均值,并创建一个名为31-30TRG5
在我的数据中,我有很多这样的例子,名字的第一部分与R1_1
和R2_1
喜欢图片
[![在此处输入图像描述][1][1]
是否可以使用R函数对这些样本进行平均,而不是手动定位它们并对它们的基因进行平均?我将首先“修复”损坏的数据,因为您不能有两行列标题。最好的解决方法是从正确导入的数据开始,可能在读入数据时跳过其中一个标题行。缺少那一点
这就假定`Sample Name`
是唯一正确的“字符串”数据,而所有其他数据都是严格的数字
库(dplyr)
colnames(我的_数据)%
总结(
样本=样本[1],#任意选择第一个样本名称
值=平均值(值)
) %>%
解组()%>%
选择(-sample0)%>%
tidyr::pivot_更宽(name_from=“sample”,values_from=“value”)
my_数据2
##tibble:5 x 57
#`Sample Name``31-10TRG3R1_1``31-11TRG4R1_1``31-12TRG4R1_1``31-13TRG1R1_1``31-14TRG4R1_1```
#
#1 ACTB 15723248467812153327644。
#2 ATP5F1 1682。3294 1326 1162 1435
#3 DDX5 16598 25522 5750 18268 16822。
#4 EEF1G 17240。38914 9204 20875 13731
#5 GAPDH 38686。7302212352 62257 26244.
# # ... 还有51个变量:`31-15TRG4R1_1`、`31-16TRG4R1_1`,
##31-17TRG3R1_1`、`31-18TRG1R1_1`、`31-19TRG4R1_1`、`31-1TRG3R1_1`、,
##31-20TRG2R1_1`、`31-21TRG3R1_1`、`31-22TRG3R1_1`、`31-23TRG1R1_1`、,
##31-24TRG4R1_1`、`31-25TRG2R1_1`、`31-26TRG3R1_1`、`31-27TRG2R1_1`、,
##31-28TRG4R1_1`、`31-29TRG1R1_1`、`31-2TRG1R1_1`、`31-30TRG5R1_1`、,
##31-31TRG4R1_1`、`31-32TRG2R1_1`、`31-33TRG1R1_1`、`31-34TRG3R1_1`、,
##31-35TRG4R1_1`、`31-36TRG3R1_1`、`31-37TRG2R1_1`、`31-38TRG3R1_1`、,
##31-39TRG3R1_1`、`31-3TRG4R1_1`、`31-40TRG1R1_1`、`31-41TRG1R1_1`、,
##31-42TRG2R1_1`、`31-43TRG3R1_1`、`31-44TRG2R1_1`、`31-45TRG3R1_1`、,
##31-46TRG1R1_1`、`31-47TRG3R1_1`、`31-48TRG2R1_1`、`31-49TRG4R1_1`、,
##31-4TRG3R1_1`、`31-50TRG4R1_1`、`31-51TRG2R1_1`、`31-52TRG2R1_1`、,
##31-53TRG2R1_1`、`31-5TRG2R1_1`、`31-6TRG4R1_1`、`31-7TRG3R1_1`、,
##`31-8TRG1R1_1`、`31-9TRG1R1_1`、`31-cells_1`、`31-RESECTION1_1`、,
##`31-RNA_1`
作为删除内容的演示,我们将区分列名,此处列出的列名已删除(平均为原始样本名):
setdiff(名称(我的_数据)、名称(我的_数据2))
#[1]“31-29TRG1R2_1”“31-25TRG2R2_1”“31-32TRG2R2_1”“31-21TRG3R2_1”“31-14TRG4R2_1”
#[6]“31-RESECTION1_2”“31-46TRG1R2_1”“31-27TRG2R2_1”“31-RESECTION1_3”“31-53TRG2R2_1”
#[11]“31-22TRG3R2_1”“31-6TRG4R2_1”“31-2切除术”“31-9TRG1R2_1”“31-26TRG3R2_1”
#[16]“31-TRG2R1\U 2”“31-52TRG2R1\U 2”“31-38TRG3R2\U 1”“31-49TRG4R2\U 1”“31-TRG2R1\U 3”
#[21]“31-2TRG1R2\u 1”“31-36TRG3R2\u 1”“31-42TRG2R2\u 1”“31-3TRG4R2\u 1”
#[26]“31-35TRG4R2_1”“31-30TRG5R2_1”“31-切除3_2”“31-40TRG1R2_1”“31-切除3_3”
#[31]“31-7TRG3R2_1”“31-43TRG3R2_1”“31-11TRG4R2_1”“31-cells_2”“31-cells_3”
#[36]“31-41TRG1R2_1”“31-RNA_2”“31-10TRG3R2_1”“31-31TRG4R2_1”“31-RNA_3”
我将首先“修复”损坏的数据,因为您不能有两行列标题。最好的解决方法是从正确导入的数据开始,可能在读入数据时跳过其中一个标题行。缺少那一点
这就假定`Sample Name`
是唯一正确的“字符串”数据,而所有其他数据都是严格的数字
库(dplyr)
colnames(我的_数据)%
总结(
样本=样本[1],#任意选择第一个样本名称
值=平均值(值)
) %>%
解组()%>%
选择(-sample0)%>%
tidyr::pivot_更宽(name_from=“sample”,values_from=“value”)
my_数据2
##tibble:5 x 57
#`Sample Name``31-10TRG3R1_1``31-11TRG4R1_1``31-12TRG4R1_1``31-13TRG1R1_1``31-14TRG4R1_1```
#
#1 ACTB 15723248467812153327644。
#2 ATP5F1 1682。3294 1326 1162 1435
#3 DDX5 16598 25522 5750 18268 16822。
#4 EEF1G 17240。38914 9204 20875 13731
#5 GAPDH 38686。7302212352 62257 26244.
# # ... 还有51个变量:`31-15TRG4R1_1`、`31-16TRG4R1_1`,
##31-17TRG3R1_1`、`31-18TRG1R1_1`、`31-19TRG4R1_1`、`31-1TRG3R1_1`、,
##31-20TRG2R1_1`、`31-21TRG3R1_1`、`31-22TRG3R1_1`、`31-23TRG1R1_1`、,
##31-24TRG4R1_1`、`31-25TRG2R1_1`、`31-26TRG3R1_1`、`31-27TRG2R1_1`、,
##31-28TRG4R1_1`、`31-29TRG1R1_1`、`31-2TRG1R1_1`、`31-30TRG5R1_1`、,
##31-31TRG4R1_1`、`31-32TRG2R1_1`、`31-33TRG1R1_1`、`31-34TRG3R1_1`、,
##31-35TRG4R1_1`、`31-36TRG3R1_1`、`31-37TRG2R1_1`、`31-38TRG3R1_1`、,
##31-39TRG3R1_1`、`31-3TRG4R1_1`、`31-40TRG1R1_1`、`31-41TRG1R1_1`、,
##31-42TRG2R1_1`、`31-43TRG3R1_1`、`31-44TRG2R1_1`、`31-45TRG3R1_1`、,
##`31-46T
result <- cbind(my_data[1], round(sapply(split.default(my_data[-1],
sub('\\d_\\d$', '', names(my_data)[-1])), rowMeans, na.rm = TRUE)))
sub('\\d_\\d$', '', names(my_data)[-1])
# [1] "31-29TRG1R" "31-25TRG2R" "31-18TRG1R" "31-44TRG2R" "31-32TRG2R" "31-21TRG3R"
# [7] "31-14TRG4R" "31-19TRG4R" "31-29TRG1R" "31-25TRG2R" "31-23TRG1R" "31-RESECTION"
#[13] "31-32TRG2R" "31-21TRG3R" "31-14TRG4R" "31-24TRG4R" "31-46TRG1R" "31-27TRG2R"
#[19] "31-33TRG1R" "31-RESECTION" "31-53TRG2R" "31-22TRG3R" "31-6TRG4R" "31-17TRG3R"
#[25] "31-46TRG1R" "31-27TRG2R" "31-5TRG2R" "31-RESECTION" "31-53TRG2R" "31-22TRG3R"
#[31] "31-6TRG4R" "31-16TRG4R" "31-9TRG1R" "31-26TRG3R" "31-20TRG2R" "31-RESECTION"
#[37] "31-52TRG2R" "31-38TRG3R" "31-49TRG4R" "31-15TRG4R" "31-9TRG1R" "31-26TRG3R"
#[43] "31-37TRG2R" "31-RESECTION" "31-52TRG2R" "31-38TRG3R" "31-49TRG4R" "31-48TRG2R"
#[49] "31-2TRG1R" "31-36TRG3R" "31-51TRG2R" "31-RESECTION" "31-42TRG2R" "31-3TRG4R"
#[55] "31-35TRG4R" "31-50TRG4R" "31-2TRG1R" "31-36TRG3R" "31-28TRG4R" "31-RESECTION"
#[61] "31-42TRG2R" "31-3TRG4R" "31-35TRG4R" "31-12TRG4R" "31-40TRG1R" "31-30TRG5R"
#[67] "31-30TRG5R" "31-34TRG3R" "31-RESECTION" "31-7TRG3R" "31-43TRG3R" "31-11TRG4R"
#[73] "31-cells_1" "31-40TRG1R" "31-39TRG3R" "31-RESECTION" "31-7TRG3R" "31-43TRG3R"
[#79] "31-11TRG4R" "31-cells_2" "31-41TRG1R" "31-8TRG1R" "31-1TRG3R" "31-RNA_1"
[#85] "31-10TRG3R" "31-31TRG4R" "31-45TRG3R" "31-cells_3" "31-41TRG1R" "31-13TRG1R"
#[91] "31-4TRG3R" "31-RNA_2" "31-10TRG3R" "31-31TRG4R" "31-47TRG3R" "31-RNA_3"