R 合并不同列的数据帧以创建新的数据帧,然后创建一个列表
标签: r, list, merge, dataframe
我有两个带有公共列
ID
的数据帧
df1:
structure(list(ID = c("ILMN_1343291", "ILMN_1343292", "ILMN_1343293",
"ILMN_1343294", "ILMN_1651209", "ILMN_1651217"), sample1 = c(15.8694849642673,
11.012283668738, 14.995174021271, 15.5837972369044, 8.08964880277197,
7.83406791970529), sample2 = c(15.8286939941996, 10.8407625194439,
14.8818391783215, 15.5951964773238, 8.07397790649028, 7.85669590957356
), sample3 = c(15.8694849642673, 11.453825129904, 14.764465720783,
15.6917233494908, 8.12884795713027, 7.86320578481668), sample4 = c(15.8694849642673,
13.1291568945281, 14.8250076629948, 15.7193468523012, 8.07217210194779,
7.84280084254162), sample5 = c(15.8694849642673, 10.223217362866,
14.9595743905588, 15.362353159395, 8.12336672246356, 7.80055774746275
), sample6 = c(15.8694849642673, 11.3892037666505, 15.3837334429764,
15.65923530912, 8.05738048722836, 7.81925084187836)), .Names = c("ID",
"sample1", "sample2", "sample3", "sample4", "sample5", "sample6"
), class = "data.frame", row.names = c(NA, 6L))
structure(list(ID = c("ILMN_1343291", "ILMN_1343292", "ILMN_1343293",
"ILMN_1343294", "ILMN_1651209", "ILMN_1651217"), PVALUE1 = c(0,
0, 0, 0, 0, 0.4675325), PVALUE2 = c(0, 0, 0, 0, 0, 0.3290415),
PVALUE3 = c(0, 0, 0, 0, 0, 0.3074713), PVALUE4 = c(0, 0,
0, 0, 0, 0.3543759), PVALUE5 = c(0, 0, 0, 0, 0, 0.6465517
), PVALUE6 = c(0, 0, 0, 0, 0.001445087, 0.5390173)), .Names = c("ID",
"PVALUE1", "PVALUE2", "PVALUE3", "PVALUE4", "PVALUE5", "PVALUE6"
), class = "data.frame", row.names = c(NA, 6L))
[[1]]
ID sample1 PVALUE1
1 ILMN_1343291 15.869485 0.0000000
2 ILMN_1343292 11.012284 0.0000000
3 ILMN_1343293 14.995174 0.0000000
4 ILMN_1343294 15.583797 0.0000000
5 ILMN_1651209 8.089649 0.0000000
6 ILMN_1651217 7.834068 0.4675325
[[2]]
ID sample2 PVALUE2
1 ILMN_1343291 15.828694 0.0000000
2 ILMN_1343292 10.840763 0.0000000
3 ILMN_1343293 14.881839 0.0000000
4 ILMN_1343294 15.595196 0.0000000
5 ILMN_1651209 8.073978 0.0000000
6 ILMN_1651217 7.856696 0.3290415
[[3]]
ID sample3 PVALUE3
1 ILMN_1343291 15.869485 0.0000000
2 ILMN_1343292 11.453825 0.0000000
3 ILMN_1343293 14.764466 0.0000000
4 ILMN_1343294 15.691723 0.0000000
5 ILMN_1651209 8.128848 0.0000000
6 ILMN_1651217 7.863206 0.3074713
[[4]]
ID sample4 PVALUE4
1 ILMN_1343291 15.869485 0.0000000
2 ILMN_1343292 13.129157 0.0000000
3 ILMN_1343293 14.825008 0.0000000
4 ILMN_1343294 15.719347 0.0000000
5 ILMN_1651209 8.072172 0.0000000
6 ILMN_1651217 7.842801 0.3543759
[[5]]
ID sample5 PVALUE5
1 ILMN_1343291 15.869485 0.0000000
2 ILMN_1343292 10.223217 0.0000000
3 ILMN_1343293 14.959574 0.0000000
4 ILMN_1343294 15.362353 0.0000000
5 ILMN_1651209 8.123367 0.0000000
6 ILMN_1651217 7.800558 0.6465517
[[6]]
ID sample6 PVALUE6
1 ILMN_1343291 15.869485 0.000000000
2 ILMN_1343292 11.389204 0.000000000
3 ILMN_1343293 15.383733 0.000000000
4 ILMN_1343294 15.659235 0.000000000
5 ILMN_1651209 8.057380 0.001445087
6 ILMN_1651217 7.819251 0.539017300
df2:
structure(list(ID = c("ILMN_1343291", "ILMN_1343292", "ILMN_1343293",
"ILMN_1343294", "ILMN_1651209", "ILMN_1651217"), sample1 = c(15.8694849642673,
11.012283668738, 14.995174021271, 15.5837972369044, 8.08964880277197,
7.83406791970529), sample2 = c(15.8286939941996, 10.8407625194439,
14.8818391783215, 15.5951964773238, 8.07397790649028, 7.85669590957356
), sample3 = c(15.8694849642673, 11.453825129904, 14.764465720783,
15.6917233494908, 8.12884795713027, 7.86320578481668), sample4 = c(15.8694849642673,
13.1291568945281, 14.8250076629948, 15.7193468523012, 8.07217210194779,
7.84280084254162), sample5 = c(15.8694849642673, 10.223217362866,
14.9595743905588, 15.362353159395, 8.12336672246356, 7.80055774746275
), sample6 = c(15.8694849642673, 11.3892037666505, 15.3837334429764,
15.65923530912, 8.05738048722836, 7.81925084187836)), .Names = c("ID",
"sample1", "sample2", "sample3", "sample4", "sample5", "sample6"
), class = "data.frame", row.names = c(NA, 6L))
structure(list(ID = c("ILMN_1343291", "ILMN_1343292", "ILMN_1343293",
"ILMN_1343294", "ILMN_1651209", "ILMN_1651217"), PVALUE1 = c(0,
0, 0, 0, 0, 0.4675325), PVALUE2 = c(0, 0, 0, 0, 0, 0.3290415),
PVALUE3 = c(0, 0, 0, 0, 0, 0.3074713), PVALUE4 = c(0, 0,
0, 0, 0, 0.3543759), PVALUE5 = c(0, 0, 0, 0, 0, 0.6465517
), PVALUE6 = c(0, 0, 0, 0, 0.001445087, 0.5390173)), .Names = c("ID",
"PVALUE1", "PVALUE2", "PVALUE3", "PVALUE4", "PVALUE5", "PVALUE6"
), class = "data.frame", row.names = c(NA, 6L))
[[1]]
ID sample1 PVALUE1
1 ILMN_1343291 15.869485 0.0000000
2 ILMN_1343292 11.012284 0.0000000
3 ILMN_1343293 14.995174 0.0000000
4 ILMN_1343294 15.583797 0.0000000
5 ILMN_1651209 8.089649 0.0000000
6 ILMN_1651217 7.834068 0.4675325
[[2]]
ID sample2 PVALUE2
1 ILMN_1343291 15.828694 0.0000000
2 ILMN_1343292 10.840763 0.0000000
3 ILMN_1343293 14.881839 0.0000000
4 ILMN_1343294 15.595196 0.0000000
5 ILMN_1651209 8.073978 0.0000000
6 ILMN_1651217 7.856696 0.3290415
[[3]]
ID sample3 PVALUE3
1 ILMN_1343291 15.869485 0.0000000
2 ILMN_1343292 11.453825 0.0000000
3 ILMN_1343293 14.764466 0.0000000
4 ILMN_1343294 15.691723 0.0000000
5 ILMN_1651209 8.128848 0.0000000
6 ILMN_1651217 7.863206 0.3074713
[[4]]
ID sample4 PVALUE4
1 ILMN_1343291 15.869485 0.0000000
2 ILMN_1343292 13.129157 0.0000000
3 ILMN_1343293 14.825008 0.0000000
4 ILMN_1343294 15.719347 0.0000000
5 ILMN_1651209 8.072172 0.0000000
6 ILMN_1651217 7.842801 0.3543759
[[5]]
ID sample5 PVALUE5
1 ILMN_1343291 15.869485 0.0000000
2 ILMN_1343292 10.223217 0.0000000
3 ILMN_1343293 14.959574 0.0000000
4 ILMN_1343294 15.362353 0.0000000
5 ILMN_1651209 8.123367 0.0000000
6 ILMN_1651217 7.800558 0.6465517
[[6]]
ID sample6 PVALUE6
1 ILMN_1343291 15.869485 0.000000000
2 ILMN_1343292 11.389204 0.000000000
3 ILMN_1343293 15.383733 0.000000000
4 ILMN_1343294 15.659235 0.000000000
5 ILMN_1651209 8.057380 0.001445087
6 ILMN_1651217 7.819251 0.539017300
所需输出:
structure(list(ID = c("ILMN_1343291", "ILMN_1343292", "ILMN_1343293",
"ILMN_1343294", "ILMN_1651209", "ILMN_1651217"), sample1 = c(15.8694849642673,
11.012283668738, 14.995174021271, 15.5837972369044, 8.08964880277197,
7.83406791970529), sample2 = c(15.8286939941996, 10.8407625194439,
14.8818391783215, 15.5951964773238, 8.07397790649028, 7.85669590957356
), sample3 = c(15.8694849642673, 11.453825129904, 14.764465720783,
15.6917233494908, 8.12884795713027, 7.86320578481668), sample4 = c(15.8694849642673,
13.1291568945281, 14.8250076629948, 15.7193468523012, 8.07217210194779,
7.84280084254162), sample5 = c(15.8694849642673, 10.223217362866,
14.9595743905588, 15.362353159395, 8.12336672246356, 7.80055774746275
), sample6 = c(15.8694849642673, 11.3892037666505, 15.3837334429764,
15.65923530912, 8.05738048722836, 7.81925084187836)), .Names = c("ID",
"sample1", "sample2", "sample3", "sample4", "sample5", "sample6"
), class = "data.frame", row.names = c(NA, 6L))
structure(list(ID = c("ILMN_1343291", "ILMN_1343292", "ILMN_1343293",
"ILMN_1343294", "ILMN_1651209", "ILMN_1651217"), PVALUE1 = c(0,
0, 0, 0, 0, 0.4675325), PVALUE2 = c(0, 0, 0, 0, 0, 0.3290415),
PVALUE3 = c(0, 0, 0, 0, 0, 0.3074713), PVALUE4 = c(0, 0,
0, 0, 0, 0.3543759), PVALUE5 = c(0, 0, 0, 0, 0, 0.6465517
), PVALUE6 = c(0, 0, 0, 0, 0.001445087, 0.5390173)), .Names = c("ID",
"PVALUE1", "PVALUE2", "PVALUE3", "PVALUE4", "PVALUE5", "PVALUE6"
), class = "data.frame", row.names = c(NA, 6L))
[[1]]
ID sample1 PVALUE1
1 ILMN_1343291 15.869485 0.0000000
2 ILMN_1343292 11.012284 0.0000000
3 ILMN_1343293 14.995174 0.0000000
4 ILMN_1343294 15.583797 0.0000000
5 ILMN_1651209 8.089649 0.0000000
6 ILMN_1651217 7.834068 0.4675325
[[2]]
ID sample2 PVALUE2
1 ILMN_1343291 15.828694 0.0000000
2 ILMN_1343292 10.840763 0.0000000
3 ILMN_1343293 14.881839 0.0000000
4 ILMN_1343294 15.595196 0.0000000
5 ILMN_1651209 8.073978 0.0000000
6 ILMN_1651217 7.856696 0.3290415
[[3]]
ID sample3 PVALUE3
1 ILMN_1343291 15.869485 0.0000000
2 ILMN_1343292 11.453825 0.0000000
3 ILMN_1343293 14.764466 0.0000000
4 ILMN_1343294 15.691723 0.0000000
5 ILMN_1651209 8.128848 0.0000000
6 ILMN_1651217 7.863206 0.3074713
[[4]]
ID sample4 PVALUE4
1 ILMN_1343291 15.869485 0.0000000
2 ILMN_1343292 13.129157 0.0000000
3 ILMN_1343293 14.825008 0.0000000
4 ILMN_1343294 15.719347 0.0000000
5 ILMN_1651209 8.072172 0.0000000
6 ILMN_1651217 7.842801 0.3543759
[[5]]
ID sample5 PVALUE5
1 ILMN_1343291 15.869485 0.0000000
2 ILMN_1343292 10.223217 0.0000000
3 ILMN_1343293 14.959574 0.0000000
4 ILMN_1343294 15.362353 0.0000000
5 ILMN_1651209 8.123367 0.0000000
6 ILMN_1651217 7.800558 0.6465517
[[6]]
ID sample6 PVALUE6
1 ILMN_1343291 15.869485 0.000000000
2 ILMN_1343292 11.389204 0.000000000
3 ILMN_1343293 15.383733 0.000000000
4 ILMN_1343294 15.659235 0.000000000
5 ILMN_1651209 8.057380 0.001445087
6 ILMN_1651217 7.819251 0.539017300
我想把 df1 和 df2 中位置对应的列合并起来,并把得到的所有数据帧存入一个列表。具体来说:按 ID(两个数据帧的第一列)合并 df1 的第二列和 df2 的第二列,把结果存入列表;然后合并 df1 和 df2 的第三列并存入列表,依此类推,直到处理完所有列。我知道可以用 merge() 合并两个数据帧,用 list() 存储结果,但这里需要在循环中完成。怎样才能用尽量少的代码做到这一点?否则我就得写很长的代码来实现这一切。谢谢。

这里有一个尝试:
# Pair every value column of df1 with the same-position column of df2 and
# put the ID column (df1[1]) in front. cbind() combines rows by position,
# so this relies on both data frames listing the IDs in the same order.
Res <- mapply(
  function(x, y) cbind(df1[1], x, y),
  df1[-1],
  df2[-1],
  SIMPLIFY = FALSE
)

# Stack the value-column names of both data frames; column k then holds the
# k-th name from df1 followed by the k-th name from df2.
tempnames <- rbind.data.frame(names(df1)[-1], names(df2)[-1])

# Relabel each combined data frame as ID plus its original pair of column
# names. The resulting list is printed, not assigned.
Map(
  function(pair, labels) setNames(pair, c("ID", as.character(labels))),
  Res,
  tempnames
)
# $sample1
# ID sample1 PVALUE1
# 1 ILMN_1343291 15.869485 0.0000000
# 2 ILMN_1343292 11.012284 0.0000000
# 3 ILMN_1343293 14.995174 0.0000000
# 4 ILMN_1343294 15.583797 0.0000000
# 5 ILMN_1651209 8.089649 0.0000000
# 6 ILMN_1651217 7.834068 0.4675325
#
# $sample2
# ID sample2 PVALUE2
# 1 ILMN_1343291 15.828694 0.0000000
# 2 ILMN_1343292 10.840763 0.0000000
# 3 ILMN_1343293 14.881839 0.0000000
# 4 ILMN_1343294 15.595196 0.0000000
# 5 ILMN_1651209 8.073978 0.0000000
# 6 ILMN_1651217 7.856696 0.3290415
...
Res
您是想用 merge 还是 cbind?换句话说,这两个数据集每一行的 ID 是否都相同?
是的,这里两个数据集的 ID 都相同。
那么 Map(data.frame,Map(cbind,df1[-1],df2[-1]),ID=df1[1])
或 Map(data.frame,ID=df1[1],Map(cbind,df1[-1],df2[-1]))
对您有用吗?
看起来很酷,只是它丢掉了原始列名,而且把 ID 列放在了最后而不是最前面。需要保留原始列名和 ID 列
,谢谢您的回答。我可以了解一下它是如何工作的吗?如果 ID 不一致,我还能用 Map() 来合并吗?如果我有很多文件,并且要创建一个包含所有文件的列表,那么 merge 可能会更复杂;上面的例子只考虑了一个文件。
@AgHaZuSain 我不明白你的意思。
上面提到的数据帧 df1 和 df2 是在合并多个文件、创建列表之后得到的。现在假设我有很多像 df1 和 df2 这样的文件,我想创建一个包含所有文件的列表。Map() 创建的是一个包含两个文件的列表,但我不确定是否可以用 list[[length(list)+1]] 来存储文件。