R：结合 unite 和 map 使用 tidy evaluation，以取消嵌套含列表列的数据集
我正在尝试取消嵌套（unnest）我用 pivot_wider 生成的数据集，其中有多个列表列需要取消嵌套。在完整数据集上，unnest 函数不起作用（我得到一个错误：`Error: Incompatible lengths: 3, 2.`），所以我尝试了一个解决办法。（标签：r, tidyverse, tidyr）数据集的一部分：
# Sample of the dataset used in the question (looks like dput() output):
# an 88-row tibble with four character columns -- RNAcentral_id, Database,
# RNA_type and gene_name. The trailing `spec` attribute additionally lists
# external_id and NCBI_taxon_id collectors, which have no matching column in
# this sample.
my_data <- structure(list(RNAcentral_id = c("URS000000C731", "URS000000C731",
"URS000000C731", "URS000000C731", "URS000001F3AA", "URS000001F3AA",
"URS000001F3AA", "URS000001F3AA", "URS000001F3AA", "URS000001F3AA",
"URS000001F3AA", "URS000001F3AA", "URS000001F3AA", "URS000001F3AA",
"URS0000023ED8", "URS0000023ED8", "URS0000023ED8", "URS0000023ED8",
"URS0000023ED8", "URS0000023ED8", "URS0000023ED8", "URS0000023ED8",
"URS0000023ED8", "URS0000023ED8", "URS0000023ED8", "URS0000023ED8",
"URS0000050C72", "URS0000050C72", "URS0000050C72", "URS0000050C72",
"URS0000050C72", "URS0000050C72", "URS0000050C72", "URS0000050C72",
"URS0000050C72", "URS0000050C72", "URS0000050C72", "URS0000050C72",
"URS0000050C72", "URS0000050C72", "URS0000050C72", "URS0000050C72",
"URS00000527A6", "URS00000527A6", "URS00000527A6", "URS00000527A6",
"URS00000527A6", "URS00000527A6", "URS00000527A6", "URS00000527A6",
"URS00000527A6", "URS000007CAC8", "URS000007CAC8", "URS000007CAC8",
"URS000007CAC8", "URS000007CAC8", "URS000007DA54", "URS000007DA54",
"URS000007DA54", "URS000007DA54", "URS000007DA54", "URS000007DA54",
"URS000007DA54", "URS000007DA54", "URS000007F1D7", "URS000007F1D7",
"URS000007F1D7", "URS000007F1D7", "URS000007F1D7", "URS000007F1D7",
"URS000007F1D7", "URS000007F1D7", "URS000007F1D7", "URS000007F1D7",
"URS0000088F47", "URS0000088F47", "URS0000088F47", "URS0000088F47",
"URS0000088F47", "URS0000088F47", "URS0000088F47", "URS00000B589B",
"URS00000B589B", "URS00000B589B", "URS00000B589B", "URS00000B589B",
"URS00000B589B", "URS00000B589B"), Database = c("ENSEMBL", "ENSEMBL",
"ENSEMBL", "GENCODE", "ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL",
"ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL", "GENCODE", "LNCIPEDIA",
"ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL", "GENECARDS", "LNCBOOK",
"LNCIPEDIA", "NONCODE", "NONCODE", "NONCODE", "NONCODE", "NONCODE",
"ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL",
"ENSEMBL", "GENCODE", "LNCBOOK", "NONCODE", "NONCODE", "NONCODE",
"NONCODE", "NONCODE", "NONCODE", "NONCODE", "ENSEMBL", "ENSEMBL",
"ENSEMBL", "GENCODE", "GENECARDS", "GENECARDS", "LNCBOOK", "LNCIPEDIA",
"NONCODE", "ENSEMBL", "ENSEMBL", "ENSEMBL", "GENCODE", "NONCODE",
"ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL", "GENCODE",
"LNCBOOK", "NONCODE", "ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL",
"GENCODE", "LNCBOOK", "NONCODE", "NONCODE", "NONCODE", "NONCODE",
"ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL", "GENCODE", "GENECARDS",
"LNCIPEDIA", "ENA", "ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL",
"ENSEMBL", "GENCODE"), RNA_type = c("lncRNA", "lncRNA", "lncRNA",
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA",
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA",
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA",
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA",
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA",
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA",
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA",
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA",
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA",
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA",
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA",
"lncRNA", "snoRNA", "snoRNA", "snoRNA", "snoRNA", "snoRNA", "snoRNA",
"snoRNA"), gene_name = c("ENSG00000250666.1", "ENSG00000281830.1",
"ENSG00000281377.1", "LINC01596", "ENSG00000242086.8", "ENSG00000280512.2",
"ENSG00000281603.2", "ENSG00000281060.2", "ENSG00000281794.2",
"ENSG00000281915.2", "ENSG00000280993.2", "ENSG00000282953.1",
"MUC20-OT1", "lnc-MUC20-67", "ENSG00000235273.1", "ENSG00000233950.1",
"ENSG00000230089.1", "ENSG00000225188.1", "LOC101929006", "HSALNG0049045",
"lnc-OR14J1-2", "NONHSAG043350.2", "NONHSAG045640.2", "NONHSAG045830.2",
"NONHSAG046018.2", "NONHSAG046538.2", "ENSG00000231860.1", "ENSG00000224328.1",
"ENSG00000236766.1", "ENSG00000224508.1", "ENSG00000236522.1",
"ENSG00000229681.1", "ENSG00000233883.1", "MDC1-AS1", "HSALNG0049184",
"NONHSAG043427.2", "NONHSAG045580.2", "NONHSAG045701.2", "NONHSAG045891.2",
"NONHSAG046074.2", "NONHSAG046228.2", "NONHSAG046589.2", "ENSG00000249981.1",
"ENSG00000276297.1", "ENSG00000280619.1", "AC145141.1", "LOC107987420",
"LOC107987434", "HSALNG0042531", "lnc-BDP1-1", "NONHSAG040656.2",
"ENSG00000242086.8", "ENSG00000280512.2", "ENSG00000281794.2",
"MUC20-OT1", "NONHSAG037073.2", "ENSG00000242086.8", "ENSG00000280512.2",
"ENSG00000281794.2", "ENSG00000281060.2", "ENSG00000282953.1",
"MUC20-OT1", "HSALNG0031832", "NONHSAG037073.2", "ENSG00000224835.1",
"ENSG00000227198.1", "ENSG00000233169.1", "ENSG00000225390.1",
"C6orf47-AS1", "HSALNG0049305", "NONHSAG043504.2", "NONHSAG046125.2",
"NONHSAG046270.2", "NONHSAG046461.2", "ENSG00000272566.1", "ENSG00000280590.1",
"ENSG00000280853.1", "ENSG00000281916.1", "AF250324.1", "ENSG00000272566",
"lnc-FRG2-13", "ACA38 snoRNA", "ENSG00000200816.1", "ENSG00000266847.1",
"ENSG00000263994.1", "ENSG00000264153.1", "ENSG00000263879.1",
"SNORA38")), row.names = c(NA, -88L), class = c("tbl_df", "tbl",
"data.frame"), spec = structure(list(cols = list(RNAcentral_id = structure(list(), class = c("collector_character",
"collector")), Database = structure(list(), class = c("collector_character",
"collector")), external_id = structure(list(), class = c("collector_character",
"collector")), NCBI_taxon_id = structure(list(), class = c("collector_double",
"collector")), RNA_type = structure(list(), class = c("collector_character",
"collector")), gene_name = structure(list(), class = c("collector_character",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), delim = "\t"), class = "col_spec"))
我的解决方法尝试:
# Step 1: spread gene_name into one column per distinct Database value.
# No values_fn is supplied here; per the surrounding text the resulting
# columns still need to be unnested afterwards.
mynested_data <- my_data %>%
pivot_wider(names_from = Database, values_from = c(gene_name))
# Step 2 (attempted workaround): for each database column name, widen that
# column into "<name>_<suffix>" columns and then paste those pieces back into
# a single column named after the database, separated by ";".
# NOTE(review): as posted, this pipeline does not run -- it stops with the
# "Subscript has the wrong type `quosures`" error quoted right after this
# snippet. The likely cause is that the column selection handed to unite() is
# wrapped in vars(), which yields a quosures object rather than a selection.
c("ENSEMBL", "GENCODE", "NONCODE", "ENA", "GENECARDS", "LNCBOOK",
"LNCIPEDIA") %>%
set_names(.) %>%
map(~ mynested_data %>%
unnest_wider(.x, names_sep = "_") %>%
unite(col = !!.x, vars(starts_with(!!quo(.x))), sep = ";"))
Error: Must subset columns with a valid subscript vector.
x Subscript has the wrong type `quosures`.
\u2139 It must be numeric or character.
Run `rlang::last_error()` to see where the error occurred.
mynested_data <- my_data %>%
pivot_wider(names_from = Database, values_from = c(gene_name))
c("ENSEMBL", "GENCODE", "NONCODE", "ENA", "GENECARDS", "LNCBOOK",
"LNCIPEDIA") %>%
set_names(.) %>%
map(~ mynested_data %>%
unnest_wider(.x, names_sep = "_") %>%
unite(col = !!.x, vars(starts_with(!!quo(.x))), sep = ";"))
错误:必须使用有效的下标向量子集列。
x下标的类型“quosures”错误。
\u2139必须是数字或字符。
运行`rlang::last_error()`查看错误发生的位置。
在 unite 中，我还尝试过使用 `col = .x` 或 `col = !!quo(.x)`，但得到了相同的错误。
Edit1我期望得到的结果
我这样做是为了得到一个 tibble：每个 RNAcentral_id 占一行（一个条目），原来的列表列变成由多个条目用分隔符“;”连接而成的字符串。
ENSEMBL一列、GENCODE一列等
我们可以在这里直接使用 `pivot_wider`：
# Reshape to one row per RNAcentral_id / RNA_type with one column per
# Database value. Several gene names can fall into the same cell, so they are
# collapsed into a single string right away instead of producing list-columns
# that would have to be unnested later.
# The collapse uses ";" because that is the separator the question asks for;
# toString() would join with ", " instead.
tidyr::pivot_wider(
  my_data,
  names_from = Database,
  values_from = gene_name,
  values_fn = function(x) paste(x, collapse = ";")
)
或者使用 data.table 的 `dcast`：
library(data.table)
# Same reshape with data.table: one row per RNA_type / RNAcentral_id, one
# column per Database, gene names in the same cell joined with ";" (the
# separator the question asks for; toString() would use ", ").
# as.data.table() works on a copy, whereas setDT() would convert the my_data
# tibble itself into a data.table by reference.
dcast(
  as.data.table(my_data),
  RNA_type + RNAcentral_id ~ Database,
  value.var = "gene_name",
  fun.aggregate = function(x) paste(x, collapse = ";")
)
对不起，我没有说明期望的结果。我需要每个 RNAcentral_id 只有一行，这就是为什么我要先用 unnest_wider 展开，再用 unite 把生成的列合并起来，重新得到原来的列。@KGeles 哦……我明白了。所以，也许你需要的是 `pivot_wider(my_data, names_from = Database, values_from = gene_name, values_fn = toString)`？
天哪，这是什么魔法 :P 就是这样……你能把它也加到你的答案里吗（作为编辑）？当然，已更新答案。
library(data.table)
# Reshape with data.table: one row per RNA_type / RNAcentral_id, one column
# per Database, gene names in the same cell joined with ";" (the separator
# the question asks for; toString() would use ", ").
# as.data.table() works on a copy, whereas setDT() would convert the my_data
# tibble itself into a data.table by reference.
dcast(
  as.data.table(my_data),
  RNA_type + RNAcentral_id ~ Database,
  value.var = "gene_name",
  fun.aggregate = function(x) paste(x, collapse = ";")
)