Warning: file_get_contents(/data/phpspider/zhask/data//catemap/5/tfs/3.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 使用unite和map整理评估,以便取消嵌套列表数据集的测试_R_Tidyverse_Tidyr - Fatal编程技术网

R 使用unite和map整理评估,以便取消嵌套列表数据集的测试

R 使用unite和map整理评估,以便取消嵌套列表数据集的测试,r,tidyverse,tidyr,R,Tidyverse,Tidyr,我正在尝试卸载我用pivot_生成的数据集, 其中有多个列需要取消列表。 在完整数据集上,unnest函数不起作用(我得到一个错误:>错误:不兼容的长度:3,2。) 所以我尝试了一个解决办法 数据集的一部分: my_data <- structure(list(RNAcentral_id = c("URS000000C731", "URS000000C731", "URS000000C731", "URS00000

我正在尝试对用 pivot_wider 生成的数据集取消嵌套(unnest),其中有多个列表列需要展开。在完整数据集上,unnest 函数不起作用(我得到一个错误:不兼容的长度:3, 2),所以我尝试了一个变通办法。数据集的一部分:

 # Example input in long format (dput-style literal): an 88-row tibble with the
 # character columns RNAcentral_id, Database, RNA_type and gene_name.
 # The trailing `spec` attribute is a col_spec with delim "\t"; it also lists
 # external_id and NCBI_taxon_id, which are not among the columns kept here
 # (presumably dropped after the file was read -- TODO confirm).
 my_data <-  structure(list(RNAcentral_id = c("URS000000C731", "URS000000C731", 
"URS000000C731", "URS000000C731", "URS000001F3AA", "URS000001F3AA", 
"URS000001F3AA", "URS000001F3AA", "URS000001F3AA", "URS000001F3AA", 
"URS000001F3AA", "URS000001F3AA", "URS000001F3AA", "URS000001F3AA", 
"URS0000023ED8", "URS0000023ED8", "URS0000023ED8", "URS0000023ED8", 
"URS0000023ED8", "URS0000023ED8", "URS0000023ED8", "URS0000023ED8", 
"URS0000023ED8", "URS0000023ED8", "URS0000023ED8", "URS0000023ED8", 
"URS0000050C72", "URS0000050C72", "URS0000050C72", "URS0000050C72", 
"URS0000050C72", "URS0000050C72", "URS0000050C72", "URS0000050C72", 
"URS0000050C72", "URS0000050C72", "URS0000050C72", "URS0000050C72", 
"URS0000050C72", "URS0000050C72", "URS0000050C72", "URS0000050C72", 
"URS00000527A6", "URS00000527A6", "URS00000527A6", "URS00000527A6", 
"URS00000527A6", "URS00000527A6", "URS00000527A6", "URS00000527A6", 
"URS00000527A6", "URS000007CAC8", "URS000007CAC8", "URS000007CAC8", 
"URS000007CAC8", "URS000007CAC8", "URS000007DA54", "URS000007DA54", 
"URS000007DA54", "URS000007DA54", "URS000007DA54", "URS000007DA54", 
"URS000007DA54", "URS000007DA54", "URS000007F1D7", "URS000007F1D7", 
"URS000007F1D7", "URS000007F1D7", "URS000007F1D7", "URS000007F1D7", 
"URS000007F1D7", "URS000007F1D7", "URS000007F1D7", "URS000007F1D7", 
"URS0000088F47", "URS0000088F47", "URS0000088F47", "URS0000088F47", 
"URS0000088F47", "URS0000088F47", "URS0000088F47", "URS00000B589B", 
"URS00000B589B", "URS00000B589B", "URS00000B589B", "URS00000B589B", 
"URS00000B589B", "URS00000B589B"), Database = c("ENSEMBL", "ENSEMBL", 
"ENSEMBL", "GENCODE", "ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL", 
"ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL", "GENCODE", "LNCIPEDIA", 
"ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL", "GENECARDS", "LNCBOOK", 
"LNCIPEDIA", "NONCODE", "NONCODE", "NONCODE", "NONCODE", "NONCODE", 
"ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL", 
"ENSEMBL", "GENCODE", "LNCBOOK", "NONCODE", "NONCODE", "NONCODE", 
"NONCODE", "NONCODE", "NONCODE", "NONCODE", "ENSEMBL", "ENSEMBL", 
"ENSEMBL", "GENCODE", "GENECARDS", "GENECARDS", "LNCBOOK", "LNCIPEDIA", 
"NONCODE", "ENSEMBL", "ENSEMBL", "ENSEMBL", "GENCODE", "NONCODE", 
"ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL", "GENCODE", 
"LNCBOOK", "NONCODE", "ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL", 
"GENCODE", "LNCBOOK", "NONCODE", "NONCODE", "NONCODE", "NONCODE", 
"ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL", "GENCODE", "GENECARDS", 
"LNCIPEDIA", "ENA", "ENSEMBL", "ENSEMBL", "ENSEMBL", "ENSEMBL", 
"ENSEMBL", "GENCODE"), RNA_type = c("lncRNA", "lncRNA", "lncRNA", 
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", 
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", 
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", 
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", 
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", 
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", 
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", 
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", 
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", 
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", 
"lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", "lncRNA", 
"lncRNA", "snoRNA", "snoRNA", "snoRNA", "snoRNA", "snoRNA", "snoRNA", 
"snoRNA"), gene_name = c("ENSG00000250666.1", "ENSG00000281830.1", 
"ENSG00000281377.1", "LINC01596", "ENSG00000242086.8", "ENSG00000280512.2", 
"ENSG00000281603.2", "ENSG00000281060.2", "ENSG00000281794.2", 
"ENSG00000281915.2", "ENSG00000280993.2", "ENSG00000282953.1", 
"MUC20-OT1", "lnc-MUC20-67", "ENSG00000235273.1", "ENSG00000233950.1", 
"ENSG00000230089.1", "ENSG00000225188.1", "LOC101929006", "HSALNG0049045", 
"lnc-OR14J1-2", "NONHSAG043350.2", "NONHSAG045640.2", "NONHSAG045830.2", 
"NONHSAG046018.2", "NONHSAG046538.2", "ENSG00000231860.1", "ENSG00000224328.1", 
"ENSG00000236766.1", "ENSG00000224508.1", "ENSG00000236522.1", 
"ENSG00000229681.1", "ENSG00000233883.1", "MDC1-AS1", "HSALNG0049184", 
"NONHSAG043427.2", "NONHSAG045580.2", "NONHSAG045701.2", "NONHSAG045891.2", 
"NONHSAG046074.2", "NONHSAG046228.2", "NONHSAG046589.2", "ENSG00000249981.1", 
"ENSG00000276297.1", "ENSG00000280619.1", "AC145141.1", "LOC107987420", 
"LOC107987434", "HSALNG0042531", "lnc-BDP1-1", "NONHSAG040656.2", 
"ENSG00000242086.8", "ENSG00000280512.2", "ENSG00000281794.2", 
"MUC20-OT1", "NONHSAG037073.2", "ENSG00000242086.8", "ENSG00000280512.2", 
"ENSG00000281794.2", "ENSG00000281060.2", "ENSG00000282953.1", 
"MUC20-OT1", "HSALNG0031832", "NONHSAG037073.2", "ENSG00000224835.1", 
"ENSG00000227198.1", "ENSG00000233169.1", "ENSG00000225390.1", 
"C6orf47-AS1", "HSALNG0049305", "NONHSAG043504.2", "NONHSAG046125.2", 
"NONHSAG046270.2", "NONHSAG046461.2", "ENSG00000272566.1", "ENSG00000280590.1", 
"ENSG00000280853.1", "ENSG00000281916.1", "AF250324.1", "ENSG00000272566", 
"lnc-FRG2-13", "ACA38 snoRNA", "ENSG00000200816.1", "ENSG00000266847.1", 
"ENSG00000263994.1", "ENSG00000264153.1", "ENSG00000263879.1", 
"SNORA38")), row.names = c(NA, -88L), class = c("tbl_df", "tbl", 
"data.frame"), spec = structure(list(cols = list(RNAcentral_id = structure(list(), class = c("collector_character", 
"collector")), Database = structure(list(), class = c("collector_character", 
"collector")), external_id = structure(list(), class = c("collector_character", 
"collector")), NCBI_taxon_id = structure(list(), class = c("collector_double", 
"collector")), RNA_type = structure(list(), class = c("collector_character", 
"collector")), gene_name = structure(list(), class = c("collector_character", 
"collector"))), default = structure(list(), class = c("collector_guess", 
"collector")), delim = "\t"), class = "col_spec"))
我的解决方法尝试:

# Spread the Database values into one column each, filled from gene_name.
# No values_fn is given, so cells that receive several gene names are kept
# as list entries (this is what the later unnesting attempt works on).
mynested_data <- pivot_wider(
  my_data,
  names_from = Database,
  values_from = gene_name
)

# Attempted workaround: for every database column name, widen that list-column
# into "<name>_<i>" columns and then paste them back into a single column
# separated by ";". The result would be a named list with one tibble per name.
# NOTE(review): this call fails with the "wrong type `quosures`" error printed
# right after it. Presumably the cause is vars(), which yields quosures that
# unite()'s column selection does not accept; passing starts_with(.x) directly
# is the likely fix -- TODO confirm.
c("ENSEMBL", "GENCODE", "NONCODE", "ENA", "GENECARDS", "LNCBOOK", 
  "LNCIPEDIA") %>% 
   # name each element after itself so the resulting list is named
   set_names(.) %>% 
   map(~ mynested_data %>%  
         unnest_wider(.x, names_sep = "_") %>%
         unite(col = !!.x, vars(starts_with(!!quo(.x))), sep = ";"))

Error: Must subset columns with a valid subscript vector.
x Subscript has the wrong type `quosures`.
ℹ It must be numeric or character.
Run `rlang::last_error()` to see where the error occurred.
myu嵌套数据%
pivot_更宽(名称来自=数据库,值来自=c(基因名称))
c(“ENSEMBL”、“GENCODE”、“NONCODE”、“ENA”、“GENECARDS”、“LNCBOOK”,
“保密媒体”)%%>%
设置_名称(%)%%>%
映射(~mynested_data%>%
unnest_加宽(.x,名称_sep=“”)%%>%
unite(col=!!.x,vars(以(!!quo(.x))开头),sep=“;”)
错误:必须使用有效的下标向量子集列。
x下标的类型“quosures”错误。
ℹ 必须是数字或字符。
运行`rlang::last_error()`查看错误发生的位置。
在unite中,我还尝试使用
col=.x
col=!!quo(.x)
但我得到了相同的错误

Edit1:我期望得到的结果。我这样做是为了得到一个每行(条目)对应一个 RNAcentral_id 的 tibble,其中各列表列(list-column)里的多个条目用分隔符“;”连接成一个字符串:ENSEMBL 一列、GENCODE 一列,等等。


我们可以在这里直接使用 `pivot_wider`(通过 `values_fn` 参数):

# Reshape to wide format in a single step: one column per Database value.
# Multiple gene names landing in the same cell are collapsed by toString(),
# i.e. joined into one string separated by ", ".
tidyr::pivot_wider(
  data = my_data,
  names_from = Database,
  values_from = gene_name,
  values_fn = toString
)
或者使用 `data.table` 包中的 `dcast`:

library(data.table)

# Wide reshape with data.table: one row per RNA_type/RNAcentral_id pair and
# one column per Database; gene names sharing a cell are joined by toString().
# Note: setDT() converts my_data to a data.table by reference.
dcast(
  setDT(my_data),
  RNA_type + RNAcentral_id ~ Database,
  value.var = "gene_name",
  fun.aggregate = toString
)

对不起,我没有说明预期结果。我需要每个 RNAcentral_id 只有一行,这就是为什么我先用 unnest_wider 展开,再把生成的列合并起来,以重建原来的列。@KGeles 哦……我明白了。所以,也许你需要的是 `pivot_wider(my_data, names_from = Database, values_from = gene_name, values_fn = toString)`?天哪,这是什么魔法 :P 就是这样……你能把它也作为编辑加到你的答案里吗?当然,已更新答案。
library(data.table)

# Turn my_data into a data.table in place (by reference), then cast it wide:
# rows are RNA_type/RNAcentral_id pairs, columns are the Database values, and
# gene names that fall into the same cell are combined with toString().
setDT(my_data)
dcast(
  my_data,
  formula = RNA_type + RNAcentral_id ~ Database,
  value.var = "gene_name",
  fun.aggregate = toString
)