Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/64.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 查找数据对之间的公共共享值_R - Fatal编程技术网

R 查找数据对之间的公共共享值

R 查找数据对之间的公共共享值,r,R,我是编程新手,刚刚开始学习R,因此我请求大家不要介意我的无知。 我目前正在处理的数据如下所示: 我有以下格式的数据。 例如: 疾病基因符号 疾病AFOXJ1 疾病BMYB 疾病BGATA4 疾病CMYB 疾病DGATA4 大约有250个这样的条目。 我希望看到以下格式的数据: 疾病1常见共享基因符号疾病2 疾病AMYB,FOXJ1疾病B 疾病CMYB疾病B 疾病BGATA4疾病D 我的做法是:我将流程分为3个步骤: 步骤1:将疾病成对组合 第2步:找到与每种疾病相关的基因符号,并将其分配给载体

我是编程新手,刚刚开始学习R,因此我请求大家不要介意我的无知。 我目前正在处理的数据如下所示:

我有以下格式的数据。 例如:

疾病 基因符号
疾病A FOXJ1
疾病B MYB
疾病B GATA4
疾病C MYB
疾病D GATA4

大约有250个这样的条目。 我希望看到以下格式的数据:

疾病1 共有的基因符号 疾病2

疾病A MYB,FOXJ1 疾病B

疾病C MYB 疾病B

疾病B GATA4 疾病D

我的做法是:我将流程分为3个步骤:

步骤1:将疾病成对组合

步骤2:找到与每种疾病相关的基因符号,并将其赋值给一个向量

步骤3:对这些创建好的向量使用 intersect() 函数来查找共有的基因符号

我相信一定有比这更简单的事情

任何帮助都将不胜感激! 多谢各位

问候,
S

使用 combinat 包的解决方案是:

library(combinat)

# Example data: one row per disease/gene association. No seed is set, so the
# sampled genes (and therefore the result) differ from run to run.
DF <- data.frame(
  Disease = LETTERS[1:10],
  Gene = sample(letters[1:4], 10, replace = TRUE),
  stringsAsFactors = FALSE
)

# One possible draw:
#> DF
#   Disease Gene
#1        A    a
#2        B    a
#3        C    c
#4        D    b
#5        E    d
#6        F    b
#7        G    c
#8        H    d
#9        I    b
#10       J    d

# All unordered pairs of distinct diseases, one pair per column (see `?combn`).
# unique() keeps a disease that occupies several rows from being paired with
# itself or from producing the same pair more than once.
dis_combns <- combn(unique(DF$Disease), 2)

# Genes shared by BOTH diseases of each pair, collapsed into a single string.
# intersect() keeps only the genes present in both sets (union() would list
# every gene of either disease). vapply() over the column indices always
# yields exactly one string per pair, "" when nothing is shared.
commons <- vapply(
  seq_len(ncol(dis_combns)),
  function(i) {
    genes_first <- DF$Gene[DF$Disease == dis_combns[1, i]]
    genes_second <- DF$Gene[DF$Disease == dis_combns[2, i]]
    paste(intersect(genes_first, genes_second), collapse = " and ")
  },
  character(1)
)

# Keep only the pairs that share at least one gene.
has_shared <- nzchar(commons)

# Result: one row per pair of diseases with a non-empty set of shared genes.
resultDF <- data.frame(
  Disease1 = dis_combns[1, has_shared],
  Common_genes = commons[has_shared],
  Disease2 = dis_combns[2, has_shared],
  stringsAsFactors = FALSE
)

# Result for the draw shown above:
#> resultDF
#  Disease1 Common_genes Disease2
#1        A            a        B
#2        C            c        G
#3        D            b        F
#4        D            b        I
#5        E            d        H
#6        E            d        J
#7        F            b        I
#8        H            d        J
库(combinat)
#随机数据
DF-DF
#疾病基因
#A
#2 B a
#3C
#4 D b
#5 E d
#6 F b
#7克c
#8hD
#9 I b
#10 J d
#所有可能的疾病组合

dis_combns使用
combinat
包的解决方案是:

library(combinat)

# Example data: one row per disease/gene association. No seed is set, so the
# sampled genes (and therefore the result) differ from run to run.
DF <- data.frame(
  Disease = LETTERS[1:10],
  Gene = sample(letters[1:4], 10, replace = TRUE),
  stringsAsFactors = FALSE
)

# One possible draw:
#> DF
#   Disease Gene
#1        A    a
#2        B    a
#3        C    c
#4        D    b
#5        E    d
#6        F    b
#7        G    c
#8        H    d
#9        I    b
#10       J    d

# All unordered pairs of distinct diseases, one pair per column (see `?combn`).
# unique() keeps a disease that occupies several rows from being paired with
# itself or from producing the same pair more than once.
dis_combns <- combn(unique(DF$Disease), 2)

# Genes shared by BOTH diseases of each pair, collapsed into a single string.
# intersect() keeps only the genes present in both sets (union() would list
# every gene of either disease). vapply() over the column indices always
# yields exactly one string per pair, "" when nothing is shared.
commons <- vapply(
  seq_len(ncol(dis_combns)),
  function(i) {
    genes_first <- DF$Gene[DF$Disease == dis_combns[1, i]]
    genes_second <- DF$Gene[DF$Disease == dis_combns[2, i]]
    paste(intersect(genes_first, genes_second), collapse = " and ")
  },
  character(1)
)

# Keep only the pairs that share at least one gene.
has_shared <- nzchar(commons)

# Result: one row per pair of diseases with a non-empty set of shared genes.
resultDF <- data.frame(
  Disease1 = dis_combns[1, has_shared],
  Common_genes = commons[has_shared],
  Disease2 = dis_combns[2, has_shared],
  stringsAsFactors = FALSE
)

# Result for the draw shown above:
#> resultDF
#  Disease1 Common_genes Disease2
#1        A            a        B
#2        C            c        G
#3        D            b        F
#4        D            b        I
#5        E            d        H
#6        E            d        J
#7        F            b        I
#8        H            d        J
库(combinat)
#随机数据
DF-DF
#疾病基因
#A
#2 B a
#3C
#4 D b
#5 E d
#6 F b
#7克c
#8hD
#9 I b
#10 J d
#所有可能的疾病组合

dis_combns使用
combinat
包的解决方案是:

library(combinat)

# Example data: one row per disease/gene association. No seed is set, so the
# sampled genes (and therefore the result) differ from run to run.
DF <- data.frame(
  Disease = LETTERS[1:10],
  Gene = sample(letters[1:4], 10, replace = TRUE),
  stringsAsFactors = FALSE
)

# One possible draw:
#> DF
#   Disease Gene
#1        A    a
#2        B    a
#3        C    c
#4        D    b
#5        E    d
#6        F    b
#7        G    c
#8        H    d
#9        I    b
#10       J    d

# All unordered pairs of distinct diseases, one pair per column (see `?combn`).
# unique() keeps a disease that occupies several rows from being paired with
# itself or from producing the same pair more than once.
dis_combns <- combn(unique(DF$Disease), 2)

# Genes shared by BOTH diseases of each pair, collapsed into a single string.
# intersect() keeps only the genes present in both sets (union() would list
# every gene of either disease). vapply() over the column indices always
# yields exactly one string per pair, "" when nothing is shared.
commons <- vapply(
  seq_len(ncol(dis_combns)),
  function(i) {
    genes_first <- DF$Gene[DF$Disease == dis_combns[1, i]]
    genes_second <- DF$Gene[DF$Disease == dis_combns[2, i]]
    paste(intersect(genes_first, genes_second), collapse = " and ")
  },
  character(1)
)

# Keep only the pairs that share at least one gene.
has_shared <- nzchar(commons)

# Result: one row per pair of diseases with a non-empty set of shared genes.
resultDF <- data.frame(
  Disease1 = dis_combns[1, has_shared],
  Common_genes = commons[has_shared],
  Disease2 = dis_combns[2, has_shared],
  stringsAsFactors = FALSE
)

# Result for the draw shown above:
#> resultDF
#  Disease1 Common_genes Disease2
#1        A            a        B
#2        C            c        G
#3        D            b        F
#4        D            b        I
#5        E            d        H
#6        E            d        J
#7        F            b        I
#8        H            d        J
库(combinat)
#随机数据
DF-DF
#疾病基因
#A
#2 B a
#3C
#4 D b
#5 E d
#6 F b
#7克c
#8hD
#9 I b
#10 J d
#所有可能的疾病组合

dis_combns使用
combinat
包的解决方案是:

library(combinat)

# Example data: one row per disease/gene association. No seed is set, so the
# sampled genes (and therefore the result) differ from run to run.
DF <- data.frame(
  Disease = LETTERS[1:10],
  Gene = sample(letters[1:4], 10, replace = TRUE),
  stringsAsFactors = FALSE
)

# One possible draw:
#> DF
#   Disease Gene
#1        A    a
#2        B    a
#3        C    c
#4        D    b
#5        E    d
#6        F    b
#7        G    c
#8        H    d
#9        I    b
#10       J    d

# All unordered pairs of distinct diseases, one pair per column (see `?combn`).
# unique() keeps a disease that occupies several rows from being paired with
# itself or from producing the same pair more than once.
dis_combns <- combn(unique(DF$Disease), 2)

# Genes shared by BOTH diseases of each pair, collapsed into a single string.
# intersect() keeps only the genes present in both sets (union() would list
# every gene of either disease). vapply() over the column indices always
# yields exactly one string per pair, "" when nothing is shared.
commons <- vapply(
  seq_len(ncol(dis_combns)),
  function(i) {
    genes_first <- DF$Gene[DF$Disease == dis_combns[1, i]]
    genes_second <- DF$Gene[DF$Disease == dis_combns[2, i]]
    paste(intersect(genes_first, genes_second), collapse = " and ")
  },
  character(1)
)

# Keep only the pairs that share at least one gene.
has_shared <- nzchar(commons)

# Result: one row per pair of diseases with a non-empty set of shared genes.
resultDF <- data.frame(
  Disease1 = dis_combns[1, has_shared],
  Common_genes = commons[has_shared],
  Disease2 = dis_combns[2, has_shared],
  stringsAsFactors = FALSE
)

# Result for the draw shown above:
#> resultDF
#  Disease1 Common_genes Disease2
#1        A            a        B
#2        C            c        G
#3        D            b        F
#4        D            b        I
#5        E            d        H
#6        E            d        J
#7        F            b        I
#8        H            d        J
库(combinat)
#随机数据
DF-DF
#疾病基因
#A
#2 B a
#3C
#4 D b
#5 E d
#6 F b
#7克c
#8hD
#9 I b
#10 J d
#所有可能的疾病组合

dis_combns欢迎使用堆栈溢出。请试着用一个可复制的例子来说明你的情况。您可以阅读欢迎使用堆栈溢出。请试着用一个可复制的例子来说明你的情况。您可以阅读欢迎使用堆栈溢出。请试着用一个可复制的例子来说明你的情况。您可以阅读欢迎使用堆栈溢出。请试着用一个可复制的例子来说明你的情况。你可以阅读,非常感谢!那帮了大忙!非常感谢你!那帮了大忙!非常感谢你!那帮了大忙!非常感谢你!那帮了大忙!