R 基于另一列的列值
我有一些具有如下结构的数据:
## Column examples generation
# Nucleotide codes used to simulate the two allele calls of each row.
bases <- c("A", "T", "C", "G")
ID <- c(1, 2, 3, 4, 5, 6)
# Each SNP label is repeated once per ID, so there are 3 * length(ID) rows.
SNP <- rep(c("F1", "F3", "F4"), each = length(ID))
# Alleles are drawn at random; call set.seed() first for reproducible data.
Al_1 <- sample(bases, length(SNP), replace = TRUE)
Al_2 <- sample(bases, length(SNP), replace = TRUE)
tipo <- rep(".", length(SNP))

## Data frame generation:
# Every column is kept as character. ID is recycled across the SNP labels
# explicitly instead of relying on cbind() recycling inside a matrix.
ArrDat <- data.frame(
  ID = rep_len(as.character(ID), length(SNP)),
  SNP = SNP,
  Al_1 = Al_1,
  Al_2 = Al_2,
  tipo = tipo,
  stringsAsFactors = FALSE
)
# Sort on the numeric value of ID: ordering the character column directly
# would place "10" before "2" as soon as there are ten or more IDs.
OrderArr <- ArrDat[order(as.numeric(ArrDat$ID)), ]

## Column "tipo" values:
# Lookup table for the ordered allele pair: rows are Al_1, columns are Al_2.
# The diagonal holds the placeholder codes for identical alleles.
tipo_codes <- matrix(
  c("STHG.A", "a", "b", "c",
    "d", "STHG.T", "e", "f",
    "g", "h", "STHG.C", "i",
    "j", "k", "l", "STHG.G"),
  nrow = 4L, byrow = TRUE,
  dimnames = list(c("A", "T", "C", "G"), c("A", "T", "C", "G"))
)
# Index the table with one (row, column) pair per data row. match() returns
# NA for anything outside A/T/C/G (including missing alleles), and a matrix
# index containing NA yields NA, which is mapped to the fallback code "x".
pair_index <- cbind(
  match(OrderArr$Al_1, rownames(tipo_codes)),
  match(OrderArr$Al_2, colnames(tipo_codes))
)
OrderArr$tipo <- tipo_codes[pair_index]
OrderArr$tipo[is.na(OrderArr$tipo)] <- "x"
我的问题在于 Al_1-Al_2 为 A-A、T-T、C-C 或 G-G 这几种组合时的 OrderArr$tipo 取值。
这些行的 OrderArr$tipo 值应当与具有相同 OrderArr$SNP 值的其他行一致,因此对于上面生成的数据,期望得到的结果应该是:
ID SNP Al_1 Al_2 tipo
1 1 F1 T A d
7 1 F3 C A g
13 1 F4 G C l
2 2 F1 T T d
8 2 F3 C C g
14 2 F4 C C l
如何在代码中实现这一点?
非常感谢。我创建了一个数据框,其中每个SNP只有一种Al_1和Al_2的组合:
ID SNP Al_1 Al_2 combo tipo
1 1 F1 A T AT a
2 1 F4 G G GG z
3 1 D2 C T CT h
4 1 D4 T C TC e
5 1 HY7 A A AA z
6 1 HY66 T G TG f
7 1 XZD1 C A CA g
8 1 XZD33 G A GA j
9 2 F1 A A AA z
10 2 F4 C G CG i
11 2 D2 C C CC z
12 2 D4 T C TC e
13 2 HY7 A A AA z
14 2 HY66 G G GG z
15 2 XZD1 C A CA g
16 2 XZD33 G A GA j
17 3 F1 T T TT z
18 3 F4 C C CC z
19 3 D2 C T CT h
20 3 D4 T C TC e
21 3 HY7 A C AC b
22 3 HY66 G G GG z
23 3 XZD1 A A AA z
24 3 XZD33 A A AA z
25 4 F1 A T AT a
26 4 F4 C G CG i
27 4 D2 C T CT h
28 4 D4 T T TT z
29 4 HY7 C C CC z
30 4 HY66 T T TT z
31 4 XZD1 C A CA g
32 4 XZD33 A A AA z
33 5 F1 T T TT z
34 5 F4 C G CG i
35 5 D2 T T TT z
36 5 D4 T T TT z
37 5 HY7 A A AA z
38 5 HY66 T G TG f
39 5 XZD1 A A AA z
40 5 XZD33 G G GG z
41 6 F1 A T AT a
42 6 F4 G G GG z
43 6 D2 T T TT z
44 6 D4 C C CC z
45 6 HY7 C C CC z
46 6 HY66 T T TT z
47 6 XZD1 C C CC z
48 6 XZD33 G A GA j
我想我对你的问题有一个答案:
# Step 1: classify every row of `data` from its ordered allele pair.
# Expects `data` to be a data frame with columns SNP, Al_1 and Al_2.
data$combo <- paste0(data$Al_1, data$Al_2)
snp <- unique(data$SNP)

# tipo code for each ordered pair: rows are Al_1, columns are Al_2.
# The diagonal (identical alleles) carries the placeholder "z".
allele_codes <- c("A", "T", "C", "G")
tipo_table <- matrix(
  c("z", "a", "b", "c",
    "d", "z", "e", "f",
    "g", "h", "z", "i",
    "j", "k", "l", "z"),
  nrow = 4L, byrow = TRUE,
  dimnames = list(allele_codes, allele_codes)
)

al_1 <- as.character(data$Al_1)
al_2 <- as.character(data$Al_2)
# match() gives NA for codes outside A/T/C/G; a matrix index containing NA
# yields NA, so unknown or missing alleles no longer abort the script.
new_tipo <- tipo_table[
  cbind(match(al_1, allele_codes), match(al_2, allele_codes))
]
# Any identical pair is "z", even when the code is not one of A/T/C/G.
new_tipo[!is.na(al_1) & !is.na(al_2) & al_1 == al_2] <- "z"
# Pairs the table does not cover keep whatever tipo they already had.
old_tipo <- if ("tipo" %in% names(data)) {
  as.character(data[["tipo"]])
} else {
  rep(NA_character_, nrow(data))
}
unmatched <- is.na(new_tipo)
new_tipo[unmatched] <- old_tipo[unmatched]
data$tipo <- new_tipo

# Step 2: within each SNP, replace the "z" placeholder with the tipo of the
# non-"z" rows of the same SNP. `ord.data` itself keeps the placeholders.
ord.data <- data
# Integer code of each SNP in sorted-level order; it drives both the grouping
# and the row order of the result.
snp_code <- as.integer(as.factor(ord.data$SNP))
filled <- ord.data$tipo
is_z <- !is.na(filled) & filled == "z"
# Only placeholders whose allele is a known base are filled in.
fillable <- is_z & al_1 %in% allele_codes
for (rows in split(seq_along(snp_code), snp_code)) {
  group_tipo <- filled[rows]
  donors <- group_tipo[!is.na(group_tipo) & !is_z[rows]]
  # A SNP with no non-"z" row has nothing to copy from: its rows stay "z"
  # instead of failing with "replacement has length zero". When several
  # different codes are present, the first one in row order is used.
  if (length(donors) > 0L) {
    filled[rows[fillable[rows]]] <- donors[1L]
  }
}
# Rows come out grouped by SNP (sorted level order) and in their original
# order within a group; building the result in one step avoids growing a
# data frame with rbind() inside a loop.
row_order <- order(snp_code)
ord.data2 <- ord.data[row_order, , drop = FALSE]
ord.data2$tipo <- filled[row_order]
你能用一些假数据和你想得到的目标数据准备一个小数据框吗?STH 代表什么……?我不明白你所说的“某一行的 OrderArr$tipo 值可能与 OrderArr$SNP 值相同的其他行相同”是什么意思。您能告诉我们您的 OrderArr$tipo 列应该是什么样子吗?STHG 只是“某个东西”的意思,用来确认 if 子句确实起作用。我编辑了这篇文章,希望我现在说清楚了!SNP、Al_1 和 Al_2 的组合是分离的吗?
# Step 1: classify every row of `data` from its ordered allele pair.
# Expects `data` to be a data frame with columns SNP, Al_1 and Al_2.
data$combo <- paste0(data$Al_1, data$Al_2)
snp <- unique(data$SNP)

# tipo code for each ordered pair: rows are Al_1, columns are Al_2.
# The diagonal (identical alleles) carries the placeholder "z".
allele_codes <- c("A", "T", "C", "G")
tipo_table <- matrix(
  c("z", "a", "b", "c",
    "d", "z", "e", "f",
    "g", "h", "z", "i",
    "j", "k", "l", "z"),
  nrow = 4L, byrow = TRUE,
  dimnames = list(allele_codes, allele_codes)
)

al_1 <- as.character(data$Al_1)
al_2 <- as.character(data$Al_2)
# match() gives NA for codes outside A/T/C/G; a matrix index containing NA
# yields NA, so unknown or missing alleles no longer abort the script.
new_tipo <- tipo_table[
  cbind(match(al_1, allele_codes), match(al_2, allele_codes))
]
# Any identical pair is "z", even when the code is not one of A/T/C/G.
new_tipo[!is.na(al_1) & !is.na(al_2) & al_1 == al_2] <- "z"
# Pairs the table does not cover keep whatever tipo they already had.
old_tipo <- if ("tipo" %in% names(data)) {
  as.character(data[["tipo"]])
} else {
  rep(NA_character_, nrow(data))
}
unmatched <- is.na(new_tipo)
new_tipo[unmatched] <- old_tipo[unmatched]
data$tipo <- new_tipo

# Step 2: within each SNP, replace the "z" placeholder with the tipo of the
# non-"z" rows of the same SNP. `ord.data` itself keeps the placeholders.
ord.data <- data
# Integer code of each SNP in sorted-level order; it drives both the grouping
# and the row order of the result.
snp_code <- as.integer(as.factor(ord.data$SNP))
filled <- ord.data$tipo
is_z <- !is.na(filled) & filled == "z"
# Only placeholders whose allele is a known base are filled in.
fillable <- is_z & al_1 %in% allele_codes
for (rows in split(seq_along(snp_code), snp_code)) {
  group_tipo <- filled[rows]
  donors <- group_tipo[!is.na(group_tipo) & !is_z[rows]]
  # A SNP with no non-"z" row has nothing to copy from: its rows stay "z"
  # instead of failing with "replacement has length zero". When several
  # different codes are present, the first one in row order is used.
  if (length(donors) > 0L) {
    filled[rows[fillable[rows]]] <- donors[1L]
  }
}
# Rows come out grouped by SNP (sorted level order) and in their original
# order within a group; building the result in one step avoids growing a
# data frame with rbind() inside a loop.
row_order <- order(snp_code)
ord.data2 <- ord.data[row_order, , drop = FALSE]
ord.data2$tipo <- filled[row_order]