Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/67.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181

Warning: file_get_contents(/data/phpspider/zhask/data//catemap/8/file/3.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 基于另一列的列值_R - Fatal编程技术网

R 基于另一列的列值

R 基于另一列的列值,r,R,我有一些关于这种结构的数据: ## Column examples generation bases <- c("A", "T", "C", "G") ID <- c(1,2,3,4,5,6) SNP <- rep (c("F1", "F3", "F4"), each=length(ID)) Al_1 <- sample(bases, length(SNP), replace=T) Al_2 <- sample(bases, length(SNP), replace

我有一些关于这种结构的数据:

## Column examples generation
# Random example genotypes: 6 individuals x 3 SNPs, one allele pair per row.
bases <- c("A", "T", "C", "G")
ID <- c(1, 2, 3, 4, 5, 6)
SNP <- rep(c("F1", "F3", "F4"), each = length(ID))
# TRUE rather than T: T is an ordinary variable and can be reassigned.
Al_1 <- sample(bases, length(SNP), replace = TRUE)
Al_2 <- sample(bases, length(SNP), replace = TRUE)
tipo <- rep(".", length(SNP))

## Data frame generation:
# Every column is stored as character (ID included), and ID is recycled to
# one value per row, so the frame matches what cbind() + as.character() gave.
ArrDat <- data.frame(
  ID = rep_len(as.character(ID), length(SNP)),
  SNP = SNP,
  Al_1 = Al_1,
  Al_2 = Al_2,
  tipo = tipo,
  stringsAsFactors = FALSE
)
OrderArr <- ArrDat[order(ArrDat$ID), ]

## Column "tipo" values:
# Lookup table keyed by the concatenated allele pair (Al_1 followed by Al_2).
pair_codes <- c(
  AT = "a", AC = "b", AG = "c",
  TA = "d", TC = "e", TG = "f",
  CA = "g", CT = "h", CG = "i",
  GA = "j", GT = "k", GC = "l",
  AA = "STHG.A", TT = "STHG.T", CC = "STHG.C", GG = "STHG.G"
)
allele_pair <- paste0(OrderArr$Al_1, OrderArr$Al_2)
pair_tipo <- unname(pair_codes[allele_pair])
# Any pair that is not in the table (including missing alleles) gets "x",
# which is what the final `else` branch of the row-by-row version assigned.
pair_tipo[is.na(pair_tipo)] <- "x"
OrderArr$tipo <- pair_tipo
我的问题出在 Al_1-Al_2 组合为 A-A、T-T、C-C 或 G-G 的行上:这些行的 OrderArr$tipo 值应当与 OrderArr$SNP 值相同的其他行的 tipo 值一致,因此对于上面的数据,我期望得到的结果是:

   ID SNP Al_1 Al_2   tipo
1   1  F1    T    A      d
7   1  F3    C    A      g
13  1  F4    G    C      l
2   2  F1    T    T      d
8   2  F3    C    C      g
14  2  F4    C    C      l
如何在代码中实现这一点


非常感谢。

我构造了一个数据框,其中每个 SNP 只对应一种杂合的 Al_1/Al_2 组合(纯合的行暂时标记为 z):

   ID   SNP Al_1 Al_2 combo tipo
1   1    F1    A    T    AT    a
2   1    F4    G    G    GG    z
3   1    D2    C    T    CT    h
4   1    D4    T    C    TC    e
5   1   HY7    A    A    AA    z
6   1  HY66    T    G    TG    f
7   1  XZD1    C    A    CA    g
8   1 XZD33    G    A    GA    j
9   2    F1    A    A    AA    z
10  2    F4    C    G    CG    i
11  2    D2    C    C    CC    z
12  2    D4    T    C    TC    e
13  2   HY7    A    A    AA    z
14  2  HY66    G    G    GG    z
15  2  XZD1    C    A    CA    g
16  2 XZD33    G    A    GA    j
17  3    F1    T    T    TT    z
18  3    F4    C    C    CC    z
19  3    D2    C    T    CT    h
20  3    D4    T    C    TC    e
21  3   HY7    A    C    AC    b
22  3  HY66    G    G    GG    z
23  3  XZD1    A    A    AA    z
24  3 XZD33    A    A    AA    z
25  4    F1    A    T    AT    a
26  4    F4    C    G    CG    i
27  4    D2    C    T    CT    h
28  4    D4    T    T    TT    z
29  4   HY7    C    C    CC    z
30  4  HY66    T    T    TT    z
31  4  XZD1    C    A    CA    g
32  4 XZD33    A    A    AA    z
33  5    F1    T    T    TT    z
34  5    F4    C    G    CG    i
35  5    D2    T    T    TT    z
36  5    D4    T    T    TT    z
37  5   HY7    A    A    AA    z
38  5  HY66    T    G    TG    f
39  5  XZD1    A    A    AA    z
40  5 XZD33    G    G    GG    z
41  6    F1    A    T    AT    a
42  6    F4    G    G    GG    z
43  6    D2    T    T    TT    z
44  6    D4    C    C    CC    z
45  6   HY7    C    C    CC    z
46  6  HY66    T    T    TT    z
47  6  XZD1    C    C    CC    z
48  6 XZD33    G    A    GA    j
我想我对你的问题有一个答案

# Classify every row of `data` by its allele pair, then give homozygous rows
# the heterozygous code used by the other rows of the same SNP.
#
# Input:  `data`, a data frame with columns SNP, Al_1 and Al_2 (and usually
#         tipo) that must already exist when this script runs.
# Output: `data` gains `combo` and an updated `tipo`; `snp` holds the distinct
#         SNP values; `ord.data` is a copy of `data`; `ord.data2` is `ord.data`
#         with rows grouped by SNP and the "z" placeholders resolved.

data$combo <- paste0(data$Al_1, data$Al_2)
snp <- unique(data$SNP)

valid_bases <- c("A", "T", "C", "G")
# Code for each heterozygous pair, keyed by Al_1 followed by Al_2.
hetero_codes <- c(
  AT = "a", AC = "b", AG = "c",
  TA = "d", TC = "e", TG = "f",
  CA = "g", CT = "h", CG = "i",
  GA = "j", GT = "k", GC = "l"
)

al_1 <- as.character(data$Al_1)
al_2 <- as.character(data$Al_2)

# Make sure there is a full-length column to fill in.
if (!"tipo" %in% names(data)) {
  data$tipo <- rep(NA_character_, nrow(data))
}

# which() drops rows with missing alleles, so they keep their current tipo
# instead of aborting the whole run.
homozygous_rows <- which(al_1 == al_2)
data$tipo[homozygous_rows] <- "z"

hetero_rows <- which(
  al_1 != al_2 & al_1 %in% valid_bases & al_2 %in% valid_bases
)
data$tipo[hetero_rows] <-
  unname(hetero_codes[paste0(al_1[hetero_rows], al_2[hetero_rows])])

ord.data <- data

# Group rows by SNP in factor-level order. Sorting a factor keeps the original
# row order within each SNP, so this matches stacking the per-SNP subsets.
ord.data2 <- ord.data[order(as.factor(ord.data$SNP)), ]

# First heterozygous code seen in each SNP group; NA when the group has none.
first_hetero <- vapply(
  split(ord.data2$tipo, ord.data2$SNP),
  function(codes) as.character(codes[which(codes != "z")][1L]),
  character(1)
)
fill <- unname(first_hetero[as.character(ord.data2$SNP)])

# Only placeholders whose allele is a known base are resolved. A group with
# no heterozygous row keeps "z" rather than failing on a zero-length
# replacement; a group with several codes uses the first one.
to_fill <- which(
  ord.data2$tipo == "z" & ord.data2$Al_1 %in% valid_bases & !is.na(fill)
)
ord.data2$tipo[to_fill] <- fill[to_fill]

你能用一些假数据和你想得到的目标数据准备一个小数据框吗?STH代表相同的…?我不明白你所说的可能有一个OrderArr$tipo值等于其他行的OrderArr$SNP值。您能告诉我们您的OrderArr$tipo列应该是什么样子吗?STHG是一些东西,只是为了确保if子句起作用。我编辑了这篇文章。我希望我现在说清楚了!SNP、Al_1和Al_2的组合是分离的吗?
# Classify every row of `data` by its allele pair, then give homozygous rows
# the heterozygous code used by the other rows of the same SNP.
#
# Input:  `data`, a data frame with columns SNP, Al_1 and Al_2 (and usually
#         tipo) that must already exist when this script runs.
# Output: `data` gains `combo` and an updated `tipo`; `snp` holds the distinct
#         SNP values; `ord.data` is a copy of `data`; `ord.data2` is `ord.data`
#         with rows grouped by SNP and the "z" placeholders resolved.

data$combo <- paste0(data$Al_1, data$Al_2)
snp <- unique(data$SNP)

valid_bases <- c("A", "T", "C", "G")
# Code for each heterozygous pair, keyed by Al_1 followed by Al_2.
hetero_codes <- c(
  AT = "a", AC = "b", AG = "c",
  TA = "d", TC = "e", TG = "f",
  CA = "g", CT = "h", CG = "i",
  GA = "j", GT = "k", GC = "l"
)

al_1 <- as.character(data$Al_1)
al_2 <- as.character(data$Al_2)

# Make sure there is a full-length column to fill in.
if (!"tipo" %in% names(data)) {
  data$tipo <- rep(NA_character_, nrow(data))
}

# which() drops rows with missing alleles, so they keep their current tipo
# instead of aborting the whole run.
homozygous_rows <- which(al_1 == al_2)
data$tipo[homozygous_rows] <- "z"

hetero_rows <- which(
  al_1 != al_2 & al_1 %in% valid_bases & al_2 %in% valid_bases
)
data$tipo[hetero_rows] <-
  unname(hetero_codes[paste0(al_1[hetero_rows], al_2[hetero_rows])])

ord.data <- data

# Group rows by SNP in factor-level order. Sorting a factor keeps the original
# row order within each SNP, so this matches stacking the per-SNP subsets.
ord.data2 <- ord.data[order(as.factor(ord.data$SNP)), ]

# First heterozygous code seen in each SNP group; NA when the group has none.
first_hetero <- vapply(
  split(ord.data2$tipo, ord.data2$SNP),
  function(codes) as.character(codes[which(codes != "z")][1L]),
  character(1)
)
fill <- unname(first_hetero[as.character(ord.data2$SNP)])

# Only placeholders whose allele is a known base are resolved. A group with
# no heterozygous row keeps "z" rather than failing on a zero-length
# replacement; a group with several codes uses the first one.
to_fill <- which(
  ord.data2$tipo == "z" & ord.data2$Al_1 %in% valid_bases & !is.na(fill)
)
ord.data2$tipo[to_fill] <- fill[to_fill]