R 从多个单独变量创建一个合并变量

R 从多个单独变量创建一个合并变量,r,dataframe,recode,categorization,R,Dataframe,Recode,Categorization,任何帮助都将不胜感激 我有一个从PCR板软件导出的文件。我已经对所有等位基因进行了编码,现在将它们合并到一个数据帧中 我需要创建一个新的变量,合并3个等位基因(G1-1、G1-2和G2),以获得最终的基因型 然后我需要计算等位基因的发生率,以生成我需要生成的其他3个APOL1风险变量 Allele logic for final genotype: +/G2 = (G1-1-1(+) & G1-1-2(+)) & (G1-2-1(+) & G1-2-2(+)) & …

任何帮助都将不胜感激

我有一个从PCR板软件导出的文件。我已经对所有等位基因进行了编码,现在将它们合并到一个数据帧中

我需要创建一个新的变量,合并3个等位基因(G1-1、G1-2和G2),以获得最终的基因型

然后我需要统计各等位基因的出现情况,以生成另外3个所需的APOL1风险变量


Allele logic for final genotype:

+/G2 = (G1-1-1(+) & G1-1-2(+)) & (G1-2-1(+) & G1-2-2(+)) & (occurrence of (G2) at either G2-1 or G2-2)

+/+ = (G1-1-1(+) & G1-1-2(+)) & (G1-2-1(+) & G1-2-2(+)) & (G2-1(+) & G2-2(+))

G2/G2 = (G1-1-1(+) & G1-1-2(+)) & (G1-2-1(+) & G1-2-2(+)) & (G2-1(G2) & G2-2(G2))

G1^GM/+ = (occurrence of (G1^S342G) at either G1-1-1 or G1-1-2) & (occurrence of (G1^I384M) at either G1-2-1 or G1-2-2) & (G2-1(+) & G2-2(+))

G1^G+/+ = (occurrence of (G1^S342G) at either G1-1-1 or G1-1-2) & (G1-2-1(+) & G1-2-2(+)) & (G2-1(+) & G2-2(+))

G1^GM/G1^GM = (occurrence of (G1^S342G) at both G1-1-1 and G1-1-2) & (occurrence of (G1^I384M) at both G1-2-1 and G1-2-2) & (G2-1(+) & G2-2(+))

G1^GM/G2 = (occurrence of (G1^S342G) at either G1-1-1 or G1-1-2) & (occurrence of (G1^I384M) at either G1-2-1 or G1-2-2) & (occurrence of (G2) at either G2-1 or G2-2)

G1^G+/G2 = (occurrence of (G1^S342G) at either G1-1-1 or G1-1-2) & (G1-2-1(+) & G1-2-2(+)) & (occurrence of (G2) at either G2-1 or G2-2)

原始数据帧结构

Classes ‘tbl_df’, ‘tbl’ and 'data.frame':   28 obs. of  6 variables:
 $ G1-1-1   : chr  "+" "+" "+" "+" ...
 $ G1-1-2   : chr  "+" "+" "+" "+" ...
 $ G1-2-1   : chr  "+" "+" "+" "+" ...
 $ G1-2-2   : chr  "+" "+" "+" "+" ...
 $ G2-1     : chr  "+" "+" "+" "+" ...
 $ G2-2     : chr  "G2" "+" "G2" "G2" ...

您可以使用dplyr函数轻松实现逻辑

实现给定逻辑所需的代码如下:

library(dplyr)

# Example genotype calls for 28 samples (one row per sample, one column per
# allele call). Every call is wild type ("+") unless listed below, so each
# column is built from an all-"+" template with the variant written in at the
# given row positions.
data <- local({
  n_samples <- 28L

  # Return a length-`n_samples` character vector of "+" in which the rows
  # listed in `at` are overwritten with `variant`.
  calls <- function(variant = "+", at = integer(0)) {
    replace(rep("+", n_samples), at, variant)
  }

  data.frame(
    G1_1_1 = calls("G1S342G", c(8, 10, 15, 16, 18, 22, 25, 27, 28)),
    G1_1_2 = calls("G1S342G", 8),
    G1_2_1 = calls("G1I384M", c(8, 10, 15, 16, 18, 25, 27, 28)),
    G1_2_2 = calls("G1I384M", 8),
    G2_1 = calls(),
    G2_2 = calls("G2", c(1, 3, 4, 5, 9, 10, 12, 14, 15, 19, 21)),
    stringsAsFactors = FALSE
  )
})

# Derive the final APOL1 genotype of every sample from the six allele-call
# columns, then add three indicator columns (1 or NA) flagging samples that
# carry zero, one or two APOL1 risk alleles.
#
# `case_when()` returns the label of the FIRST condition that is TRUE, so the
# specific "both positions" rules (G2/G2, G1GM/G1GM) are listed before the
# broader "either position" rules that would otherwise shadow them.
result <-
  data %>%
  mutate(
    "Final genotype of APOL1" =
      case_when(
        # +/+ : every call is wild type.
        G1_1_1 == "+" & G1_1_2 == "+" & G1_2_1 == "+" &
          G1_2_2 == "+" & G2_1 == "+" & G2_2 == "+" ~ "+/+",

        # G2/G2 : both G1 sites wild type, G2 at both G2 positions.
        G1_1_1 == "+" & G1_1_2 == "+" & G1_2_1 == "+" &
          G1_2_2 == "+" & G2_1 == "G2" & G2_2 == "G2" ~ "G2/G2",

        # G1^GM/G1^GM : S342G at both G1-1 positions, I384M at both G1-2
        # positions, G2 site wild type.
        G1_1_1 == "G1S342G" & G1_1_2 == "G1S342G" & G1_2_1 == "G1I384M" &
          G1_2_2 == "G1I384M" & G2_1 == "+" & G2_2 == "+" ~ "G1GM/G1GM",

        # +/G2 : both G1 sites wild type, G2 at either G2 position
        # (the G2-at-both case was already caught by the G2/G2 rule).
        G1_1_1 == "+" & G1_1_2 == "+" & G1_2_1 == "+" &
          G1_2_2 == "+" & (G2_1 == "G2" | G2_2 == "G2") ~ "+/G2",

        # G1^G+/+ : S342G at either G1-1 position, G1-2 and G2 sites wild type.
        (G1_1_1 == "G1S342G" | G1_1_2 == "G1S342G") & G1_2_1 == "+" &
          G1_2_2 == "+" & G2_1 == "+" & G2_2 == "+" ~ "G1G+/+",

        # G1^G+/G2 : S342G at either G1-1 position, G1-2 site wild type,
        # G2 at either G2 position.
        (G1_1_1 == "G1S342G" | G1_1_2 == "G1S342G") & G1_2_1 == "+" &
          G1_2_2 == "+" & (G2_1 == "G2" | G2_2 == "G2") ~ "G1G+/G2",

        # G1^GM/+ : S342G at either G1-1 position, I384M at either G1-2
        # position, G2 site wild type.
        (G1_1_1 == "G1S342G" | G1_1_2 == "G1S342G") &
          (G1_2_1 == "G1I384M" | G1_2_2 == "G1I384M") &
          G2_1 == "+" & G2_2 == "+" ~ "G1GM/+",

        # G1^GM/G2 : S342G at either G1-1 position, I384M at either G1-2
        # position, G2 at either G2 position.
        (G1_1_1 == "G1S342G" | G1_1_2 == "G1S342G") &
          (G1_2_1 == "G1I384M" | G1_2_2 == "G1I384M") &
          (G2_1 == "G2" | G2_2 == "G2") ~ "G1GM/G2",

        # Any combination not covered by the rules above stays missing.
        TRUE ~ NA_character_
      ),

    # Indicator columns: 1 when the genotype belongs to the category, NA
    # otherwise (including samples whose genotype could not be assigned).
    "no APOL1 Risk Alleles" =
      ifelse(`Final genotype of APOL1` == "+/+", 1, NA),

    "1 APOL1 Risk Alleles" =
      ifelse(
        `Final genotype of APOL1` %in% c("+/G2", "G1GM/+", "G1G+/+"),
        1,
        NA
      ),

    # NOTE(review): "G1G+/G2" is counted here as two risk alleles, consistent
    # with "G1G+/+" being counted as one above; previously this genotype fell
    # into none of the three indicator columns. Confirm against the
    # genotyping protocol.
    "2 APOL1 Risk Alleles" =
      ifelse(
        `Final genotype of APOL1` %in%
          c("G1GM/G1GM", "G1GM/G2", "G1G+/G2", "G2/G2"),
        1,
        NA
      )
  )

# Show a compact column-wise overview of the result.
glimpse(result)

# Observations: 28
# Variables: 10
# $ G1_1_1                    <chr> "+", "+", "+", "+", "+", "+", "+", "G1S342G", "+", "G1S342G", ...
# $ G1_1_2                    <chr> "+", "+", "+", "+", "+", "+", "+", "G1S342G", "+", "+", "+", "...
# $ G1_2_1                    <chr> "+", "+", "+", "+", "+", "+", "+", "G1I384M", "+", "G1I384M", ...
# $ G1_2_2                    <chr> "+", "+", "+", "+", "+", "+", "+", "G1I384M", "+", "+", "+", "...
# $ G2_1                      <chr> "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+...
# $ G2_2                      <chr> "G2", "+", "G2", "G2", "G2", "+", "+", "+", "G2", "G2", "+", "...
# $ `Final genotype of APOL1` <chr> "+/G2", "+/+", "+/G2", "+/G2", "+/G2", "+/+", "+/+", "G1GM/G1G...
# $ `no APOL1 Risk Alleles`   <dbl> NA, 1, NA, NA, NA, 1, 1, NA, NA, NA, 1, NA, 1, NA, NA, NA, 1, ...
# $ `1 APOL1 Risk Alleles`    <dbl> 1, NA, 1, 1, 1, NA, NA, NA, 1, NA, NA, 1, NA, 1, NA, 1, NA, 1,...
# $ `2 APOL1 Risk Alleles`    <dbl> NA, NA, NA, NA, NA, NA, NA, 1, NA, 1, NA, NA, NA, NA, 1, NA, N...
您可以使用dplyr函数轻松实现该逻辑

实现给定逻辑所需的代码如下:

library(dplyr)

# Example genotype calls for 28 samples (one row per sample, one column per
# allele call). Every call is wild type ("+") unless listed below, so each
# column is built from an all-"+" template with the variant written in at the
# given row positions.
data <- local({
  n_samples <- 28L

  # Return a length-`n_samples` character vector of "+" in which the rows
  # listed in `at` are overwritten with `variant`.
  calls <- function(variant = "+", at = integer(0)) {
    replace(rep("+", n_samples), at, variant)
  }

  data.frame(
    G1_1_1 = calls("G1S342G", c(8, 10, 15, 16, 18, 22, 25, 27, 28)),
    G1_1_2 = calls("G1S342G", 8),
    G1_2_1 = calls("G1I384M", c(8, 10, 15, 16, 18, 25, 27, 28)),
    G1_2_2 = calls("G1I384M", 8),
    G2_1 = calls(),
    G2_2 = calls("G2", c(1, 3, 4, 5, 9, 10, 12, 14, 15, 19, 21)),
    stringsAsFactors = FALSE
  )
})

# Derive the final APOL1 genotype of every sample from the six allele-call
# columns, then add three indicator columns (1 or NA) flagging samples that
# carry zero, one or two APOL1 risk alleles.
#
# `case_when()` returns the label of the FIRST condition that is TRUE, so the
# specific "both positions" rules (G2/G2, G1GM/G1GM) are listed before the
# broader "either position" rules that would otherwise shadow them.
result <-
  data %>%
  mutate(
    "Final genotype of APOL1" =
      case_when(
        # +/+ : every call is wild type.
        G1_1_1 == "+" & G1_1_2 == "+" & G1_2_1 == "+" &
          G1_2_2 == "+" & G2_1 == "+" & G2_2 == "+" ~ "+/+",

        # G2/G2 : both G1 sites wild type, G2 at both G2 positions.
        G1_1_1 == "+" & G1_1_2 == "+" & G1_2_1 == "+" &
          G1_2_2 == "+" & G2_1 == "G2" & G2_2 == "G2" ~ "G2/G2",

        # G1^GM/G1^GM : S342G at both G1-1 positions, I384M at both G1-2
        # positions, G2 site wild type.
        G1_1_1 == "G1S342G" & G1_1_2 == "G1S342G" & G1_2_1 == "G1I384M" &
          G1_2_2 == "G1I384M" & G2_1 == "+" & G2_2 == "+" ~ "G1GM/G1GM",

        # +/G2 : both G1 sites wild type, G2 at either G2 position
        # (the G2-at-both case was already caught by the G2/G2 rule).
        G1_1_1 == "+" & G1_1_2 == "+" & G1_2_1 == "+" &
          G1_2_2 == "+" & (G2_1 == "G2" | G2_2 == "G2") ~ "+/G2",

        # G1^G+/+ : S342G at either G1-1 position, G1-2 and G2 sites wild type.
        (G1_1_1 == "G1S342G" | G1_1_2 == "G1S342G") & G1_2_1 == "+" &
          G1_2_2 == "+" & G2_1 == "+" & G2_2 == "+" ~ "G1G+/+",

        # G1^G+/G2 : S342G at either G1-1 position, G1-2 site wild type,
        # G2 at either G2 position.
        (G1_1_1 == "G1S342G" | G1_1_2 == "G1S342G") & G1_2_1 == "+" &
          G1_2_2 == "+" & (G2_1 == "G2" | G2_2 == "G2") ~ "G1G+/G2",

        # G1^GM/+ : S342G at either G1-1 position, I384M at either G1-2
        # position, G2 site wild type.
        (G1_1_1 == "G1S342G" | G1_1_2 == "G1S342G") &
          (G1_2_1 == "G1I384M" | G1_2_2 == "G1I384M") &
          G2_1 == "+" & G2_2 == "+" ~ "G1GM/+",

        # G1^GM/G2 : S342G at either G1-1 position, I384M at either G1-2
        # position, G2 at either G2 position.
        (G1_1_1 == "G1S342G" | G1_1_2 == "G1S342G") &
          (G1_2_1 == "G1I384M" | G1_2_2 == "G1I384M") &
          (G2_1 == "G2" | G2_2 == "G2") ~ "G1GM/G2",

        # Any combination not covered by the rules above stays missing.
        TRUE ~ NA_character_
      ),

    # Indicator columns: 1 when the genotype belongs to the category, NA
    # otherwise (including samples whose genotype could not be assigned).
    "no APOL1 Risk Alleles" =
      ifelse(`Final genotype of APOL1` == "+/+", 1, NA),

    "1 APOL1 Risk Alleles" =
      ifelse(
        `Final genotype of APOL1` %in% c("+/G2", "G1GM/+", "G1G+/+"),
        1,
        NA
      ),

    # NOTE(review): "G1G+/G2" is counted here as two risk alleles, consistent
    # with "G1G+/+" being counted as one above; previously this genotype fell
    # into none of the three indicator columns. Confirm against the
    # genotyping protocol.
    "2 APOL1 Risk Alleles" =
      ifelse(
        `Final genotype of APOL1` %in%
          c("G1GM/G1GM", "G1GM/G2", "G1G+/G2", "G2/G2"),
        1,
        NA
      )
  )

# Show a compact column-wise overview of the result.
glimpse(result)

# Observations: 28
# Variables: 10
# $ G1_1_1                    <chr> "+", "+", "+", "+", "+", "+", "+", "G1S342G", "+", "G1S342G", ...
# $ G1_1_2                    <chr> "+", "+", "+", "+", "+", "+", "+", "G1S342G", "+", "+", "+", "...
# $ G1_2_1                    <chr> "+", "+", "+", "+", "+", "+", "+", "G1I384M", "+", "G1I384M", ...
# $ G1_2_2                    <chr> "+", "+", "+", "+", "+", "+", "+", "G1I384M", "+", "+", "+", "...
# $ G2_1                      <chr> "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+", "+...
# $ G2_2                      <chr> "G2", "+", "G2", "G2", "G2", "+", "+", "+", "G2", "G2", "+", "...
# $ `Final genotype of APOL1` <chr> "+/G2", "+/+", "+/G2", "+/G2", "+/G2", "+/+", "+/+", "G1GM/G1G...
# $ `no APOL1 Risk Alleles`   <dbl> NA, 1, NA, NA, NA, 1, 1, NA, NA, NA, 1, NA, 1, NA, NA, NA, 1, ...
# $ `1 APOL1 Risk Alleles`    <dbl> 1, NA, 1, 1, 1, NA, NA, NA, 1, NA, NA, 1, NA, 1, NA, 1, NA, 1,...
# $ `2 APOL1 Risk Alleles`    <dbl> NA, NA, NA, NA, NA, NA, NA, 1, NA, 1, NA, NA, NA, NA, 1, NA, N...
你能提供计算字段背后的逻辑吗? @Ben 我添加了等位基因组合。 @NaremanDarwish 我已经补充了计算字段背后的逻辑,任何见解我都将不胜感激。 @Ben 我一直在寻找一个专门的R包,我会继续寻找,谢谢你的帮助。 @Jordan 逻辑中提到了G1-2-2,但是没有这样的列。 你能提供计算字段背后的逻辑吗? @Ben 我添加了等位基因组合。 @NaremanDarwish 我已经补充了计算字段背后的逻辑,任何见解我都将不胜感激。 @Ben 我一直在寻找一个专门的R包,我会继续寻找,谢谢你的帮助。 @Jordan 逻辑中提到了G1-2-2,但是没有这样的列。 再次感谢您的帮助,这段代码非常有效。我在使用相同数据时还遇到了另一个问题,非常希望能得到您的意见/帮助- 再次感谢您的帮助,这段代码非常有效。我在使用相同数据时还遇到了另一个问题,非常希望能得到您的意见/帮助-