R:根据 2 个条件对 3 列执行 mutate
是否有人知道一种更高效的方法来运行下面这段代码?它根据与各列对应的条件,把 3 列中的值替换为 NA。例如,能否用 mutate_at 代替多次 mutate?(标签:r、dplyr、mutate)
# Blank out a temperature reading when it is below 19 and the cyclone flag in
# the same-numbered column is "f"; every other value is kept as is.
# A real `NA` (via `replace()`) keeps each Temperature column numeric, whereas
# the string "NA" would coerce the whole column to character.
Data <- DATA %>%
  mutate(
    Temperature1 = replace(
      Temperature1, Temperature1 < 19 & Cyclon1 == "f", NA
    ),
    Temperature2 = replace(
      Temperature2, Temperature2 < 19 & Cyclon2 == "f", NA
    ),
    Temperature3 = replace(
      Temperature3, Temperature3 < 19 & Cyclon3 == "f", NA
    )
  )
Data=Data%>%
突变(温度1=ifelse(温度1<19&Cyclon1==“f”,“NA”,温度1))%>%
突变(温度2=ifelse(温度2<19&Cyclon2==“f”,“NA”,温度2))%>%
突变(温度3=ifelse(温度3<19&Cyclon3==“f”、“NA”、温度3))
提前感谢!
这并不是那么直接,因为你需要把 Temperature1 与 Cyclon1 一一对应。如果想坚持使用 dplyr,解决办法是先用 pivot_longer 转成长格式,然后执行 mutate,再用 pivot_wider 转回宽格式。例如,如果您的数据如下所示:
# Reproducible toy data: 100 rows holding three temperature readings drawn
# uniformly from [0, 100] and three cyclone flags sampled from "t" / "f".
# The columns are created in this order so the random draws stay the same.
set.seed(111)
DATA <- data.frame(
  Temperature1 = runif(100, min = 0, max = 100),
  Temperature2 = runif(100, min = 0, max = 100),
  Temperature3 = runif(100, min = 0, max = 100),
  Cyclon1 = sample(c("t", "f"), size = 100, replace = TRUE),
  Cyclon2 = sample(c("t", "f"), size = 100, replace = TRUE),
  Cyclon3 = sample(c("t", "f"), size = 100, replace = TRUE)
)
然后我们做:
# Preview the long layout: the row names become an `id` column, and each
# TemperatureN / CyclonN pair collapses into one row with `set` = N. The
# ".value" entry keeps the name stem (Temperature, Cyclon) as the column name.
DATA %>%
  rownames_to_column(var = "id") %>%
  pivot_longer(
    cols = -id,
    names_to = c(".value", "set"),
    names_pattern = "([^0-9]*)([0-9])"
  )
# A tibble: 300 x 4
id set Temperature Cyclon
<chr> <chr> <dbl> <fct>
1 1 1 59.3 t
2 1 2 57.6 f
3 1 3 72.6 t
4 2 1 72.6 t
5 2 2 13.6 t
6 2 3 92.0 f
DATA%>%rownames\u to\u列(“id”)%>%
pivot_longer(-id,names_to=c(“.value”,“set”),names_pattern=“([^0-9]*)([0-9]))
#一个tibble:300x4
id设定温度气旋
159.3吨
2157.6 f
31372.6吨
42172.6吨
5213.6吨
62392.0华氏度
在这一步,每个 set(1–3)都有一一对应的 Cyclon 和 Temperature,剩下要做的就是执行 mutate,然后再 pivot 回宽格式:
# Build `data1`: reshape to long form so each row pairs one temperature with
# its cyclone flag, set the temperature to NA where it is below 19 and the
# flag is "f", then widen again to one row per `id`.
data1 <- DATA %>%
  rownames_to_column(var = "id") %>%
  pivot_longer(
    cols = -id,
    names_to = c(".value", "set"),
    names_pattern = "([^0-9]*)([0-9])"
  ) %>%
  mutate(
    Temperature = replace(Temperature, Temperature < 19 & Cyclon == "f", NA)
  ) %>%
  pivot_wider(names_from = set, values_from = c(Temperature, Cyclon))
data1=DATA%>%rownames\u to\u column(“id”)%>%
pivot_longer(-id,names_to=c(“.value”,“set”),names_pattern=“([^0-9]*)([0-9])”%>%
突变(温度=替换(温度,温度<19&Cyclon==“f”,NA))%>%
枢轴宽度(值从=c(温度,气旋),名称从=set)
我们可以检查以下值:
# Show the first rows of the original data where set 1 meets the masking
# condition (Temperature1 below 19 and Cyclon1 equal to "f").
head(DATA[with(DATA, Temperature1 < 19 & Cyclon1 == "f"), ])
Temperature1 Temperature2 Temperature3 Cyclon1 Cyclon2 Cyclon3
7 1.065785 64.00623 58.11568 f t t
10 9.368152 96.53025 53.62925 f t t
14 4.754785 90.39043 47.44193 f f f
15 15.620252 96.45305 72.74062 f t f
17 17.144369 54.89127 95.85764 f t f
31 5.859646 35.14933 44.92498 f f t
# Show the same rows in the processed table. The row mask is computed on DATA,
# so this relies on data1 having its rows in the same order as DATA.
head(data1[with(DATA, Temperature1 < 19 & Cyclon1 == "f"), ])
# A tibble: 6 x 7
id Temperature_1 Temperature_2 Temperature_3 Cyclon_1 Cyclon_2 Cyclon_3
<chr> <dbl> <dbl> <dbl> <fct> <fct> <fct>
1 7 NA 64.0 58.1 f t t
2 10 NA 96.5 53.6 f t t
3 14 NA 90.4 47.4 f f f
4 15 NA 96.5 72.7 f t f
5 17 NA 54.9 95.9 f t f
6 31 NA 35.1 44.9 f f t
head(数据[数据$Temperature1<19和数据$Cyclon1==“f”,]))
温度1温度2温度3旋风分离器1旋风分离器2旋风分离器3
7 1.065785 64.00623 58.11568 f t
10 9.368152 96.53025 53.62925 f t
14 4.754785 90.39043 47.44193 f
15.620252 96.45305 72.74062华氏度
17.144369 54.89127 95.85764华氏度
31 5.859646 35.14933 44.92498 f t
头部(数据1[数据$Temperature1<19和数据$Cyclon1==“f”,]))
#一个tibble:6x7
id温度\u 1温度\u 2温度\u 3气旋\u 1气旋\u 2气旋\u 3
1 7 NA 64.0 58.1 f t
2 10 NA 96.5 53.6 f t
3 14 NA 90.4 47.4 f
4 15 NA 96.5 72.7 f t f
517NA 54.995.9F
6 31 NA 35.1 44.9 f t
这并不是那么直接,因为你需要把 Temperature1 与 Cyclon1 一一对应。如果想坚持使用 dplyr,解决办法是先用 pivot_longer 转成长格式,然后执行 mutate,再用 pivot_wider 转回宽格式。例如,如果您的数据如下所示:
# Reproducible toy data: 100 rows holding three temperature readings drawn
# uniformly from [0, 100] and three cyclone flags sampled from "t" / "f".
# The columns are created in this order so the random draws stay the same.
set.seed(111)
DATA <- data.frame(
  Temperature1 = runif(100, min = 0, max = 100),
  Temperature2 = runif(100, min = 0, max = 100),
  Temperature3 = runif(100, min = 0, max = 100),
  Cyclon1 = sample(c("t", "f"), size = 100, replace = TRUE),
  Cyclon2 = sample(c("t", "f"), size = 100, replace = TRUE),
  Cyclon3 = sample(c("t", "f"), size = 100, replace = TRUE)
)
然后我们做:
# Preview the long layout: the row names become an `id` column, and each
# TemperatureN / CyclonN pair collapses into one row with `set` = N. The
# ".value" entry keeps the name stem (Temperature, Cyclon) as the column name.
DATA %>%
  rownames_to_column(var = "id") %>%
  pivot_longer(
    cols = -id,
    names_to = c(".value", "set"),
    names_pattern = "([^0-9]*)([0-9])"
  )
# A tibble: 300 x 4
id set Temperature Cyclon
<chr> <chr> <dbl> <fct>
1 1 1 59.3 t
2 1 2 57.6 f
3 1 3 72.6 t
4 2 1 72.6 t
5 2 2 13.6 t
6 2 3 92.0 f
DATA%>%rownames\u to\u列(“id”)%>%
pivot_longer(-id,names_to=c(“.value”,“set”),names_pattern=“([^0-9]*)([0-9]))
#一个tibble:300x4
id设定温度气旋
159.3吨
2157.6 f
31372.6吨
42172.6吨
5213.6吨
62392.0华氏度
在这一步,每个 set(1–3)都有一一对应的 Cyclon 和 Temperature,剩下要做的就是执行 mutate,然后再 pivot 回宽格式:
# Build `data1`: reshape to long form so each row pairs one temperature with
# its cyclone flag, set the temperature to NA where it is below 19 and the
# flag is "f", then widen again to one row per `id`.
data1 <- DATA %>%
  rownames_to_column(var = "id") %>%
  pivot_longer(
    cols = -id,
    names_to = c(".value", "set"),
    names_pattern = "([^0-9]*)([0-9])"
  ) %>%
  mutate(
    Temperature = replace(Temperature, Temperature < 19 & Cyclon == "f", NA)
  ) %>%
  pivot_wider(names_from = set, values_from = c(Temperature, Cyclon))
data1=DATA%>%rownames\u to\u column(“id”)%>%
pivot_longer(-id,names_to=c(“.value”,“set”),names_pattern=“([^0-9]*)([0-9])”%>%
突变(温度=替换(温度,温度<19&Cyclon==“f”,NA))%>%
枢轴宽度(值从=c(温度,气旋),名称从=set)
我们可以检查以下值:
# Show the first rows of the original data where set 1 meets the masking
# condition (Temperature1 below 19 and Cyclon1 equal to "f").
head(DATA[with(DATA, Temperature1 < 19 & Cyclon1 == "f"), ])
Temperature1 Temperature2 Temperature3 Cyclon1 Cyclon2 Cyclon3
7 1.065785 64.00623 58.11568 f t t
10 9.368152 96.53025 53.62925 f t t
14 4.754785 90.39043 47.44193 f f f
15 15.620252 96.45305 72.74062 f t f
17 17.144369 54.89127 95.85764 f t f
31 5.859646 35.14933 44.92498 f f t
# Show the same rows in the processed table. The row mask is computed on DATA,
# so this relies on data1 having its rows in the same order as DATA.
head(data1[with(DATA, Temperature1 < 19 & Cyclon1 == "f"), ])
# A tibble: 6 x 7
id Temperature_1 Temperature_2 Temperature_3 Cyclon_1 Cyclon_2 Cyclon_3
<chr> <dbl> <dbl> <dbl> <fct> <fct> <fct>
1 7 NA 64.0 58.1 f t t
2 10 NA 96.5 53.6 f t t
3 14 NA 90.4 47.4 f f f
4 15 NA 96.5 72.7 f t f
5 17 NA 54.9 95.9 f t f
6 31 NA 35.1 44.9 f f t
head(数据[数据$Temperature1<19和数据$Cyclon1==“f”,]))
温度1温度2温度3旋风分离器1旋风分离器2旋风分离器3
7 1.065785 64.00623 58.11568 f t
10 9.368152 96.53025 53.62925 f t
14 4.754785 90.39043 47.44193 f
15.620252 96.45305 72.74062华氏度
17.144369 54.89127 95.85764华氏度
31 5.859646 35.14933 44.92498 f t
头部(数据1[数据$Temperature1<19和数据$Cyclon1==“f”,]))
#一个tibble:6x7
id温度\u 1温度\u 2温度\u 3气旋\u 1气旋\u 2气旋\u 3
1 7 NA 64.0 58.1 f t
2 10 NA 96.5 53.6 f t
3 14 NA 90.4 47.4 f
4 15 NA 96.5 72.7 f t f
517NA 54.995.9F
6 31 NA 35.1 44.9 f t
我假设了一些数据:
# Small example table: six records labelled A-F, each with three integer
# temperature columns (17 to 22) and three cyclone flags alternating "f" / "g".
DATA <- tibble(
  Record       = LETTERS[1:6],
  Temperature1 = 17:22,
  Cyclon1      = rep(c("f", "g"), times = 3),
  Temperature2 = 17:22,
  Cyclon2      = rep(c("f", "g"), times = 3),
  Temperature3 = 17:22,
  Cyclon3      = rep(c("f", "g"), times = 3)
)
就我个人而言,我会把数据保留为 LONGDATA 这种长格式。但如果你确实想转回原来的宽格式:
# Reshape LONGDATA back to a wide layout in two passes and store it in NEWDATA.
# LONGDATA is not defined in this snippet; presumably it is the long version
# of DATA with key/value column pairs (Cyclon/cValue and Temperature/tValue).
# Verify against the code that builds it.
# NOTE(review): spread() is superseded by tidyr::pivot_wider(); consider
# migrating once the shape of LONGDATA is confirmed.
NEWDATA <- LONGDATA %>%
# First pass: one column per distinct value of `Cyclon`, filled from `cValue`.
spread(key = Cyclon, value = cValue) %>%
# Second pass: one column per distinct value of `Temperature`, filled from
# `tValue`.
spread(key = Temperature, value = tValue)
# Print the reshaped table.
NEWDATA
# A tibble: 6 x 7
Record Cyclon1 Cyclon2 Cyclon3 Temperature1 Temperature2 Temperature3
<chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 A f f f NA NA NA
2 B g g g 18 18 18
3 C f f f 19 19 19
4 D g g g 20 20 20
5 E f f f 21 21 21
6 F g g g 22 22 22
NEWDATA%
排列(键=气旋,值=C值)%>%
排列(键=温度,值=tValue)
新数据
#一个tibble:6x7
记录旋风分离器1旋风分离器2旋风分离器3温度1温度2温度3
1 A f f NA NA NA
2 B g 18 18
3 C f 19 19
4dG2020
5英、法、法21 21
6 F g 22 22
我假设了一些数据:
# Small example table: six records labelled A-F, each with three integer
# temperature columns (17 to 22) and three cyclone flags alternating "f" / "g".
DATA <- tibble(
  Record       = LETTERS[1:6],
  Temperature1 = 17:22,
  Cyclon1      = rep(c("f", "g"), times = 3),
  Temperature2 = 17:22,
  Cyclon2      = rep(c("f", "g"), times = 3),
  Temperature3 = 17:22,
  Cyclon3      = rep(c("f", "g"), times = 3)
)