需要帮助扩展R中的函数和for循环吗
标签:r, function, for-loop, dataframe
我有以下带有for循环的函数:
#' Extract peptide sub-sequences from one or more input sequences
#'
#' For every row of the peptide table, cuts the characters from `StartAA`
#' to `EndAA` (inclusive, 1-based, as defined by `substr()`) out of
#' `input.seq`.
#'
#' @param input.seq Character vector of sequences (typically a single string).
#' @param peptides Data frame with columns `Name`, `StartAA` and `EndAA`.
#'   Defaults to the object `peptides.df` visible from the calling context,
#'   which keeps existing one-argument calls working.
#' @return A data frame with character columns `peptide.name` and
#'   `peptide.seq`, one row per peptide/sequence combination, ordered by
#'   peptide. A peptide table with zero rows yields a zero-row data frame.
getSequences <- function(input.seq, peptides = peptides.df) {
n.peptides <- nrow(peptides)
n.sequences <- length(input.seq)

# Build the peptide-major pairing up front instead of growing a data frame
# with rbind() inside a loop; seq_len() is safe when the table is empty,
# where 1:nrow() would iterate over c(1, 0).
peptide.idx <- rep(seq_len(n.peptides), each = n.sequences)
sequences <- rep(input.seq, times = n.peptides)

# substr() is vectorised over x, start and stop. When there are no peptides,
# x is empty and substr() returns character(0) rather than raising an error.
data.frame(
peptide.name = as.character(peptides$Name)[peptide.idx],
peptide.seq = substr(
sequences,
peptides$StartAA[peptide.idx],
peptides$EndAA[peptide.idx]
),
stringsAsFactors = FALSE
)
}
# Run getSequences() on one input sequence and keep the returned table.
# NOTE: input.seq is not defined anywhere in this snippet; it must already
# exist in the session before this line runs.
test.results <- getSequences(input.seq)
当前输出(test.results):
peptide.name peptide.sequence
peptide_1 QNYWEHPYQNSDVYRPINEHREHP
peptide_2 QNSDVYRPINEHREHPKEYEYPLH
peptide_3 INEHREHPKEYEYPLHQEHTYQQE
我如何扩展它,使其接受一个包含样本及其输入序列的数据框?对于每个样本及其序列,我想像上面的示例那样生成一组肽。
新输入:sample_sequences 数据框(200 个样本及其输入序列)
新输出:sample_peptides
sample1 peptide_1 QNYWEHPYQNSDVYRPINEHREHP
sample1 peptide_2 QNSDVYRPINEHREHPKEYEYPLH
sample1 peptide_3 INEHREHPKEYEYPLHQEHTYQQE
sample2 peptide_1 QNYWEHPYQNSDVYRPINEHREHP
sample2 peptide_2 QNSDVYRPINEHREHPKEYEYPLH
sample2 peptide_3 INEHREHPKEYEYPLHQEHTYQQE
sample3 peptide_1 QNYWEHPYQNSDVYRPINEHREHP
sample3 peptide_2 QNSDVYRPINEHREHPKEYEYPLH
sample3 peptide_3 INEHREHPKEYEYPLHQEHTYQQE
...
sample200 peptide_1 QNYWEHPYQNSDVYRPINEHREHP
sample200 peptide_2 QNSDVYRPINEHREHPKEYEYPLH
sample200 peptide_3 INEHREHPKEYEYPLHQEHTYQQE
您可以使用
tidyr
和dplyr
避免循环。您可以使用 crossing
将每个样本序列与所有可能的肽组合展开。然后,使用 substr 提取各肽段的序列。
# dplyr provides mutate(); tidyr provides crossing().
library(dplyr)
library(tidyr)

# Peptide windows: a name plus the first and last residue position to cut.
peptides.df <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = " Name StartAA EndAA
peptide_1 25 48
peptide_2 33 56
peptide_3 41 64"
)

# One row per sample, holding the full input sequence.
sample_sequences <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = " sample sequence
sample1 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE
sample2 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE
sample3 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE"
)

# Pair every sample with every peptide window, then cut each window out of
# that sample's sequence. The result is printed, not stored.
mutate(
  crossing(sample_sequences, peptides.df),
  peptide.sequence = substr(sequence, StartAA, EndAA)
)
sample sequence Name StartAA EndAA peptide.sequence
1 sample1 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_1 25 48 QNYWEHPYQNSDVYRPINEHREHP
2 sample1 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_2 33 56 QNSDVYRPINEHREHPKEYEYPLH
3 sample1 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_3 41 64 INEHREHPKEYEYPLHQEHTYQQE
4 sample2 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_1 25 48 QNYWEHPYQNSDVYRPINEHREHP
5 sample2 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_2 33 56 QNSDVYRPINEHREHPKEYEYPLH
6 sample2 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_3 41 64 INEHREHPKEYEYPLHQEHTYQQE
7 sample3 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_1 25 48 QNYWEHPYQNSDVYRPINEHREHP
8 sample3 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_2 33 56 QNSDVYRPINEHREHPKEYEYPLH
9 sample3 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_3 41 64 INEHREHPKEYEYPLHQEHTYQQE
library(dplyr);library(tidyr)
peptides.df <- …(代码在此处被截断,完整版本见下文)
从整体上看,您只需要在 getSequences 函数外面再套一层循环:for(s in sample_sequences){getSequences()},对吗?
sapply(df$sample_sequences,getSequences)
应该这样做,尽管输出格式会略有不同。大家好,谢谢你们的帮助。我最终改变了解决问题的方法,以满足用户需求。然而,在我尝试学习R编程时,我使用了你的每一个建议。我的新方法是根据用户输入的两个坐标(Coord1,Coord2)对输入序列进行子集。library(dplyr)subset.sample.seq%mutate(Sequence=subset(Sequence,Coord1,Coord2)这里是正确显示的我的新代码:library(dplyr)subset.sample.seq%mutate(Sequence=substr(Sequence,Coord1,Coord2))
sample1 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE
sample2 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE
sample3 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE
...
sample200 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE
sample1 peptide_1 QNYWEHPYQNSDVYRPINEHREHP
sample1 peptide_2 QNSDVYRPINEHREHPKEYEYPLH
sample1 peptide_3 INEHREHPKEYEYPLHQEHTYQQE
sample2 peptide_1 QNYWEHPYQNSDVYRPINEHREHP
sample2 peptide_2 QNSDVYRPINEHREHPKEYEYPLH
sample2 peptide_3 INEHREHPKEYEYPLHQEHTYQQE
sample3 peptide_1 QNYWEHPYQNSDVYRPINEHREHP
sample3 peptide_2 QNSDVYRPINEHREHPKEYEYPLH
sample3 peptide_3 INEHREHPKEYEYPLHQEHTYQQE
...
sample200 peptide_1 QNYWEHPYQNSDVYRPINEHREHP
sample200 peptide_2 QNSDVYRPINEHREHPKEYEYPLH
sample200 peptide_3 INEHREHPKEYEYPLHQEHTYQQE
# dplyr provides mutate(); tidyr provides crossing().
library(dplyr)
library(tidyr)

# Peptide windows: a name plus the first and last residue position to cut.
peptides.df <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = " Name StartAA EndAA
peptide_1 25 48
peptide_2 33 56
peptide_3 41 64"
)

# One row per sample, holding the full input sequence.
sample_sequences <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = " sample sequence
sample1 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE
sample2 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE
sample3 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE"
)

# Pair every sample with every peptide window, then cut each window out of
# that sample's sequence. The result is printed, not stored.
mutate(
  crossing(sample_sequences, peptides.df),
  peptide.sequence = substr(sequence, StartAA, EndAA)
)
sample sequence Name StartAA EndAA peptide.sequence
1 sample1 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_1 25 48 QNYWEHPYQNSDVYRPINEHREHP
2 sample1 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_2 33 56 QNSDVYRPINEHREHPKEYEYPLH
3 sample1 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_3 41 64 INEHREHPKEYEYPLHQEHTYQQE
4 sample2 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_1 25 48 QNYWEHPYQNSDVYRPINEHREHP
5 sample2 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_2 33 56 QNSDVYRPINEHREHPKEYEYPLH
6 sample2 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_3 41 64 INEHREHPKEYEYPLHQEHTYQQE
7 sample3 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_1 25 48 QNYWEHPYQNSDVYRPINEHREHP
8 sample3 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_2 33 56 QNSDVYRPINEHREHPKEYEYPLH
9 sample3 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_3 41 64 INEHREHPKEYEYPLHQEHTYQQE