需要帮助扩展R中的函数和for循环吗_R_Function_For Loop_Dataframe

需要帮助扩展R中的函数和for循环吗

r function for-loop dataframe

需要帮助扩展R中的函数和for循环吗,r,function,for-loop,dataframe,R,Function,For Loop,Dataframe,我有以下带有for循环的函数： getSequences <- function(input.seq){ peptide.result <- c() for (i in 1:nrow(peptides.df)) { peptide.seq <- substr(input.seq, peptides.df$StartAA[i], peptides.df$EndAA[i]) peptide.info <- data.frame(cbind(peptide.na

我有以下带有for循环的函数：

#' Cut peptide sub-sequences out of an input sequence.
#'
#' Every row of `peptides` describes one peptide by name and by its first and
#' last residue position; the matching slice of `input.seq` is extracted for
#' each of them.
#'
#' @param input.seq Character vector with the sequence(s) to slice, usually a
#'   single string. With several sequences the rows are ordered peptide by
#'   peptide, each peptide listing all sequences in turn.
#' @param peptides Data frame with the columns `Name`, `StartAA` and `EndAA`.
#'   Defaults to the variable `peptides.df` found in the calling scope chain,
#'   which is what the function always read before this argument existed.
#' @return A data frame with the character columns `peptide.name` and
#'   `peptide.seq`; it has zero rows when `peptides` has no rows.
getSequences <- function(input.seq, peptides = peptides.df) {
  n.peptides <- nrow(peptides)

  # One index per (peptide, sequence) pair. seq_len() yields an empty index
  # for an empty peptide table, where 1:nrow() would iterate over c(1, 0).
  idx <- rep(seq_len(n.peptides), each = length(input.seq))

  data.frame(
    # as.character() keeps factor names as labels instead of integer codes.
    peptide.name = as.character(peptides$Name[idx]),
    # substr() only recycles start/stop to the length of x, so the sequence
    # itself has to be repeated to line up with the peptide coordinates.
    peptide.seq = substr(
      rep(input.seq, times = n.peptides),
      peptides$StartAA[idx],
      peptides$EndAA[idx]
    ),
    stringsAsFactors = FALSE
  )
}

# Run getSequences() on `input.seq` and keep the returned table in `test.results`.
# NOTE(review): `input.seq` is not defined in the code shown here; presumably it
# is assigned earlier in the session. Confirm before running.
test.results <- getSequences(input.seq)

当前输出（test.results）：

peptide.name    peptide.sequence
peptide_1   QNYWEHPYQNSDVYRPINEHREHP
peptide_2   QNSDVYRPINEHREHPKEYEYPLH
peptide_3   INEHREHPKEYEYPLHQEHTYQQE

我该如何扩展它，使其接受一个包含样本及其输入序列的数据框？对于每个样本及其序列，我想像上面的示例那样生成一组肽。

新输入：包含样本及其序列的数据框 sample_sequences（200个样本，每个样本带有一条输入序列）

新输出：每个样本对应的肽

sample1 peptide_1   QNYWEHPYQNSDVYRPINEHREHP
sample1 peptide_2   QNSDVYRPINEHREHPKEYEYPLH
sample1 peptide_3   INEHREHPKEYEYPLHQEHTYQQE
sample2 peptide_1   QNYWEHPYQNSDVYRPINEHREHP
sample2 peptide_2   QNSDVYRPINEHREHPKEYEYPLH
sample2 peptide_3   INEHREHPKEYEYPLHQEHTYQQE
sample3 peptide_1   QNYWEHPYQNSDVYRPINEHREHP
sample3 peptide_2   QNSDVYRPINEHREHPKEYEYPLH
sample3 peptide_3   INEHREHPKEYEYPLHQEHTYQQE
...
sample200   peptide_1   QNYWEHPYQNSDVYRPINEHREHP
sample200   peptide_2   QNSDVYRPINEHREHPKEYEYPLH
sample200   peptide_3   INEHREHPKEYEYPLHQEHTYQQE

您可以使用

tidyr

和

dplyr

避免循环。您可以使用

crossing

扩展所有可能肽的样本序列。然后，使用

substr 提取每个肽对应的子序列：

# Worked example: pair every sample with every peptide, then slice the sequence.
library(dplyr)
library(tidyr)

# Peptide definitions: name plus first/last residue position.
peptides.df <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = "   Name StartAA EndAA
peptide_1   25    48
peptide_2   33    56
peptide_3   41    64"
)

# One input sequence per sample.
sample_sequences <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = " sample sequence
sample1     MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE
sample2     MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE
sample3     MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE"
)

# crossing() builds all sample x peptide combinations; substr() then cuts the
# peptide out of each row's sequence. The result is printed, not stored.
sample_sequences %>%
  crossing(peptides.df) %>%
  mutate(peptide.sequence = substr(sequence, StartAA, EndAA))

   sample                                                         sequence      Name StartAA EndAA         peptide.sequence
1 sample1 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_1      25    48 QNYWEHPYQNSDVYRPINEHREHP
2 sample1 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_2      33    56 QNSDVYRPINEHREHPKEYEYPLH
3 sample1 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_3      41    64 INEHREHPKEYEYPLHQEHTYQQE
4 sample2 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_1      25    48 QNYWEHPYQNSDVYRPINEHREHP
5 sample2 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_2      33    56 QNSDVYRPINEHREHPKEYEYPLH
6 sample2 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_3      41    64 INEHREHPKEYEYPLHQEHTYQQE
7 sample3 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_1      25    48 QNYWEHPYQNSDVYRPINEHREHP
8 sample3 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_2      33    56 QNSDVYRPINEHREHPKEYEYPLH
9 sample3 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_3      41    64 INEHREHPKEYEYPLHQEHTYQQE

library(dplyr);library(tidyr)
peptides.df

在更高的层面上，您只需要在 getSequences 函数外面再套一层循环 for (s in sample_sequences) { getSequences() }，对吗？

sapply(df$sample_sequences, getSequences) 应该可以做到，尽管输出格式会略有不同。

大家好，谢谢你们的帮助。我最终改变了解决问题的方法，以满足用户需求。不过，在学习R编程的过程中，你们的每一条建议我都用上了。我的新方法是根据用户输入的两个坐标（Coord1，Coord2）截取输入序列的子串。library(dplyr) subset.sample.seq%mutate(Sequence=subset(Sequence, Coord1, Coord2))

这里是正确显示的我的新代码：library(dplyr) subset.sample.seq%mutate(Sequence=substr(Sequence, Coord1, Coord2))
sample1     MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE
sample2     MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE
sample3     MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE
...
sample200   MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE

sample1 peptide_1   QNYWEHPYQNSDVYRPINEHREHP
sample1 peptide_2   QNSDVYRPINEHREHPKEYEYPLH
sample1 peptide_3   INEHREHPKEYEYPLHQEHTYQQE
sample2 peptide_1   QNYWEHPYQNSDVYRPINEHREHP
sample2 peptide_2   QNSDVYRPINEHREHPKEYEYPLH
sample2 peptide_3   INEHREHPKEYEYPLHQEHTYQQE
sample3 peptide_1   QNYWEHPYQNSDVYRPINEHREHP
sample3 peptide_2   QNSDVYRPINEHREHPKEYEYPLH
sample3 peptide_3   INEHREHPKEYEYPLHQEHTYQQE
...
sample200   peptide_1   QNYWEHPYQNSDVYRPINEHREHP
sample200   peptide_2   QNSDVYRPINEHREHPKEYEYPLH
sample200   peptide_3   INEHREHPKEYEYPLHQEHTYQQE

# Worked example: pair every sample with every peptide, then slice the sequence.
library(dplyr)
library(tidyr)

# Peptide definitions: name plus first/last residue position.
peptides.df <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = "   Name StartAA EndAA
peptide_1   25    48
peptide_2   33    56
peptide_3   41    64"
)

# One input sequence per sample.
sample_sequences <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = " sample sequence
sample1     MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE
sample2     MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE
sample3     MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE"
)

# crossing() builds all sample x peptide combinations; substr() then cuts the
# peptide out of each row's sequence. The result is printed, not stored.
sample_sequences %>%
  crossing(peptides.df) %>%
  mutate(peptide.sequence = substr(sequence, StartAA, EndAA))

   sample                                                         sequence      Name StartAA EndAA         peptide.sequence
1 sample1 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_1      25    48 QNYWEHPYQNSDVYRPINEHREHP
2 sample1 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_2      33    56 QNSDVYRPINEHREHPKEYEYPLH
3 sample1 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_3      41    64 INEHREHPKEYEYPLHQEHTYQQE
4 sample2 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_1      25    48 QNYWEHPYQNSDVYRPINEHREHP
5 sample2 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_2      33    56 QNSDVYRPINEHREHPKEYEYPLH
6 sample2 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_3      41    64 INEHREHPKEYEYPLHQEHTYQQE
7 sample3 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_1      25    48 QNYWEHPYQNSDVYRPINEHREHP
8 sample3 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_2      33    56 QNSDVYRPINEHREHPKEYEYPLH
9 sample3 MRKLYCVLLLSAFEFTYMINFGRGQNYWEHPYQNSDVYRPINEHREHPKEYEYPLHQEHTYQQE peptide_3      41    64 INEHREHPKEYEYPLHQEHTYQQE