R 基于条件语句的矩阵元素条件更新_R

R 基于条件语句的矩阵元素条件更新

R 基于条件语句的矩阵元素条件更新,r,R,我确信我只是犯了一个简单的错误我有一个很大的矩阵3307592x9，我需要遍历它，如果第8列（字符/字符串）==9（字符/字符串）（不区分大小写），那么第3-7列（数字0-1）需要是1-self。我编写的代码是： for (i in 1:3307592){ if(grepl(chr2SnpFreqNorm[i,8], chr2SnpFreqNorm[i,9], ignore.case=TRUE)){ chr2SnpFreqNorm[i,3] <- 1 - chr2

我确信我只是犯了一个简单的错误

我有一个很大的矩阵（3307592 x 9），我需要遍历它：如果第 8 列（字符/字符串）与第 9 列（字符/字符串）相等（不区分大小写），那么第 3-7 列（0 到 1 之间的数字）需要替换为 1 减去其自身的值。我编写的代码是：

# Walk the rows one by one; whenever column 8 matches column 9 (ignoring
# case), replace each of columns 3-7 with 1 minus its current value.
# NOTE(review): the row count 3307592 is hard-coded -- presumably this is
# nrow(chr2SnpFreqNorm); confirm.
for (i in 1:3307592){
    # grepl() uses column 8 as a regular-expression pattern searched for in
    # column 9, so this is a "contains" test rather than strict equality.
    if(grepl(chr2SnpFreqNorm[i,8], chr2SnpFreqNorm[i,9], ignore.case=TRUE)){
        # NOTE(review): the subtraction needs numeric cells -- assumes
        # chr2SnpFreqNorm is a data.frame rather than a character matrix; confirm.
        chr2SnpFreqNorm[i,3] <- 1 - chr2SnpFreqNorm[i,3]
        chr2SnpFreqNorm[i,4] <- 1 - chr2SnpFreqNorm[i,4]
        chr2SnpFreqNorm[i,5] <- 1 - chr2SnpFreqNorm[i,5]
        chr2SnpFreqNorm[i,6] <- 1 - chr2SnpFreqNorm[i,6]
        chr2SnpFreqNorm[i,7] <- 1 - chr2SnpFreqNorm[i,7]
    }
}

for 在 R 中不是您的朋友，这里有一个使用 apply 和条件索引的解决方案：

## Create some toy data: a 100 x 5 matrix filled column by column, so the
## first three columns hold random frequencies and the last two hold random
## allele letters. Mixing numbers and letters in c() makes every cell a string.
data <- matrix(
    c(
        runif(300),
        sample(c('A','G','C','T','a','c','g','t'), 200, replace = TRUE)
    ),
    nrow = 100,
    ncol = 5
)

## Flip a single allele-frequency value.
##
## x: one cell (a length-1 value), numeric or character.
## Returns 1 - x as a number when x parses as a number below 1; otherwise
## returns x unchanged (non-numeric strings, NA, and values >= 1).
flip_allele_freqs <- function(x) {
    ## Letters such as "A" cannot be parsed and become NA; that is the expected
    ## way to detect strings here, so the coercion warning is silenced.
    n <- suppressWarnings(as.numeric(x))
    if (is.na(n)) {
        return(x)
    }
    if (n < 1) {
        return(1 - n)
    }
    x
}

## Apply flip_allele_freqs to the rows whose last two columns hold the same
## letter (ignoring case), then fold the result back into the matrix.
same_allele <- toupper(data[, 5]) == toupper(data[, 4])

## drop = FALSE keeps a single matching row as a 1-row matrix; without it the
## selection collapses to a vector and apply() over c(1, 2) fails.
data[same_allele, ] <-
    apply(data[same_allele, , drop = FALSE], c(1, 2), flip_allele_freqs)

## 创建一些玩具数据
matrix( ncol=5, nrow = 100, c( runif(300), sample(c('A','G','C','T','a','c','g','t'), replace=T, 200))) -> data

flip_allele_freqs

for 在 R 中不是你的朋友，这里有一个使用 apply 和条件索引的解决方案：
## Create some toy data: a 100 x 5 matrix filled column by column, so the
## first three columns hold random frequencies and the last two hold random
## allele letters. Mixing numbers and letters in c() makes every cell a string.
data <- matrix(
    c(
        runif(300),
        sample(c('A','G','C','T','a','c','g','t'), 200, replace = TRUE)
    ),
    nrow = 100,
    ncol = 5
)

## Flip a single allele-frequency value.
##
## x: one cell (a length-1 value), numeric or character.
## Returns 1 - x as a number when x parses as a number below 1; otherwise
## returns x unchanged (non-numeric strings, NA, and values >= 1).
flip_allele_freqs <- function(x) {
    ## Letters such as "A" cannot be parsed and become NA; that is the expected
    ## way to detect strings here, so the coercion warning is silenced.
    n <- suppressWarnings(as.numeric(x))
    if (is.na(n)) {
        return(x)
    }
    if (n < 1) {
        return(1 - n)
    }
    x
}

## Apply flip_allele_freqs to the rows whose last two columns hold the same
## letter (ignoring case), then fold the result back into the matrix.
same_allele <- toupper(data[, 5]) == toupper(data[, 4])

## drop = FALSE keeps a single matching row as a 1-row matrix; without it the
## selection collapses to a vector and apply() over c(1, 2) fails.
data[same_allele, ] <-
    apply(data[same_allele, , drop = FALSE], c(1, 2), flip_allele_freqs)

## 创建一些玩具数据
matrix( ncol=5, nrow = 100, c( runif(300), sample(c('A','G','C','T','a','c','g','t'), replace=T, 200))) -> data

flip_allele_freqs

首先查看数据：
# Example data: a 10-row data.frame with the columns ID and pos, five numeric
# *Chr2SnpFreq columns, and the character columns ALT and AA.
# Only rows 4 and 8 have ALT equal to AA when case is ignored.
DF <- structure(list(ID = c("rs187078949", "rs191522553", "rs149483862", 
"rs150919307", "rs186644623", "rs193294418", "rs185496709", "rs188771313", 
"rs192945962", "rs184397180"), pos = c(10133L, 10140L, 10286L, 
10297L, 10315L, 10345L, 10386L, 10419L, 10425L, 10431L), ceuChr2SnpFreq = c(0.070588235, 
0.005882353, 0.1, 0.147058824, 0, 0.017647059, 0.082352941, 0.229411765, 
0.1, 0.064705882), chsChr2SnpFreq = c(0, 0, 0.135, 0.07, 0, 0, 
0.02, 0.085, 0.02, 0.005), lwkChr2SnpFreq = c(0.030927835, 0.005154639, 
0.226804124, 0.113402062, 0, 0.036082474, 0.087628866, 0.056701031, 
0.015463918, 0.036082474), tsiChr2SnpFreq = c(0.035714286, 0.005102041, 
0.239795918, 0.168367347, 0, 0.030612245, 0.035714286, 0.147959184, 
0.091836735, 0.015306122), yriChr2SnpFreq = c(0.045454545, 0.005681818, 
0.170454545, 0.130681818, 0.005681818, 0.028409091, 0.113636364, 
0.090909091, 0.034090909, 0.045454545), ALT = c("A", "A", "A", 
"T", "G", "A", "T", "G", "G", "T"), AA = c("a", "C", "t", "t", 
"C", "G", "t", "G", "c", "a")), .Names = c("ID", "pos", "ceuChr2SnpFreq", 
"chsChr2SnpFreq", "lwkChr2SnpFreq", "tsiChr2SnpFreq", "yriChr2SnpFreq", 
"ALT", "AA"), row.names = c("1", "2", "3", "4", "5", "6", "7", 
"8", "9", "10"), class = "data.frame")

DF

首先是您的数据：
# Example data: a 10-row data.frame with the columns ID and pos, five numeric
# *Chr2SnpFreq columns, and the character columns ALT and AA.
# Only rows 4 and 8 have ALT equal to AA when case is ignored.
DF <- structure(list(ID = c("rs187078949", "rs191522553", "rs149483862", 
"rs150919307", "rs186644623", "rs193294418", "rs185496709", "rs188771313", 
"rs192945962", "rs184397180"), pos = c(10133L, 10140L, 10286L, 
10297L, 10315L, 10345L, 10386L, 10419L, 10425L, 10431L), ceuChr2SnpFreq = c(0.070588235, 
0.005882353, 0.1, 0.147058824, 0, 0.017647059, 0.082352941, 0.229411765, 
0.1, 0.064705882), chsChr2SnpFreq = c(0, 0, 0.135, 0.07, 0, 0, 
0.02, 0.085, 0.02, 0.005), lwkChr2SnpFreq = c(0.030927835, 0.005154639, 
0.226804124, 0.113402062, 0, 0.036082474, 0.087628866, 0.056701031, 
0.015463918, 0.036082474), tsiChr2SnpFreq = c(0.035714286, 0.005102041, 
0.239795918, 0.168367347, 0, 0.030612245, 0.035714286, 0.147959184, 
0.091836735, 0.015306122), yriChr2SnpFreq = c(0.045454545, 0.005681818, 
0.170454545, 0.130681818, 0.005681818, 0.028409091, 0.113636364, 
0.090909091, 0.034090909, 0.045454545), ALT = c("A", "A", "A", 
"T", "G", "A", "T", "G", "G", "T"), AA = c("a", "C", "t", "t", 
"C", "G", "t", "G", "c", "a")), .Names = c("ID", "pos", "ceuChr2SnpFreq", 
"chsChr2SnpFreq", "lwkChr2SnpFreq", "tsiChr2SnpFreq", "yriChr2SnpFreq", 
"ALT", "AA"), row.names = c("1", "2", "3", "4", "5", "6", "7", 
"8", "9", "10"), class = "data.frame")

DF

在 base R 中，您可以简单地执行：
## Flag, row by row, where column 8 (used as a case-insensitive pattern) is
## found in column 9.
flip <- Vectorize(grepl)(chr2SnpFreqNorm[,8], chr2SnpFreqNorm[,9], ignore.case=TRUE)

## Replace columns 3-7 of the flagged rows with 1 minus their current value.
## The same index `flip` must be used on both sides of the assignment.
chr2SnpFreqNorm[flip,3:7] <- 1 - chr2SnpFreqNorm[flip,3:7]

在base R中，您可以简单地执行以下操作
## Flag, row by row, where column 8 (used as a case-insensitive pattern) is
## found in column 9.
flip <- Vectorize(grepl)(chr2SnpFreqNorm[,8], chr2SnpFreqNorm[,9], ignore.case=TRUE)

## Replace columns 3-7 of the flagged rows with 1 minus their current value.
## The same index `flip` must be used on both sides of the assignment.
chr2SnpFreqNorm[flip,3:7] <- 1 - chr2SnpFreqNorm[flip,3:7]

您的主要错误是使用 for 循环而不是向量化操作。（鉴于您的数据规模，我建议使用 data.table 包。）我也不清楚为什么要使用 grepl。tolower 和 == 的组合应该足够了。如果您愿意，可以更容易地演示如何执行此操作。例如，XianceStor

您的示例数据是无用的，因为其中从未满足条件。

修复了示例数据。

我已修改了答案以使用它。

您的主要错误是使用for
循环而不是矢量化操作。（鉴于您的数据规模，我建议使用 data.table 包。）我也不清楚为什么要使用 grepl。tolower 和 == 的组合应该足够了。如果你愿意的话，我会更容易告诉你怎么做。像XianceStor这样的东西

你的样本数据是无用的，因为它从来没有满足过条件。

修复了样本数据。

我修改了我的答案来使用它。

我很确定是这样做的。非常感谢。

如果您使用 ifelse 而不是 if 和 else，则无需使用 apply（这只会使速度变慢）。

我很确定这是正确的。非常感谢。

如果使用 ifelse 而不是 if 和 else，则无需使用 apply（这只会使速度变慢）。

我必须查找 :=。只需读取 and。

我必须查找 :=。只需读取 and。