比较R中两个数据集的散点图
我想用散点图比较两个相同长度的数据集,然后根据指定p值的基因列表给差异表达的基因上色。更具体地说,它如下所示:比较R中两个数据集的散点图,r,dataset,data-visualization,scatter-plot,R,Dataset,Data Visualization,Scatter Plot,我想用散点图比较两个相同长度的数据集,然后根据指定p值的基因列表给差异表达的基因上色。更具体地说,它如下所示: 将x轴替换为数据A,y轴替换为数据B和有效p值根据原始图中的轴范围为[0,1]这一事实,我认为您希望绘制两种条件和两组测试下p值的散点图 以下是打印本身的操作方式: # Generate an artificial dataset library(MASS) set.seed(1) # Suitably chosen covariance matrix covariancematr
将x轴替换为数据A,y轴替换为数据B和有效p值。
根据原始图中的轴范围为[0,1]这一事实,我认为您希望绘制两种条件和两组测试下p值的散点图。以下是绘图本身的操作方式:
# Simulate correlated, p-value-like measurements for two conditions and draw
# them as a scatter plot. Points lying inside a band around the diagonal
# y = x are drawn in black, points outside the band in blue, and the band
# borders are drawn as red lines.

# Generate an artificial dataset
library(MASS)
# Fixed seed so that the simulated data are reproducible
set.seed(1)

# Number of simulated probes/genes
n_obs <- 20000
# Half-width of the band around the diagonal; used for both the colouring
# and the red border lines so the two always agree
band_halfwidth <- 0.05

# Suitably chosen covariance matrix
covariancematrix <- matrix(c(0.02, 0.019, 0.019, 0.02), nrow = 2)
#> print(cov2cor(covariancematrix))
#      [,1] [,2]
# [1,] 1.00 0.95
# [2,] 0.95 1.00

# Randomize the observations and constrain them to a p-value like range
pvalues <- mvrnorm(n_obs, mu = c(0.5, 0.5), Sigma = covariancematrix)
colnames(pvalues) <- c("p value condition 1", "p value condition 2")
rownames(pvalues) <- paste("Probe/gene id", seq_len(n_obs))
# p-values should be within range [0, 1]
pvalues[pvalues < 0] <- 0
pvalues[pvalues > 1] <- 1
#> str(pvalues)
#  num [1:20000, 1:2] 0.461 0.54 0.52 0.518 0.61 ...
#  - attr(*, "dimnames")=List of 2
#   ..$ : chr [1:20000] "Probe/gene id 1" "Probe/gene id 2" "Probe/gene id 3" "Probe/gene id 4" ...
#   ..$ : chr [1:2] "p value condition 1" "p value condition 2"
#> head(pvalues)
#                 p value condition 1 p value condition 2
# Probe/gene id 1           0.4614707           0.4767356
# Probe/gene id 2           0.5398754           0.5583752
# Probe/gene id 3           0.5196980           0.5251556
# Probe/gene id 4           0.5182167           0.4471374
# Probe/gene id 5           0.6097540           0.5745387
# Probe/gene id 6           0.4652409           0.3940416

# Conditional color vector based on where the observation is located.
# Computed in one vectorized comparison over whole columns instead of a
# row-by-row apply(); TRUE means the point lies strictly between the lines
# y = x + band_halfwidth and y = x - band_halfwidth.
in_band <- pvalues[, 1] + band_halfwidth > pvalues[, 2] &
  pvalues[, 1] - band_halfwidth < pvalues[, 2]
point_col <- ifelse(
  in_band,
  "#00000075", # inside the band: semi-transparent black
  "#0000FF75"  # outside the band: semi-transparent blue
)

# The plotting call itself
plot(
  # A 2-column matrix (or 2-column data.frame): the first column
  # pvalues[, 1] is by default the x-axis and the second column
  # pvalues[, 2] the y-axis.
  x = pvalues,
  # Analogous if you have two separate vectors instead of two columns,
  # change to something like:
  # x = pvalues[, 1], # x-axis observation values
  # y = pvalues[, 2], # y-axis observation values
  # x- and y-axis ranges [0, 1]
  xlim = c(0, 1),
  ylim = c(0, 1),
  # Select filled dots as the symbols
  pch = 16,
  col = point_col,
  # Axis labels
  xlab = "p values in differences condition 1",
  ylab = "p values in differences condition 2"
)

# Draw the band borders, formula:
# y = 1*x + band_halfwidth
abline(a = band_halfwidth, b = 1, col = "red", lwd = 2)
# y = 1*x - band_halfwidth
abline(a = -band_halfwidth, b = 1, col = "red", lwd = 2)
#生成一个人工数据集
library(MASS)
set.seed(1)
#适当选择协方差矩阵
covariancematrix <- matrix(c(0.02,0.019,0.019,0.02), nrow=2)
#> print(cov2cor(covariancematrix))
# [,1] [,2]
#[1,] 1.00 0.95
#[2,] 0.95 1.00
#随机化20000个观察值,并将其限制在类似p值的范围内
pvalues <- mvrnorm(20000, mu=c(0.5,0.5), Sigma=covariancematrix)
从原始图中的轴范围为[0,1]这一事实推断,我认为您希望在两种条件和两组测试下绘制p值的散点图。
以下是绘图本身的操作方式:
# Simulate correlated, p-value-like measurements for two conditions and draw
# them as a scatter plot. Points lying inside a band around the diagonal
# y = x are drawn in black, points outside the band in blue, and the band
# borders are drawn as red lines.

# Generate an artificial dataset
library(MASS)
# Fixed seed so that the simulated data are reproducible
set.seed(1)

# Number of simulated probes/genes
n_obs <- 20000
# Half-width of the band around the diagonal; used for both the colouring
# and the red border lines so the two always agree
band_halfwidth <- 0.05

# Suitably chosen covariance matrix
covariancematrix <- matrix(c(0.02, 0.019, 0.019, 0.02), nrow = 2)
#> print(cov2cor(covariancematrix))
#      [,1] [,2]
# [1,] 1.00 0.95
# [2,] 0.95 1.00

# Randomize the observations and constrain them to a p-value like range
pvalues <- mvrnorm(n_obs, mu = c(0.5, 0.5), Sigma = covariancematrix)
colnames(pvalues) <- c("p value condition 1", "p value condition 2")
rownames(pvalues) <- paste("Probe/gene id", seq_len(n_obs))
# p-values should be within range [0, 1]
pvalues[pvalues < 0] <- 0
pvalues[pvalues > 1] <- 1
#> str(pvalues)
#  num [1:20000, 1:2] 0.461 0.54 0.52 0.518 0.61 ...
#  - attr(*, "dimnames")=List of 2
#   ..$ : chr [1:20000] "Probe/gene id 1" "Probe/gene id 2" "Probe/gene id 3" "Probe/gene id 4" ...
#   ..$ : chr [1:2] "p value condition 1" "p value condition 2"
#> head(pvalues)
#                 p value condition 1 p value condition 2
# Probe/gene id 1           0.4614707           0.4767356
# Probe/gene id 2           0.5398754           0.5583752
# Probe/gene id 3           0.5196980           0.5251556
# Probe/gene id 4           0.5182167           0.4471374
# Probe/gene id 5           0.6097540           0.5745387
# Probe/gene id 6           0.4652409           0.3940416

# Conditional color vector based on where the observation is located.
# Computed in one vectorized comparison over whole columns instead of a
# row-by-row apply(); TRUE means the point lies strictly between the lines
# y = x + band_halfwidth and y = x - band_halfwidth.
in_band <- pvalues[, 1] + band_halfwidth > pvalues[, 2] &
  pvalues[, 1] - band_halfwidth < pvalues[, 2]
point_col <- ifelse(
  in_band,
  "#00000075", # inside the band: semi-transparent black
  "#0000FF75"  # outside the band: semi-transparent blue
)

# The plotting call itself
plot(
  # A 2-column matrix (or 2-column data.frame): the first column
  # pvalues[, 1] is by default the x-axis and the second column
  # pvalues[, 2] the y-axis.
  x = pvalues,
  # Analogous if you have two separate vectors instead of two columns,
  # change to something like:
  # x = pvalues[, 1], # x-axis observation values
  # y = pvalues[, 2], # y-axis observation values
  # x- and y-axis ranges [0, 1]
  xlim = c(0, 1),
  ylim = c(0, 1),
  # Select filled dots as the symbols
  pch = 16,
  col = point_col,
  # Axis labels
  xlab = "p values in differences condition 1",
  ylab = "p values in differences condition 2"
)

# Draw the band borders, formula:
# y = 1*x + band_halfwidth
abline(a = band_halfwidth, b = 1, col = "red", lwd = 2)
# y = 1*x - band_halfwidth
abline(a = -band_halfwidth, b = 1, col = "red", lwd = 2)
#生成一个人工数据集
library(MASS)
set.seed(1)
#适当选择协方差矩阵
covariancematrix <- matrix(c(0.02,0.019,0.019,0.02), nrow=2)
#> print(cov2cor(covariancematrix))
# [,1] [,2]
#[1,] 1.00 0.95
#[2,] 0.95 1.00
#随机化20000个观察值,并将其限制在类似p值的范围内
pvalues <- mvrnorm(20000, mu=c(0.5,0.5), Sigma=covariancematrix)
您尝试过什么?请给我们一个例子。
@sgibb:我试过使用plot(x~y);abline(lm(x~y)),但这同样无法重现。
@sgibb:如果你是指数据的dput,它相当大,不想把它挤到这里。我尝试使用的代码与上面的代码类似,但我只想知道它是如何完成的。
p值的黑色/蓝色看起来不错。你卡在哪里了?另外,在生成随机数时,请尝试使用set.seed(1)
(或任何其他数字),以使结果完全可重复。您尝试过什么?请给我们一个例子。@sgibb:我试过使用plot(x~y);abline(lm(x~y)),但这也是不可复制的。@sgibb:如果你是指数据的dput,它相当大,不想把它挤到这里。我尝试使用的代码与上面的代码类似,但我只想知道它是如何完成的。p值的黑色/蓝色看起来不错。你在坚持什么?另外,在生成随机数时,请尝试使用set.seed(1)
(或任何其他数字),以使结果完全可重复。
# Simulate correlated, p-value-like measurements for two conditions and draw
# them as a scatter plot. Points lying inside a band around the diagonal
# y = x are drawn in black, points outside the band in blue, and the band
# borders are drawn as red lines.

# Generate an artificial dataset
library(MASS)
# Fixed seed so that the simulated data are reproducible
set.seed(1)

# Number of simulated probes/genes
n_obs <- 20000
# Half-width of the band around the diagonal; used for both the colouring
# and the red border lines so the two always agree
band_halfwidth <- 0.05

# Suitably chosen covariance matrix
covariancematrix <- matrix(c(0.02, 0.019, 0.019, 0.02), nrow = 2)
#> print(cov2cor(covariancematrix))
#      [,1] [,2]
# [1,] 1.00 0.95
# [2,] 0.95 1.00

# Randomize the observations and constrain them to a p-value like range
pvalues <- mvrnorm(n_obs, mu = c(0.5, 0.5), Sigma = covariancematrix)
colnames(pvalues) <- c("p value condition 1", "p value condition 2")
rownames(pvalues) <- paste("Probe/gene id", seq_len(n_obs))
# p-values should be within range [0, 1]
pvalues[pvalues < 0] <- 0
pvalues[pvalues > 1] <- 1
#> str(pvalues)
#  num [1:20000, 1:2] 0.461 0.54 0.52 0.518 0.61 ...
#  - attr(*, "dimnames")=List of 2
#   ..$ : chr [1:20000] "Probe/gene id 1" "Probe/gene id 2" "Probe/gene id 3" "Probe/gene id 4" ...
#   ..$ : chr [1:2] "p value condition 1" "p value condition 2"
#> head(pvalues)
#                 p value condition 1 p value condition 2
# Probe/gene id 1           0.4614707           0.4767356
# Probe/gene id 2           0.5398754           0.5583752
# Probe/gene id 3           0.5196980           0.5251556
# Probe/gene id 4           0.5182167           0.4471374
# Probe/gene id 5           0.6097540           0.5745387
# Probe/gene id 6           0.4652409           0.3940416

# Conditional color vector based on where the observation is located.
# Computed in one vectorized comparison over whole columns instead of a
# row-by-row apply(); TRUE means the point lies strictly between the lines
# y = x + band_halfwidth and y = x - band_halfwidth.
in_band <- pvalues[, 1] + band_halfwidth > pvalues[, 2] &
  pvalues[, 1] - band_halfwidth < pvalues[, 2]
point_col <- ifelse(
  in_band,
  "#00000075", # inside the band: semi-transparent black
  "#0000FF75"  # outside the band: semi-transparent blue
)

# The plotting call itself
plot(
  # A 2-column matrix (or 2-column data.frame): the first column
  # pvalues[, 1] is by default the x-axis and the second column
  # pvalues[, 2] the y-axis.
  x = pvalues,
  # Analogous if you have two separate vectors instead of two columns,
  # change to something like:
  # x = pvalues[, 1], # x-axis observation values
  # y = pvalues[, 2], # y-axis observation values
  # x- and y-axis ranges [0, 1]
  xlim = c(0, 1),
  ylim = c(0, 1),
  # Select filled dots as the symbols
  pch = 16,
  col = point_col,
  # Axis labels
  xlab = "p values in differences condition 1",
  ylab = "p values in differences condition 2"
)

# Draw the band borders, formula:
# y = 1*x + band_halfwidth
abline(a = band_halfwidth, b = 1, col = "red", lwd = 2)
# y = 1*x - band_halfwidth
abline(a = -band_halfwidth, b = 1, col = "red", lwd = 2)