在R中以另外两列为条件,对多列进行t检验
我试图通过5个变量(翅长、初级飞羽长、体重、喙和跗骨)计算df中每个物种的性别差异 我尝试了几十种代码(我是初学者),但我不断地出错,或者代码没有给我想要的输出,等等 这差不多是我所得到的最接近的结果:在R中以另外两列为条件,对多列进行t检验,r,automation,tidyverse,R,Automation,Tidyverse,我试图通过5个变量(翅长、初级飞羽长、体重、喙和跗骨)计算df中每个物种的性别差异 我尝试了几十种代码(我是初学者),但我不断地出错,或者代码没有给我想要的输出,等等 这差不多是我所得到的最接近的结果: library(tidyverse) library(broom) df %>% select(SPECIES, SEX, WING, WINGPRI, WEIGHT, BEAK, TARSUS) %>% gather(key = variable, value
library(tidyverse)
library(broom)
# Asker's attempt at t-testing each measurement between sexes.
# NOTE(review): this pipeline does not run as written; see the notes on the
# `na.rm=TRUE` step and on the `t.test()` calls below.
df %>%
# Keep the two grouping columns and the five measurement columns.
select(SPECIES, SEX, WING, WINGPRI, WEIGHT, BEAK, TARSUS) %>%
# Reshape to long form: every measurement becomes a (variable, value) row.
gather(key = variable, value=value, -SEX, -SPECIES) %>%
group_by(SEX, SPECIES, variable) %>%
# Collapse each group's values into a single list-column cell.
summarise(value = list(value)) %>%
# NOTE(review): `na.rm=TRUE` is not a pipeline step. `=` binds more loosely
# than `%>%`, so the right-hand side `TRUE %>% tibble::rowid_to_column() ...`
# is evaluated as its own pipeline and rowid_to_column() receives TRUE rather
# than a data frame. This is presumably the source of the reported
# "is.data.frame(df) is not TRUE" error.
na.rm=TRUE %>%
tibble::rowid_to_column() %>%
# The key column here is SPECIES, so the wide result has one column per
# species; no columns named F or M are created by this step.
spread(SPECIES, value) %>%
group_by(variable) %>%
# NOTE(review): "F" and "M" are string literals, so t.test() is handed the
# characters "F" and "M" themselves, not the values measured for each sex.
mutate(p_value = t.test(unlist("F"), unlist("M"))$p.value,
t_value = t.test(unlist("F"), unlist("M"))$statistic)
但在解决了许多错误后,我仍无法克服这个错误:Error: is.data.frame(df) is not TRUE
任何关于如何修正这段代码、或者换用完全不同的代码的想法,我都会非常感激
> dput(sample)
structure(list(RING = c("A264874", "A432586", "O92477", "B9124",
"C95571", "A395011", "C88213", "C58443", "A95422", "C58409"),
SPECIES = c("CARDUELIS CARDUELIS", "SYLVIA ATRICAPILLA",
"ESTRILDA ASTRILD", "ALCEDO ATTHIS", "CHLORIS CHLORIS", "FRINGILLA COELEBS",
"SYLVIA ATRICAPILLA", "CHLORIS CHLORIS", "SYLVIA ATRICAPILLA",
"PARUS MAJOR"), SEX = c("U", "M", "F", "F", "F", "F", "F",
"M", "F", "M"), AGE = c(2L, 3L, 3L, 3L, 4L, 2L, 4L, 4L, 6L,
3L), FAT = c(0L, 0L, 0L, NA, 0L, 0L, 0L, 0L, 1L, 0L), WEIGHT = c(148,
185, 85, 32, 225, 20, 245, 22, 197, 19), WING = c(775, 69,
45, 76, 82, 84, 77, 83, 69, 72), WINGPRI = c(58L, NA, 32L,
NA, NA, NA, 57L, 64L, 52L, 54L), BEAK = c(156, 132, 86, NA,
NA, 138, 125, 13, 145, 125), TARSUS = c(148, 199, 146, NA,
NA, 178, 18, 177, 207, 205), BROODPATCH = c(0L, NA, 0L, 0L,
0L, NA, 0L, 0L, 0L, 0L), MUSCLE = c(2L, 3L, 2L, 2L, 2L, 2L,
2L, 1L, 2L, 2L), PROGRAM = c("MAI", "MAI", "MAI", "MIGRA<c7><c3>O",
"PEEC", "MAI", "MAI", "PEEC", "MAI", "MIGRA<c7><c3>O")), .Names = c("RING",
"SPECIES", "SEX", "AGE", "FAT", "WEIGHT", "WING", "WINGPRI",
"BEAK", "TARSUS", "BROODPATCH", "MUSCLE", "PROGRAM"), row.names = c(NA,
-10L), class = c("tbl_df", "tbl", "data.frame"))
>dput(示例)
结构(列表(环=c(“A264874”、“A432586”、“O92477”、“B9124”),
“C95571”、“A395011”、“C88213”、“C58443”、“A95422”、“C58409”),
物种=c(“卡杜埃利斯卡杜埃利斯”,“阿特里卡皮拉西尔维亚”,
“ESTRILDA ASTRILD”、“ALCEDO ATTHIS”、“CHLORIS Chlors”、“FRINGILLA COELEBS”,
“阿特里卡皮拉SYLVIA ATRICAPILLA”、“克罗斯克罗斯”、“阿特里卡皮拉SYLVIA”,
“帕鲁斯大调”),性别=c(“U”、“M”、“F”、“F”、“F”、“F”,
“M”,“F”,“M”),年龄=c(2L,3L,3L,4L,2L,4L,4L,4L,6L,
脂肪=c(0L,0L,0L,NA,0L,0L,0L,0L,1L,0L),重量=c(148,
185,85,32225,20245,22197,19),机翼=c(775,69,
45,76,82,84,77,83,69,72),WINGPRI=c(58L,NA,32L,
NA,NA,NA,57L,64L,52L,54L),喙=c(156,132,86,NA,
NA,138125,13145125),跗骨=c(148199146,NA,
NA,178,18,177,207,205),亲斑=c(0L,NA,0L,0L,
0L,NA,0L,0L,0L,0L,肌肉=c(2L,3L,2L,2L,2L,2L,
2L,1L,2L,2L),程序=c(“MAI”,“MAI”,“MAI”,“MIGRAO”,
“PEEC”、“MAI”、“MAI”、“PEEC”、“MAI”、“MIGRAO”),.Names=c(“戒指”,
“物种”、“性别”、“年龄”、“脂肪”、“体重”、“翅膀”、“翅膀”和“翅膀PRI”,
“喙”、“跗骨”、“窝斑”、“肌肉”、“程序”),row.names=c(NA,
-10L),类=c(“待定”、“待定”、“数据帧”))
以下是一个基于您发布的数据的示例
library(tidyverse)
library(broom)
# Welch two-sample t-test of each measurement between females and males,
# pooled across species. Produces one tidy row per measurement.
df %>%
  select(SPECIES, SEX, WING, WINGPRI, WEIGHT, BEAK, TARSUS) %>%
  # Long form: one row per bird and measurement.
  gather(key = variable, value = value, -SEX, -SPECIES) %>%
  # Birds of unknown sex ("U") cannot contribute to an F-vs-M comparison.
  filter(SEX != "U") %>%
  group_by(variable) %>%
  # The formula is evaluated on the current group's rows; tidy() converts the
  # test result into a one-row data frame that is kept in a list-column.
  summarise(tt = list(tidy(t.test(value ~ SEX)))) %>%
  # Name the list-column explicitly: calling unnest() without specifying
  # columns is deprecated in tidyr >= 1.0.0 and emits a warning.
  unnest(tt)
# # A tibble: 5 x 11
# variable estimate estimate1 estimate2 statistic p.value parameter conf.low conf.high method alternative
# <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
# 1 BEAK 33.5 124. 90 0.822 0.483 2.47 -113. 180. Welch Two Sample t-test two.sided
# 2 TARSUS -56.4 137. 194. -1.33 0.270 3.25 -186. 73.2 Welch Two Sample t-test two.sided
# 3 WEIGHT 58.7 134 75.3 0.857 0.436 4.32 -126. 243. Welch Two Sample t-test two.sided
# 4 WING -2.5 72.2 74.7 -0.346 0.740 6.87 -19.6 14.6 Welch Two Sample t-test two.sided
# 5 WINGPRI -12 47 59 -1.31 0.281 2.99 -41.1 17.1 Welch Two Sample t-test two.sided
库(tidyverse)
图书馆(扫帚)
df%>%
选择(物种、性别、翅、翅、体重、喙、跗)%>%
聚集(键=变量,值=值,-性别,-物种)%>%
过滤器(性别!=“U”)%>%
分组依据(变量)%>%
总结(tt=list(整洁(t.test(value~SEX)))%>%
unnest()
##tibble:5 x 11
#变量估计估计1估计2统计p值参数形态低形态高方法备选方案
#
#1喙33.5 124。90 0.822 0.483 2.47 -113. 180韦尔奇双面双样本t检验
#2跗骨-56.4 137。194. -1.33 0.270 3.25 -186. 73.2韦尔奇双侧双样本t检验
#3重量58.7 134 75.3 0.857 0.436 4.32-126。243韦尔奇双面双样本t检验
#4 WING-2.5 72.2 74.7-0.346 0.740 6.87-19.6 14.6 Welch双侧双样本t检验
#5 WINGPRI-12 47 59-1.31 0.281 2.99-41.1 17.1 Welch双面双样本t检验
我无法同时按物种(SPECIES)分组,因为示例数据中没有足够的观测值来进行t.test检验
请注意,t.test会自动忽略NA值,但您也可以在执行检验之前,在代码中添加 ... %>% na.omit() %>% ...
请注意,tt=list(tidy(t.test(value~SEX)))
将根据您的分组自动使用相应的子数据集。但是,如果您使用tt=list(tidy(t.test(value~SEX,data=.)))
它每次都会使用整个数据集。同胞们好
您把一个列表当作数据框,并试图在这个列表上运行代码。我建议您先尝试将df
转换为数据框,然后再运行代码。我建议你试试这个例子,看看它是怎么回事:
# Build each column as a standalone vector
RING <- c(
  "A264874", "A432586", "O92477", "B9124", "C95571",
  "A395011", "C88213", "C58443", "A95422", "C58409"
)
SPECIES <- c(
  "CARDUELIS CARDUELIS", "SYLVIA ATRICAPILLA", "ESTRILDA ASTRILD",
  "ALCEDO ATTHIS", "CHLORIS CHLORIS", "FRINGILLA COELEBS",
  "SYLVIA ATRICAPILLA", "CHLORIS CHLORIS", "SYLVIA ATRICAPILLA",
  "PARUS MAJOR"
)
SEX <- c("U", "M", "F", "F", "F", "F", "F", "M", "F", "M")
AGE <- c(2L, 3L, 3L, 3L, 4L, 2L, 4L, 4L, 6L, 3L)
FAT <- c(0L, 0L, 0L, NA, 0L, 0L, 0L, 0L, 1L, 0L)
WEIGHT <- c(148, 185, 85, 32, 225, 20, 245, 22, 197, 19)
WING <- c(775, 69, 45, 76, 82, 84, 77, 83, 69, 72)
# Collect the vectors in a plain list; the order here fixes the column order
df <- list(WEIGHT, FAT, AGE, SEX, SPECIES, RING, WING)
# Bind the list elements column-wise into a data frame, then label the columns
data.frame <- setNames(
  do.call(cbind.data.frame, df),
  c("WEIGHT", "FAT", "AGE", "SEX", "SPECIES", "RING", "WING")
)
#为测试创建单个向量
环=c(“A264874”、“A432586”、“O92477”、“B9124”,
“C95571”、“A395011”、“C88213”、“C58443”、“A95422”、“C58409”)
物种=c(“卡杜埃利斯卡杜埃利斯”,“阿特里卡皮拉西尔维亚”,
“ESTRILDA ASTRILD”、“ALCEDO ATTHIS”、“CHLORIS Chlors”、“FRINGILLA COELEBS”,
“阿特里卡皮拉SYLVIA ATRICAPILLA”、“克罗斯克罗斯”、“阿特里卡皮拉SYLVIA”,
“大帕鲁斯”)
性别=c(“U”、“M”、“F”、“F”、“F”、“F”、“F”、“F”,
“M”、“F”、“M”)
年龄=c(2L、3L、3L、3L、4L、2L、4L、4L、6L、3L)
FAT=c(0L、0L、0L、NA、0L、0L、0L、0L、0L、1L、0L)
重量=c(148185,85,32225,20245,22197,19)
机翼=c(775,69,45,76,82,84,77,83,69,72)
#把它们都列出来
df使用另一个库:
library(matrixTests)
# Measurement columns to compare between the sexes
vars <- c("WING", "WINGPRI", "WEIGHT", "BEAK", "TARSUS")
# One Welch t-test per column: female rows as the first sample, male rows as
# the second
col_t_welch(
  sample[sample[["SEX"]] == "F", vars],
  sample[sample[["SEX"]] == "M", vars]
)
obs.x obs.y obs.tot mean.x mean.y mean.diff var.x var.y stderr df statistic pvalue conf.low conf.high alternative mean.null conf.level
WING 6 3 9 72.16667 74.66667 -2.50000 204.5667 54.33333 7.225341 6.873827 -0.3460044 0.7396874 -19.64900 14.64900 two.sided 0 0.95
WINGPRI 3 2 5 47.00000 59.00000 -12.00000 175.0000 50.00000 9.128709 2.985075 -1.3145341 0.2805428 -41.13398 17.13398 two.sided 0 0.95
WEIGHT 6 3 9 134.00000 75.33333 58.66667 10074.4000 9022.33333 68.458097 4.318219 0.8569719 0.4364231 -126.00493 243.33827 two.sided 0 0.95
BEAK 4 3 7 123.50000 90.00000 33.50000 693.6667 4459.00000 40.740029 2.471493 0.8222871 0.4826893 -113.34042 180.34042 two.sided 0 0.95
TARSUS 4 3 7 137.25000 193.66667 -56.41667 6940.9167 217.33333 42.516745 3.247234 -1.3269282 0.2701275 -186.07859 73.24525 two.sided 0 0.95
库(matrixTests)
vars考虑base
包——它随R的每次安装一起提供,并在每个R会话中自动加载,无需调用library()。具体来说,使用其中的by(面向对象的tapply包装器)按物种对数据框取子集,并使用sapply构建t统计量和p值矩阵。
由于OP的数据没有足够的观察值用于t.test
,下面生成一个数据集
# Simulated data set: 500 rows with a species label drawn from the species
# present in `df`, a random sex, and five uniform measurements on [0, 100].
# The seed is fixed so the draws are reproducible.
set.seed(10102018)
species_df <- data.frame(
  SPECIES = sample(unique(df[["SPECIES"]]), size = 500, replace = TRUE),
  SEX = sample(c("F", "M"), size = 500, replace = TRUE),
  WING = 100 * runif(500),
  WINGPRI = 100 * runif(500),
  WEIGHT = 100 * runif(500),
  BEAK = 100 * runif(500),
  TARSUS = 100 * runif(500),
  stringsAsFactors = FALSE
)
# NAMED LIST OF MATRICES
# For every species, run a two-sample t-test (females vs. males) on each
# measurement column. by() splits `species_df` on SPECIES and yields one
# numeric matrix per species: rows `p_value` and `t_value.t`, one column per
# measurement.
mat_list <- by(species_df, species_df$SPECIES, function(sub) {
  measures <- c("WING", "WINGPRI", "WEIGHT", "BEAK", "TARSUS")
  # Template for vapply() and fallback for tests that cannot be computed.
  # The second name is "t_value.t" because c(t_value = output$statistic)
  # appends the statistic's own name ("t"); reusing it here keeps the row
  # names the same whether or not the first column's test succeeds.
  na_result <- c(p_value = NA_real_, t_value.t = NA_real_)
  # vapply() (rather than sapply()) guarantees a 2-row numeric matrix.
  vapply(measures, function(col) {
    tryCatch({
      # Extract the column as a vector first so this works for plain data
      # frames and tibbles alike.
      output <- t.test(
        sub[[col]][sub$SEX == "F"],
        sub[[col]][sub$SEX == "M"]
      )
      c(p_value = output$p.value, t_value = output$statistic)
    },
    # A warning or an error (e.g. too few observations) yields NA for this
    # column instead of aborting the whole species.
    warning = function(w) na_result,
    error = function(e) na_result)
  }, FUN.VALUE = na_result)
})
为什么要写 … %>% na.rm=TRUE %>% …
?您希望这段代码做什么?为什么rowid_to_column()
之后在任何地方都不使用该信息?我将发布一个解决方案,希望能对您有所帮助:)这正是我想要的,尽管我在尝试将此代码应用于完整df(2600次观察)时遇到了一些错误在wrapup期间出错:没有足够的“x”观察值
或错误:无法使用矩阵或数组进行列索引
这是否意味着我的数据不够大,无法运行t检验?谢谢一些物种没有太多的观察结果,可能是因为这个原因代码没有运行?在sapply
调用中包装一个tryCatch
,为那些失败的t测试返回NAs。请参见编辑。非常感谢,我已经挣扎了这么久了!很高兴听到。高兴的
mat_list
# species_df$SPECIES: ALCEDO ATTHIS
# WING WINGPRI WEIGHT BEAK TARSUS
# p_value 0.7273165 0.8382798 0.3180979 0.6450270 0.3856571
# t_value.t 0.3501749 -0.2048995 -1.0055505 0.4629014 -0.8733496
# ----------------------------------------------------------------------------------------
# species_df$SPECIES: CARDUELIS CARDUELIS
# WING WINGPRI WEIGHT BEAK TARSUS
# p_value 0.5200729 0.8520463 0.3370721 0.8189008 0.1212502
# t_value.t 0.6470729 -0.1873091 0.9678003 0.2299977 1.5716422
# ----------------------------------------------------------------------------------------
# species_df$SPECIES: CHLORIS CHLORIS
# WING WINGPRI WEIGHT BEAK TARSUS
# p_value 0.1115453 0.5689228 0.94825726 0.5989776 0.9108546
# t_value.t -1.6129915 -0.5725928 0.06514506 -0.5284384 0.1124033
# ----------------------------------------------------------------------------------------
# species_df$SPECIES: ESTRILDA ASTRILD
# WING WINGPRI WEIGHT BEAK TARSUS
# p_value 0.09291222 0.7700545 0.6859697 0.1958938 0.6452502
# t_value.t 1.70719717 0.2935269 0.4062293 1.3054498 0.4624954
# ----------------------------------------------------------------------------------------
# species_df$SPECIES: FRINGILLA COELEBS
# WING WINGPRI WEIGHT BEAK TARSUS
# p_value 0.06157204 0.8636649 0.2183259 0.4757378 0.274626
# t_value.t 1.89924201 0.1723255 1.2416417 0.7170863 1.101813
# ----------------------------------------------------------------------------------------
# species_df$SPECIES: PARUS MAJOR
# WING WINGPRI WEIGHT BEAK TARSUS
# p_value 0.96688923 0.5857059 0.1140328 0.5055508 0.5747242
# t_value.t 0.04168846 0.5481212 1.6046303 -0.6694396 0.5643418
# ----------------------------------------------------------------------------------------
# species_df$SPECIES: SYLVIA ATRICAPILLA
# WING WINGPRI WEIGHT BEAK TARSUS
# p_value 0.4350621 0.5446387 0.7073097 0.3911381 0.7631614
# t_value.t -0.7851506 0.6091449 0.3770283 0.8628441 -0.3024993