Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/78.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 如何获得一辆车的mpg低于其他车的概率?_R_Probability_Normal Distribution - Fatal编程技术网

R 如何获得一辆车的mpg低于其他车的概率?

R 如何获得一辆车的mpg低于其他车的概率?,r,probability,normal-distribution,R,Probability,Normal Distribution,我用这个来得到汽车mpg的平均值和标准偏差 df1 <- mtcars; df1$rownames = rownames(df1) df2 <- mtcars; df2$rownames = rownames(df2) df2$mpg = df2$mpg + rnorm(nrow(df2),0,3) data = rbind(df1, df2) df1这里比较了dplyr/data.table返回概率的方法 library(dplyr) library(data.table) li

我用这个来得到汽车mpg的平均值和标准偏差

# Build two copies of mtcars, each carrying the car names in an explicit
# `rownames` column. The second copy gets Gaussian noise (mean 0, sd 3)
# added to its mpg values.
df1 <- mtcars
df1$rownames <- rownames(df1)

df2 <- mtcars
df2$rownames <- rownames(df2)
df2$mpg <- df2$mpg + rnorm(nrow(df2), 0, 3)

# Stack the original and the noisy copy into a single data frame.
data <- rbind(df1, df2)

这里比较了使用 dplyr 和 data.table 返回概率的两种方法

library(dplyr)
library(data.table)
library(tidyr)
library(tibble)
# Input data: the first column of mtcars (mpg), with the row names moved
# into a regular column named "car".
df <- rownames_to_column(mtcars[1], var = "car")
库(dplyr)
库(数据表)
图书馆(tidyr)
图书馆(tibble)
#//输入数据
df%
行名称到列(“car”)
-测试

# // dplyr
# Monte Carlo estimate of the chance that each car has the lowest mpg.
# Every car is replicated 10000 times (one copy per simulation run), a
# Poisson draw with mean equal to the car's mpg is taken for each copy, and
# the car with the smallest draw is flagged within each run.
system.time({
  out <- df %>%
    uncount(10000, .id = "run") %>%
    # rpois() is vectorised over `lambda`, so one call yields a draw for
    # every row; a rowwise() pass with rpois(1, ...) per row is not needed.
    mutate(sim_mpg = rpois(n(), lambda = mpg)) %>%
    group_by(run) %>%
    # After sorting, the first row of each run holds its smallest draw.
    # NOTE(review): if several cars tie for the smallest draw in a run, only
    # the one that sorts first is flagged -- confirm this is acceptable.
    arrange(sim_mpg) %>%
    mutate(lowest_mpg = row_number() == 1) %>%
    group_by(car) %>%
    # Share of runs in which the car was flagged, plus its observed mpg.
    summarize(
      chance_lowest = mean(lowest_mpg),
      orig_mpg = first(mpg)
    )
})
# Timing reported for the original row-wise version of this pipeline:
# user system elapsed
# 1.715 0.074 1.787


# // data.table
# Same simulation as the dplyr version, written with data.table.
system.time({
  # setDT() converts `df` to a data.table by reference. Repeating the row
  # indices 10000 times replicates every car, and rowid(car) numbers the
  # copies of each car, which serves as the simulation run id.
  df_expand <- setDT(df)[rep(seq_len(.N), 10000)][, run := rowid(car)]

  # rpois() is vectorised over `lambda`, so a single call draws one value
  # per row; grouping by every row index to call rpois(1, ...) is not needed.
  out2 <- df_expand[, sim_mpg := rpois(.N, lambda = mpg)
  ][order(sim_mpg), lowest_mpg := seq_len(.N) == 1, by = run
  ][, .(chance_lowest = mean(lowest_mpg), orig_mpg = first(mpg)), by = car]
})
# Timing reported for the original per-row version of this block:
# user system elapsed
# 0.704 0.050 0.757

# Sanity check on the dplyr result: exactly one car is flagged per run, so
# the estimated probabilities sum to 1.
sum(out$chance_lowest)
#[1] 1
#//dplyr
系统时间({
超出%
取消计数(10000,.id=“run”)%>%
行()
变异(sim_mpg=RPOI(1,lambda=mpg))%>%
分组依据(运行)%>%
排列(模拟mpg)%>%
变异(最低值=行数()==1)%>%
组别(汽车)%>%
总结(机会最低=平均值(最低平均值),
初始值=第一个(mpg))
})
#用户系统运行时间
# 1.715 0.074 1.787
#//data.table
系统时间({
df_扩展
library(dplyr)
library(data.table)
library(tidyr)
library(tibble)
# Input data: the first column of mtcars (mpg), with the row names moved
# into a regular column named "car".
df <- rownames_to_column(mtcars[1], var = "car")
# // dplyr
# Monte Carlo estimate of the chance that each car has the lowest mpg.
# Every car is replicated 10000 times (one copy per simulation run), a
# Poisson draw with mean equal to the car's mpg is taken for each copy, and
# the car with the smallest draw is flagged within each run.
system.time({
  out <- df %>%
    uncount(10000, .id = "run") %>%
    # rpois() is vectorised over `lambda`, so one call yields a draw for
    # every row; a rowwise() pass with rpois(1, ...) per row is not needed.
    mutate(sim_mpg = rpois(n(), lambda = mpg)) %>%
    group_by(run) %>%
    # After sorting, the first row of each run holds its smallest draw.
    # NOTE(review): if several cars tie for the smallest draw in a run, only
    # the one that sorts first is flagged -- confirm this is acceptable.
    arrange(sim_mpg) %>%
    mutate(lowest_mpg = row_number() == 1) %>%
    group_by(car) %>%
    # Share of runs in which the car was flagged, plus its observed mpg.
    summarize(
      chance_lowest = mean(lowest_mpg),
      orig_mpg = first(mpg)
    )
})
# Timing reported for the original row-wise version of this pipeline:
# user system elapsed
# 1.715 0.074 1.787


# // data.table
# Same simulation as the dplyr version, written with data.table.
system.time({
  # setDT() converts `df` to a data.table by reference. Repeating the row
  # indices 10000 times replicates every car, and rowid(car) numbers the
  # copies of each car, which serves as the simulation run id.
  df_expand <- setDT(df)[rep(seq_len(.N), 10000)][, run := rowid(car)]

  # rpois() is vectorised over `lambda`, so a single call draws one value
  # per row; grouping by every row index to call rpois(1, ...) is not needed.
  out2 <- df_expand[, sim_mpg := rpois(.N, lambda = mpg)
  ][order(sim_mpg), lowest_mpg := seq_len(.N) == 1, by = run
  ][, .(chance_lowest = mean(lowest_mpg), orig_mpg = first(mpg)), by = car]
})
# Timing reported for the original per-row version of this block:
# user system elapsed
# 0.704 0.050 0.757

# Sanity check on the dplyr result: exactly one car is flagged per run, so
# the estimated probabilities sum to 1.
sum(out$chance_lowest)
#[1] 1