Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/81.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 查看函数引导到引导估计使用的值_R_Statistics Bootstrap - Fatal编程技术网

R:查看 boot 函数在计算自助法(bootstrap)估计时实际使用的值

R 查看函数引导到引导估计使用的值,r,statistics-bootstrap,R,Statistics Bootstrap,我编写了下面的代码,以获得平均值的自举估计值。我的目标是通过boot软件包中的boot功能查看从数据集中选择的数字,最好是按照选择的顺序 数据集只包含三个数字:1、10和100,我只使用两个引导样本 估计的平均值为23.5,下面的代码表明六个数字包括一个“1”、四个“10”和一个“100”。然而,这些数字有30种可能的组合,其平均值为23.5 我有没有办法确定这30种可能的组合中哪一种是两个引导样本中实际出现的组合 library(boot) set.seed(1234) dat <-

我编写了下面的代码,以获得均值的自助法(bootstrap)估计值。我的目标是查看 boot 软件包中的 boot 函数从数据集中抽取了哪些数字,最好能按照抽取的顺序列出。

数据集只包含三个数字:1、10和100,我只使用两个引导样本

估计的平均值为23.5,下面的代码表明六个数字包括一个“1”、四个“10”和一个“100”。然而,这些数字有30种可能的组合,其平均值为23.5

我有没有办法确定这30种可能的组合中,哪一种才是两个自助样本中实际出现的组合?

library(boot)

set.seed(1234)

dat <- c(1, 10, 100)
# Statistic passed to boot(): the arithmetic mean of the observations of
# `dat` selected by the index vector `i`.
av <- function(dat, i) {
  picked <- dat[i]
  sum(picked) / length(picked)
}
# Run the bootstrap on `dat` with `av` as the statistic and R = 2 replicates.
av.boot <- boot(dat, av, R = 2)
# Print the fitted object; the console output is reproduced in the comments
# below (original estimate, bias and standard error).
av.boot
#
# ORDINARY NONPARAMETRIC BOOTSTRAP
#
#
# Call:
# boot(data = dat, statistic = av, R = 2)
#
#
# Bootstrap Statistics :
#     original  bias    std. error
# t1*       37   -13.5    19.09188
#

# Original estimate (mean(dat) = 37) plus the reported bias (-13.5) gives the
# bootstrap estimate of the mean.
mean(dat) + -13.5 
# [1] 23.5

# The two samples must have contained one '1', four '10' and one '100',
# but there are 30 possibilities.
# Which of these 30 possible sequences actually occurred?

# This code shows there must have been one '1', four '10' and one '100'
# and shows the 30 possible combinations

# Enumerate every ordered way of drawing six values from {1, 10, 100}
# (3^6 = 729 rows). Columns V1..V3 are the first bootstrap sample and
# V4..V6 are the second.
my.combos <- expand.grid(
  setNames(rep(list(c(1, 10, 100)), 6), paste0("V", 1:6))
)

# Mean of the two per-sample means, computed column-wise instead of looping
# over the rows of the data frame with apply().
my.means <- unname(
  (rowMeans(my.combos[, 1:3]) + rowMeans(my.combos[, 4:6])) / 2
)

# Keep the rows whose overall mean matches the bootstrap estimate of 23.5.
# Compare with a tolerance rather than `==`, because the means come from
# floating-point division.
possible.samples <- my.combos[abs(my.means - 23.5) < 1e-8, ]
dim(possible.samples)

# Count how many times each data value appears in every candidate row.
n.1   <- rowSums(possible.samples == 1)
n.10  <- rowSums(possible.samples == 10)
n.100 <- rowSums(possible.samples == 100)

# Counts for the first candidate row.
n.1[1]
n.10[1]
n.100[1]

# Each line prints TRUE when every candidate row has the same count of that
# value, i.e. all candidates are rearrangements of the same six numbers.
length(unique(n.1))   == 1
length(unique(n.10))  == 1
length(unique(n.100)) == 1
library(boot)
set.seed(1234)

dat <- …

我认为您可以使用下面的代码确定被抽中的数字及其抽样顺序。您需要从 boot 包中提取函数 ordinary.array,然后将该函数粘贴到您的 R 代码中。接着指定 n、R 和 strata 的值,其中 n 是数据集中的观测数,R 是所需的重复(自助)样本数。

我不知道这种方法有多普遍,但我尝试了几个简单的例子,包括下面的例子

library(boot)

set.seed(1234)

dat <- c(1, 10, 100, 1000)
# Statistic passed to boot(): the arithmetic mean of the observations of
# `dat` selected by the index vector `i`.
av <- function(dat, i) {
  picked <- dat[i]
  sum(picked) / length(picked)
}
# Run the bootstrap on `dat` with `av` as the statistic and R = 3 replicates.
av.boot <- boot(dat, av, R = 3)
# Print the fitted object; the console output is reproduced in the comments
# below (original estimate, bias and standard error).
av.boot
#
# ORDINARY NONPARAMETRIC BOOTSTRAP
#
#
# Call:
# boot(data = dat, statistic = av, R = 3)
#
#
# Bootstrap Statistics :
#     original  bias    std. error
# t1*   277.75  -127.5    132.2405
# 
# 

# Original estimate (mean(dat) = 277.75) plus the reported bias (-127.5) gives
# the bootstrap estimate of the mean.
mean(dat) + -127.5
# [1] 150.25

# boot:::ordinary.array

# Build the matrix of resampling indices for an ordinary bootstrap.
# Stand-alone copy of boot:::ordinary.array: the package-internal helper
# `bsample` is inlined so the stratified branch also works when this
# function is pasted into user code without the boot namespace.
#
# Arguments:
#   n      - number of observations in the data set.
#   R      - number of bootstrap replicates.
#   strata - stratum label(s). A single distinct label means unstratified
#            resampling; otherwise it is compared element-wise against
#            seq_len(n), so it should have one label per observation.
#
# Returns an integer matrix with R rows and n columns of indices in 1..n.
# In the usage shown in this file each row holds the indices of one
# replicate.
ordinary.array <- function(n, R, strata) {
  inds <- as.integer(names(table(strata)))
  if (length(inds) == 1L) {
    # One stratum: draw all n * R indices in a single call, then reshape.
    # The draw is kept exactly as in boot so results match for a given seed.
    output <- sample.int(n, n * R, replace = TRUE)
    dim(output) <- c(R, n)
  } else {
    output <- matrix(0L, R, n)
    for (is in inds) {
      # Positions of the observations that belong to this stratum.
      gp <- seq_len(n)[strata == is]
      output[, gp] <- if (length(gp) == 1) {
        # A singleton stratum can only ever resample itself.
        rep(gp, R)
      } else {
        # Resample with replacement within the stratum (inlined `bsample`).
        gp[sample.int(length(gp), R * length(gp), replace = TRUE)]
      }
    }
  }
  output
}

# I think the function ordinary.array determines which elements 
# of the data are sampled in each of the R samples

# Reset the seed to the value used before the boot() call above, then
# generate the index matrix for n = 4 observations and R = 3 replicates with
# a single stratum (strata = 1).
set.seed(1234)
ordinary.array(n=4,R=3,1)

# Printed result: each row lists the positions in `dat` used for one
# replicate.
#      [,1] [,2] [,3] [,4]
# [1,]    1    3    1    3
# [2,]    3    4    1    3
# [3,]    3    3    3    3
#
# which equals:

# Hand check: map each index to its value in dat = c(1, 10, 100, 1000),
# average within each row, then average the three replicate means.
((1+100+1+100) / 4  +  (100+1000+1+100) / 4  +  (100+100+100+100) / 4) / 3

# [1] 150.25
library(boot)
set.seed(1234)
dat