将镶嵌面网格(ggplot2)与denscomp(FitDistripPlus)相结合

将镶嵌面网格(ggplot2)与denscomp(FitDistripPlus)相结合,r,ggplot2,fitdistrplus,R,Ggplot2,Fitdistrplus,首先,我是个新手。我正在尝试将密度图应用于数据中的不同组。使用FitDistripPlus,我为所有数据创建了一个分布密度图 plot(my_data, pch=20) plotdist(my_data$Capture_Rate, histo = TRUE, demp = TRUE) fit_w <- fitdist(my_data$Capture_Rate, "weibull") fit_g <- fitdist(my_data$Capture_Rate, "gamma") f

首先,我是个新手。我正在尝试将密度图应用于数据中的不同组。使用FitDistripPlus,我为所有数据创建了一个分布密度图

plot(my_data, pch=20)

plotdist(my_data$Capture_Rate, histo = TRUE, demp = TRUE)

fit_w <- fitdist(my_data$Capture_Rate, "weibull")
fit_g <- fitdist(my_data$Capture_Rate, "gamma")
fit_ln <- fitdist(my_data$Capture_Rate, "lnorm")

par(mfrow=c(2,2))
plot.legend <- c("Weibull", "lognormal", "gamma")
denscomp(list(fit_w, fit_ln, fit_g), legendtext = plot.legend)
绘图(my_数据,pch=20)
plotdist(我的数据$Capture\u速率,histo=TRUE,demp=TRUE)

拟合w因此,经验密度的解将比理论分布的解稍微容易一些。首先,让我们设置一些虚拟数据,因为我们没有您的数据可供使用

set.seed(123)
# Setup some facets
idx <- expand.grid(c("A", "B"), c("C", "D"))

# For each facet, generate some numbers
df <- apply(idx, 1, function(x){
  data.frame(row = x[[1]],
             col = x[[2]],
             # chose 10 as mean, since Weibull can't be negative
             x = rnorm(100, 10))
})
df <- do.call(rbind, df)
看起来是这样的:

因为我们没有任何理论密度的ggplot stat函数-至少不是面板特定的函数-我们必须在单独的数据框中预先计算理论分布的xy坐标。如下所示:

# Loop over facets
dists <- apply(idx, 1, function(i){
  # Grab data belonging to facet
  dat <- df$x[df$row == i[[1]] & df$col == i[[2]]]

  # Setup x-values
  xseq <- seq(min(dat), max(dat), length.out = 100)

  # Specify distributions of interest
  dists <- c("weibull", "lnorm", "gamma")

  # Loop over distributions
  fits <- lapply(setNames(dists, dists), function(dist) {

    # Estimate parameters
    ests <- fitdist(dat, dist)$estimate

    # Get y-values
    y <- do.call(paste0("d", dist), c(list(x = xseq), as.list(ests)))

    # Multiplied by length(dat) to match absolute counts
    y * length(dat)
  })

  # Format everything neatly in a data.frame
  out <- lapply(dists, function(j) {
    data.frame(row = i[[1]],
               col = i[[2]],
               x = xseq,
               y = fits[[j]],
               distr = j)
  })

  # Combine all distributions
  do.call(rbind, out)
})

# Combine all facets
dists <- do.call(rbind, dists)

根据您自己的数据进行必要的调整。祝你好运

编辑:现在使用示例数据

假设
df
是发布
dput()输出的data.frame。我已经包含了一个条件,检查刻面数据的长度是否大于2,方差是否为非零,以便跳过我们无论如何都无法进行任何估计的数据。此外,我还将变量名转换为与data.frame中的命名方式兼容

idx <- expand.grid(levels(df$Season), levels(df$sYear))

# Loop over facets
dists <- apply(idx, 1, function(i){
  dat <- df$Capture_Rate[df$Season == i[[1]] & df$sYear == i[[2]]]
  print(length(dat))
  if (length(dat) < 2 | var(dat) == 0) {
    return(NULL)
  }
  xseq <- seq(min(dat), max(dat), length.out = 100)
  dists <- c("weibull", "lnorm", "gamma")
  fits <- lapply(setNames(dists, dists), function(dist) {
    ests <- fitdist(dat, dist)$estimate
    y <- do.call(paste0("d", dist), c(list(x = xseq), as.list(ests)))
    y * length(dat)
  })
  out <- lapply(dists, function(j) {
    data.frame(Season = i[[1]],
               sYear = i[[2]],
               x = xseq,
               y = fits[[j]],
               distr = j)
  })
  do.call(rbind, out)
})
dists <- do.call(rbind, dists)

ggplot(df, aes(x=Capture_Rate, fill=sYear))+
  geom_histogram(binwidth = .025,
                 alpha = .5,
                 position = "identity") +
  geom_line(data = dists, aes(x, y * .025, colour = distr), inherit.aes = FALSE) +
  facet_grid(Season ~ sYear)

因此,经验密度的解要比理论分布的解稍微容易一些。首先,让我们设置一些虚拟数据,因为我们没有您的数据可供使用

set.seed(123)
# Setup some facets
idx <- expand.grid(c("A", "B"), c("C", "D"))

# For each facet, generate some numbers
df <- apply(idx, 1, function(x){
  data.frame(row = x[[1]],
             col = x[[2]],
             # chose 10 as mean, since Weibull can't be negative
             x = rnorm(100, 10))
})
df <- do.call(rbind, df)
看起来是这样的:

因为我们没有任何理论密度的ggplot stat函数-至少不是面板特定的函数-我们必须在单独的数据框中预先计算理论分布的xy坐标。如下所示:

# Loop over facets
dists <- apply(idx, 1, function(i){
  # Grab data belonging to facet
  dat <- df$x[df$row == i[[1]] & df$col == i[[2]]]

  # Setup x-values
  xseq <- seq(min(dat), max(dat), length.out = 100)

  # Specify distributions of interest
  dists <- c("weibull", "lnorm", "gamma")

  # Loop over distributions
  fits <- lapply(setNames(dists, dists), function(dist) {

    # Estimate parameters
    ests <- fitdist(dat, dist)$estimate

    # Get y-values
    y <- do.call(paste0("d", dist), c(list(x = xseq), as.list(ests)))

    # Multiplied by length(dat) to match absolute counts
    y * length(dat)
  })

  # Format everything neatly in a data.frame
  out <- lapply(dists, function(j) {
    data.frame(row = i[[1]],
               col = i[[2]],
               x = xseq,
               y = fits[[j]],
               distr = j)
  })

  # Combine all distributions
  do.call(rbind, out)
})

# Combine all facets
dists <- do.call(rbind, dists)

根据您自己的数据进行必要的调整。祝你好运

编辑:现在使用示例数据

假设
df
是发布
dput()输出的data.frame。我已经包含了一个条件,检查刻面数据的长度是否大于2,方差是否为非零,以便跳过我们无论如何都无法进行任何估计的数据。此外,我还将变量名转换为与data.frame中的命名方式兼容

idx <- expand.grid(levels(df$Season), levels(df$sYear))

# Loop over facets
dists <- apply(idx, 1, function(i){
  dat <- df$Capture_Rate[df$Season == i[[1]] & df$sYear == i[[2]]]
  print(length(dat))
  if (length(dat) < 2 | var(dat) == 0) {
    return(NULL)
  }
  xseq <- seq(min(dat), max(dat), length.out = 100)
  dists <- c("weibull", "lnorm", "gamma")
  fits <- lapply(setNames(dists, dists), function(dist) {
    ests <- fitdist(dat, dist)$estimate
    y <- do.call(paste0("d", dist), c(list(x = xseq), as.list(ests)))
    y * length(dat)
  })
  out <- lapply(dists, function(j) {
    data.frame(Season = i[[1]],
               sYear = i[[2]],
               x = xseq,
               y = fits[[j]],
               distr = j)
  })
  do.call(rbind, out)
})
dists <- do.call(rbind, dists)

ggplot(df, aes(x=Capture_Rate, fill=sYear))+
  geom_histogram(binwidth = .025,
                 alpha = .5,
                 position = "identity") +
  geom_line(data = dists, aes(x, y * .025, colour = distr), inherit.aes = FALSE) +
  facet_grid(Season ~ sYear)

idx你需要三个理论密度的密度,还是经验密度足够?最好三个你需要三个理论密度的密度,还是经验密度足够?最好三个谢谢你的详细解释。我将致力于将我的数据实现到此代码中,并让您知道结果如何。我为我的新手身份道歉,但我似乎无法使我的数据与您的代码相匹配。我肯定我错过了一些简单的东西。我编辑了我的问题以包含一些样本数据。你介意帮我把数据映射到你的代码吗?谢谢。您介意在打印数据框时用
dput(您的数据框)
而不是输出来发布数据吗?我不能轻易地把它转换成我可以在R中使用的东西。谢谢你的详细解释。我将致力于将我的数据实现到此代码中,并让您知道结果如何。我为我的新手身份道歉,但我似乎无法使我的数据与您的代码相匹配。我肯定我错过了一些简单的东西。我编辑了我的问题以包含一些样本数据。你介意帮我把数据映射到你的代码吗?谢谢。您介意在打印数据框时用
dput(您的数据框)
而不是输出来发布数据吗?我不能轻易地把它转换成我可以在R中使用的东西。
idx <- expand.grid(levels(df$Season), levels(df$sYear))

# Loop over facets
dists <- apply(idx, 1, function(i){
  dat <- df$Capture_Rate[df$Season == i[[1]] & df$sYear == i[[2]]]
  print(length(dat))
  if (length(dat) < 2 | var(dat) == 0) {
    return(NULL)
  }
  xseq <- seq(min(dat), max(dat), length.out = 100)
  dists <- c("weibull", "lnorm", "gamma")
  fits <- lapply(setNames(dists, dists), function(dist) {
    ests <- fitdist(dat, dist)$estimate
    y <- do.call(paste0("d", dist), c(list(x = xseq), as.list(ests)))
    y * length(dat)
  })
  out <- lapply(dists, function(j) {
    data.frame(Season = i[[1]],
               sYear = i[[2]],
               x = xseq,
               y = fits[[j]],
               distr = j)
  })
  do.call(rbind, out)
})
dists <- do.call(rbind, dists)

ggplot(df, aes(x=Capture_Rate, fill=sYear))+
  geom_histogram(binwidth = .025,
                 alpha = .5,
                 position = "identity") +
  geom_line(data = dists, aes(x, y * .025, colour = distr), inherit.aes = FALSE) +
  facet_grid(Season ~ sYear)