R：基于模拟结果填充数据帧的列_R_Dataframe

R：基于模拟结果填充数据帧的列

r dataframe

R：基于模拟结果填充数据帧的列,r,dataframe,R,Dataframe,问题继续：我想创建一个数据框架，其中包含掷两个骰子的可能结果。其要点是单独运行模拟，并用结果的数量填充数据框。我编写了以下代码来创建数据帧： # Create variables in data frame dice1 <- sort(rep(1:6,6)) dice2 <- rep(1:6,6) dicesum <- dice1 + dice2 # Assign variables to data frame df <- data.frame(dice1, dic

问题继续：

我想创建一个数据框，其中包含掷两个骰子的所有可能结果。目的是另外单独运行一次模拟，并用各结果出现的次数填充该数据框。我编写了以下代码来创建数据框：

# Create variables in data frame: the full 6 x 6 grid of ordered outcomes
dice1 <- rep(1:6, each = 6)
dice2 <- rep(1:6, times = 6)
dicesum <- dice1 + dice2

# Assign variables to data frame
df <- data.frame(dice1, dice2, dicesum)

# Remove duplicates: sorting each row makes (a, b) and (b, a) identical, so
# duplicated() flags the second ordering of every unordered pair
inx <- duplicated(t(apply(df, 1, sort)))
df <- df[!inx, ]
rownames(df) <- seq_len(nrow(df))

# initiate a column that holds the simulation outcome count
df["count"] <- numeric(nrow(df))

> str(df)
'data.frame':   21 obs. of  4 variables:
 $ dice1  : int  1 1 1 1 1 1 2 2 2 2 ...
 $ dice2  : int  1 2 3 4 5 6 2 3 4 5 ...
 $ dicesum: int  2 3 4 5 6 7 4 5 6 7 ...
 $ count  : num  0 0 0 0 0 0 0 0 0 0 ...

> head(df)
  dice1 dice2 dicesum count
1     1     1       2     0
2     1     2       3     0
3     1     3       4     0
4     1     4       5     0
5     1     5       6     0
6     1     6       7     0


# Simulate dice rolls (100 independent rolls of each die)
sim_dice1 <- sample(1:6, 100, replace = TRUE)
sim_dice2 <- sample(1:6, 100, replace = TRUE)

# Data frame with simulations
rolls <- data.frame(sim_dice1, sim_dice2)

> str(rolls)
'data.frame':   100 obs. of  2 variables:
 $ sim_dice1: int  2 1 5 2 4 2 1 4 6 1 ...
 $ sim_dice2: int  6 5 4 1 4 5 4 5 6 2 ...

> head(rolls)
  sim_dice1 sim_dice2
1         2         6
2         1         5
3         5         4
4         2         1
5         4         4
6         2         5

用模拟结果填充 df 中 count 列的最佳方法是什么？请注意，模拟数据框中包含重复的结果——我把 (1, 6) 和 (6, 1) 视为同一个（重复的）结果。

这就是你正在寻找的：

> # reduce to 10 simulation for illustration
> set.seed(17699398)
> sim_dice1 <- sample(1:6, 10, replace = T)
> sim_dice2 <- sample(1:6, 10, replace = T)
> 
> sim_sum <- sim_dice1 + sim_dice2
> 
> # print for illustration
> cbind(sim_dice1, sim_dice2, sim_sum)
      sim_dice1 sim_dice2 sim_sum
 [1,]         6         5      11
 [2,]         3         1       4
 [3,]         3         2       5
 [4,]         6         5      11
 [5,]         3         6       9
 [6,]         3         2       5
 [7,]         1         5       6
 [8,]         1         2       3
 [9,]         2         4       6
[10,]         2         2       4
> 
> # make table
> sim_outcome <- table(sim_sum)
> sim_outcome
sim_sum
 3  4  5  6  9 11 
 1  2  2  2  1  2 
> 
> 
> # Fill df$count from the table. table() returns the counts named by sim_sum in
> # sorted order; match() gives, for each simulated sum, only the FIRST row of df
> # with that dicesum, so all counts for a sum land on that single row (counts
> # are per sum, not per dice pair).
> df$count[match(as.integer(names(sim_outcome)), df$dicesum)] <- sim_outcome
> 
> df
   dice1 dice2 dicesum count
1      1     1       2     0
2      1     2       3     1
3      1     3       4     2
4      1     4       5     2
5      1     5       6     2
6      1     6       7     0
7      2     2       4     0
8      2     3       5     0
9      2     4       6     0
10     2     5       7     0
11     2     6       8     0
12     3     3       6     0
13     3     4       7     0
14     3     5       8     0
15     3     6       9     1
16     4     4       8     0
17     4     5       9     0
18     4     6      10     0
19     5     5      10     0
20     5     6      11     2
21     6     6      12     0

我们可以使用dplyr包来完成这项任务

library(dplyr)

# Create and count the number of each Group.
# Group is the sorted pair as text, so 1,6 and 6,1 both become "1, 6".
rolls2 <- rolls %>%
  rowwise() %>%
  mutate(Group = toString(sort(c(sim_dice1, sim_dice2)))) %>%
  ungroup() %>%
  count(Group)

# Create the Group name
df2 <- df %>%
  rowwise() %>%
  mutate(Group = toString(sort(c(dice1, dice2)))) %>%
  # drop the row-wise grouping so it does not carry over into df3
  ungroup()

# Perform merge between df2 and rolls2.
# NOTE: rename(count = n) requires that df does not already have a `count`
# column; otherwise the rename would produce a duplicated column name.
df3 <- df2 %>%
  left_join(rolls2, by = "Group") %>%
  select(-Group) %>%
  rename(count = n) %>%
  # pairs that never occurred in the simulation have no match; count them as 0
  replace(is.na(.), 0)

df3
Source: local data frame [21 x 4]
Groups: <by row>

# A tibble: 21 x 4
   dice1 dice2 dicesum count
   <int> <int>   <int> <dbl>
 1     1     1       2     0
 2     1     2       3     5
 3     1     3       4     5
 4     1     4       5     8
 5     1     5       6     4
 6     1     6       7     5
 7     2     2       4     2
 8     2     3       5     8
 9     2     4       6     7
10     2     5       7     7
# ... with 11 more rows

资料

我会创建一个包含 dice1、dice2 的数据框，用循环或 apply 系列函数运行掷骰模拟，每次返回结果。全部完成后，执行 group_by(dice1, dice2) %>% summarise(count = n(), dicesum = sum(dice1, dice2))，就能得到包含所需内容的表。(3, 6)、(6, 3)、(4, 5) 和 (5, 4) 的总和都是 9，但我觉得提问者希望 (3, 6) 和 (6, 3) 归为一组，而 (4, 5) 和 (5, 4) 归为另一组。不过我并不确定。他确实指出：“模拟数据框中包含重复的结果——我把 (1, 6) 和 (6, 1) 视为重复的结果。”因此，我认为 (1, 6) 和 (6, 1) 应该算作同一个结果。

# Create variables in data frame: the full 6 x 6 grid of ordered outcomes
dice1 <- rep(1:6, each = 6)
dice2 <- rep(1:6, times = 6)
dicesum <- dice1 + dice2

# Assign variables to data frame
df <- data.frame(dice1, dice2, dicesum)

# Remove duplicates: sorting each row makes (a, b) and (b, a) identical, so
# duplicated() flags the second ordering of every unordered pair
inx <- duplicated(t(apply(df, 1, sort)))
df <- df[!inx, ]
rownames(df) <- seq_len(nrow(df))

# Set seed for the reproducibility
set.seed(123)

# Simulate dice rolls
sim_dice1 <- sample(1:6, 100, replace = TRUE)
sim_dice2 <- sample(1:6, 100, replace = TRUE)

# Data frame with simulations
rolls <- data.frame(sim_dice1, sim_dice2)