R 从一个子集数据帧计算频率、平均值、wavg的更智能的方法?

R 从一个子集数据帧计算频率、平均值、wavg的更智能的方法?,r,dplyr,R,Dplyr,我有一个数据框,我已经细分了 以下是《代码》一瞥(oc\U LV) 我想为变量的频率、平均值和加权平均值创建一个表() 我是这样做的 cuisine <- c("Chinese", "Thai", "Japanese") open_fr <- c(196, 54, 51) closed_fr <- c(86, 34, 38) open_avg <- c(196/(196+86), 54/(54+36), 51/(51+38)) closed_avg <- c

我有一个数据框,我已经细分了

以下是该数据框(`oc_LV`)的一瞥:

我想为变量 `is_open` 的频率、平均值和加权平均值创建一个表(`open_closed_LV`)。

我是这样做的

    # Build a per-cuisine table of open/closed restaurant counts, the share of
    # each status within a cuisine ("avg"), and that share weighted by the
    # cuisine's fraction of all restaurants ("wavg").
    cuisine <- c("Chinese", "Thai", "Japanese")
    open_fr <- c(196, 54, 51)
    closed_fr <- c(86, 34, 38)

    # Derive every ratio from the count vectors instead of retyping the
    # numbers: the hand-written formulas used 36 for the Thai closed count
    # (closed_fr says 34) and had a cuisine weight pasted in as closed_avg[2].
    cuisine_total <- open_fr + closed_fr
    cuisine_weight <- cuisine_total / sum(cuisine_total)

    open_avg <- open_fr / cuisine_total
    closed_avg <- closed_fr / cuisine_total
    open_wavg <- cuisine_weight * open_avg
    closed_wavg <- cuisine_weight * closed_avg

    open_closed_LV <- data.frame(
      cuisine, open_fr, closed_fr,
      open_avg, closed_avg, open_wavg, closed_wavg
    )

    # Round only the copies stored in the table; the standalone vectors keep
    # full precision.
    ratio_cols <- c("open_avg", "closed_avg", "open_wavg", "closed_wavg")
    open_closed_LV[ratio_cols] <- lapply(
      open_closed_LV[ratio_cols], round, digits = 2
    )

你的意思是这样的吗?

# spread() comes from tidyr; mutate() and the pipe come from dplyr.
library(dplyr)
library(tidyr)

# Turn the open/closed counts into two columns (one row per cuisine), give
# the columns descriptive names, then derive the plain and weighted shares.
df <- df %>%
  spread(is_open, n) %>%
  `colnames<-`(c("city", "cuisine", "closed_fr", "open_fr")) %>%
  # `.` is the data frame piped into mutate(); dropping its first two columns
  # leaves only the two count columns, so rowSums() gives the per-row total
  # and sum() the grand total.
  mutate(open_avg = open_fr / rowSums(.[, -c(1, 2)]),
         closed_avg = closed_fr / rowSums(.[, -c(1, 2)]),
         open_wavg = rowSums(.[, -c(1, 2)]) / sum(.[, -c(1, 2)]) * open_avg,
         closed_wavg = rowSums(.[, -c(1, 2)]) / sum(.[, -c(1, 2)]) * closed_avg)
df
输出为:

# A tibble: 3 x 8
# Groups:   city [1]
       city  cuisine closed_fr open_fr open_avg closed_avg open_wavg closed_wavg
      <chr>    <chr>     <int>   <int>    <dbl>      <dbl>     <dbl>       <dbl>
1 Las Vegas  Chinese        86     196     0.70       0.30      0.43       0.187
2 Las Vegas Japanese        38      51     0.57       0.43      0.11       0.083
3 Las Vegas     Thai        34      54     0.61       0.39      0.12       0.074

你是说这样的事吗

# spread() comes from tidyr; mutate() and the pipe come from dplyr.
library(dplyr)
library(tidyr)

# Turn the open/closed counts into two columns (one row per cuisine), give
# the columns descriptive names, then derive the plain and weighted shares.
df <- df %>%
  spread(is_open, n) %>%
  `colnames<-`(c("city", "cuisine", "closed_fr", "open_fr")) %>%
  # `.` is the data frame piped into mutate(); dropping its first two columns
  # leaves only the two count columns, so rowSums() gives the per-row total
  # and sum() the grand total.
  mutate(open_avg = open_fr / rowSums(.[, -c(1, 2)]),
         closed_avg = closed_fr / rowSums(.[, -c(1, 2)]),
         open_wavg = rowSums(.[, -c(1, 2)]) / sum(.[, -c(1, 2)]) * open_avg,
         closed_wavg = rowSums(.[, -c(1, 2)]) / sum(.[, -c(1, 2)]) * closed_avg)
df
输出为:

# A tibble: 3 x 8
# Groups:   city [1]
       city  cuisine closed_fr open_fr open_avg closed_avg open_wavg closed_wavg
      <chr>    <chr>     <int>   <int>    <dbl>      <dbl>     <dbl>       <dbl>
1 Las Vegas  Chinese        86     196     0.70       0.30      0.43       0.187
2 Las Vegas Japanese        38      51     0.57       0.43      0.11       0.083
3 Las Vegas     Thai        34      54     0.61       0.39      0.12       0.074

我认为您想要的结果可以通过组合使用 `dplyr` 和 `tidyr` 来实现,大致如下:

# Total n for every city / categories / is_open combination, then move the
# is_open values into their own columns so each combination of city and
# categories ends up on a single row.
with_cols <- spread(
  summarise(group_by(df, city, categories, is_open), n = sum(n)),
  is_open,
  n
)

> with_cols
# A tibble: 3 x 4
# Groups:   city, categories [3]
       city categories   `0`   `1`
*     <chr>      <chr> <int> <int>
1 Las Vegas    Chinese    86   196
2 Las Vegas   Japanese    38    51
3 Las Vegas       Thai    34    54

我认为您想要的结果可以通过组合使用 `dplyr` 和 `tidyr` 来实现,大致如下:

# Total n for every city / categories / is_open combination, then move the
# is_open values into their own columns so each combination of city and
# categories ends up on a single row.
with_cols <- spread(
  summarise(group_by(df, city, categories, is_open), n = sum(n)),
  is_open,
  n
)

> with_cols
# A tibble: 3 x 4
# Groups:   city, categories [3]
       city categories   `0`   `1`
*     <chr>      <chr> <int> <int>
1 Las Vegas    Chinese    86   196
2 Las Vegas   Japanese    38    51
3 Las Vegas       Thai    34    54

我们如何编写 `colnames<-` 这一步?
@PIG 请查阅 `?rename`。
我们如何编写 `colnames<-` 这一步?
@PIG 请查阅 `?rename`。

#sample data
> dput(oc_LV)
# dput() dump of oc_LV: a grouped tibble (class "grouped_df") with the columns
# city, is_open, categories and n, grouped by city and is_open.
# NOTE(review): the vars / indices / labels attributes look like the grouping
# layout written by an older dplyr release -- confirm the object is still
# recognised as grouped by the installed version before relying on it.
structure(list(city = c("Las Vegas", "Las Vegas", "Las Vegas", 
"Las Vegas", "Las Vegas", "Las Vegas"), is_open = c("1", "0", 
"1", "1", "0", "0"), categories = c("Chinese", "Chinese", "Thai", 
"Japanese", "Japanese", "Thai"), n = c(196L, 86L, 54L, 51L, 38L, 
34L)), .Names = c("city", "is_open", "categories", "n"), row.names = c(NA, 
-6L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), vars = c("city", 
"is_open"), drop = TRUE, indices = list(c(1L, 4L, 5L), c(0L, 
2L, 3L)), group_sizes = c(3L, 3L), biggest_group_size = 3L, labels = structure(list(
    city = c("Las Vegas", "Las Vegas"), is_open = c("0", "1")), .Names = c("city", 
"is_open"), row.names = c(NA, -2L), class = "data.frame", vars = c("city", 
"is_open"), drop = TRUE))
# Total n for every city / categories / is_open combination, then move the
# is_open values into their own columns so each combination of city and
# categories ends up on a single row.
with_cols <- spread(
  summarise(group_by(df, city, categories, is_open), n = sum(n)),
  is_open,
  n
)

> with_cols
# A tibble: 3 x 4
# Groups:   city, categories [3]
       city categories   `0`   `1`
*     <chr>      <chr> <int> <int>
1 Las Vegas    Chinese    86   196
2 Las Vegas   Japanese    38    51
3 Las Vegas       Thai    34    54
# Replace the `0` / `1` column names produced by spread() with readable ones.
with_cols <- rename(with_cols, closed = `0`, open = `1`)

# Share of open restaurants in each row; the result is printed, not stored.
mutate(with_cols, open_avg = open / (closed + open))