R 从一个子集数据帧计算频率、平均值、wavg的更智能的方法?
我有一个数据框,并且已经对它取了子集(标签:r, dplyr)。以下是 `glimpse(oc_LV)` 的结果。我想为变量的频率、平均值和加权平均值创建一个表。
我是这样做的:
# Open / closed restaurant counts per cuisine, entered by hand.
cuisine <- c("Chinese", "Thai", "Japanese")
open_fr <- c(196, 54, 51)
closed_fr <- c(86, 34, 38)

# Derive every ratio from the count vectors instead of retyping the numbers:
# the hand-typed version used 36 instead of 34 for the Thai closed count and
# put the Thai weight (90 / 461) where the Thai closed share belonged.
total_fr <- open_fr + closed_fr             # restaurants per cuisine
cuisine_weight <- total_fr / sum(total_fr)  # each cuisine's share of all rows

# Share of open / closed restaurants within each cuisine.
open_avg <- open_fr / total_fr
closed_avg <- closed_fr / total_fr

# Within-cuisine shares weighted by the cuisine's share of the total.
open_wavg <- cuisine_weight * open_avg
closed_wavg <- cuisine_weight * closed_avg

open_closed_LV <- data.frame(
  cuisine, open_fr, closed_fr, open_avg, closed_avg, open_wavg, closed_wavg
)

# Round only the table columns; the standalone vectors keep full precision.
ratio_cols <- c("open_avg", "closed_avg", "open_wavg", "closed_wavg")
open_closed_LV[ratio_cols] <- round(open_closed_LV[ratio_cols], digits = 2)
你的意思是这样的吗?
# Reconstructed from a machine-translated listing: function names had been
# translated and the text between `<` and `>` had been stripped.
library(dplyr)
library(tidyr)

# NOTE(review): the input name (`oc_LV`) and the column-name vector were lost
# in extraction; they are inferred from the sample data and the printed
# output below -- confirm against the original answer.
df <- oc_LV %>%
  # One row per city / cuisine, one count column per is_open value.
  spread(is_open, n) %>%
  `colnames<-`(c("city", "cuisine", "closed_fr", "open_fr")) %>%
  # `.` is the piped-in table; dropping columns 1 and 2 leaves the two
  # count columns, so rowSums() is the per-cuisine total and sum() the
  # grand total.
  mutate(
    open_avg = open_fr / rowSums(.[, -c(1, 2)]),
    closed_avg = closed_fr / rowSums(.[, -c(1, 2)]),
    open_wavg = rowSums(.[, -c(1, 2)]) / sum(.[, -c(1, 2)]) * open_avg,
    closed_wavg = rowSums(.[, -c(1, 2)]) / sum(.[, -c(1, 2)]) * closed_avg
  )
df
输出为:
# A tibble: 3 x 8
# Groups: city [1]
city cuisine closed_fr open_fr open_avg closed_avg open_wavg closed_wavg
<chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
1 Las Vegas Chinese 86 196 0.70 0.30 0.43 0.187
2 Las Vegas Japanese 38 51 0.57 0.43 0.11 0.083
3 Las Vegas Thai 34 54 0.61 0.39 0.12 0.074
你是说像这样吗?
# Reconstructed from a machine-translated listing: function names had been
# translated and the text between `<` and `>` had been stripped.
library(dplyr)
library(tidyr)

# NOTE(review): the input name (`oc_LV`) and the column-name vector were lost
# in extraction; they are inferred from the sample data and the printed
# output below -- confirm against the original answer.
df <- oc_LV %>%
  # One row per city / cuisine, one count column per is_open value.
  spread(is_open, n) %>%
  `colnames<-`(c("city", "cuisine", "closed_fr", "open_fr")) %>%
  # `.` is the piped-in table; dropping columns 1 and 2 leaves the two
  # count columns, so rowSums() is the per-cuisine total and sum() the
  # grand total.
  mutate(
    open_avg = open_fr / rowSums(.[, -c(1, 2)]),
    closed_avg = closed_fr / rowSums(.[, -c(1, 2)]),
    open_wavg = rowSums(.[, -c(1, 2)]) / sum(.[, -c(1, 2)]) * open_avg,
    closed_wavg = rowSums(.[, -c(1, 2)]) / sum(.[, -c(1, 2)]) * closed_avg
  )
df
输出为:
# A tibble: 3 x 8
# Groups: city [1]
city cuisine closed_fr open_fr open_avg closed_avg open_wavg closed_wavg
<chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
1 Las Vegas Chinese 86 196 0.70 0.30 0.43 0.187
2 Las Vegas Japanese 38 51 0.57 0.43 0.11 0.083
3 Las Vegas Thai 34 54 0.61 0.39 0.12 0.074
我认为您想要的结果可以通过组合使用 dplyr 和 tidyr 来实现,大致如下:
# Sum `n` for every city / categories / is_open combination, then spread the
# is_open values into their own columns (one row per city and category).
with_cols <- spread(
  summarise(
    group_by(df, city, categories, is_open),
    n = sum(n)
  ),
  is_open,
  n
)
> with_cols
# A tibble: 3 x 4
# Groups: city, categories [3]
city categories `0` `1`
* <chr> <chr> <int> <int>
1 Las Vegas Chinese 86 196
2 Las Vegas Japanese 38 51
3 Las Vegas Thai 34 54
我认为您想要的结果可以通过组合使用 dplyr 和 tidyr 来实现,大致如下:
# Sum `n` for every city / categories / is_open combination, then spread the
# is_open values into their own columns (one row per city and category).
with_cols <- spread(
  summarise(
    group_by(df, city, categories, is_open),
    n = sum(n)
  ),
  is_open,
  n
)
> with_cols
# A tibble: 3 x 4
# Groups: city, categories [3]
city categories `0` `1`
* <chr> <chr> <int> <int>
1 Las Vegas Chinese 86 196
2 Las Vegas Japanese 38 51
3 Las Vegas Thai 34 54
评论:@PIG 这里的 colnames 那一步该怎么写?——可以改用 rename。
#sample data
> dput(oc_LV)
structure(list(city = c("Las Vegas", "Las Vegas", "Las Vegas",
"Las Vegas", "Las Vegas", "Las Vegas"), is_open = c("1", "0",
"1", "1", "0", "0"), categories = c("Chinese", "Chinese", "Thai",
"Japanese", "Japanese", "Thai"), n = c(196L, 86L, 54L, 51L, 38L,
34L)), .Names = c("city", "is_open", "categories", "n"), row.names = c(NA,
-6L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), vars = c("city",
"is_open"), drop = TRUE, indices = list(c(1L, 4L, 5L), c(0L,
2L, 3L)), group_sizes = c(3L, 3L), biggest_group_size = 3L, labels = structure(list(
city = c("Las Vegas", "Las Vegas"), is_open = c("0", "1")), .Names = c("city",
"is_open"), row.names = c(NA, -2L), class = "data.frame", vars = c("city",
"is_open"), drop = TRUE))
# Sum `n` for every city / categories / is_open combination, then spread the
# is_open values into their own columns (one row per city and category).
with_cols <- spread(
  summarise(
    group_by(df, city, categories, is_open),
    n = sum(n)
  ),
  is_open,
  n
)
> with_cols
# A tibble: 3 x 4
# Groups: city, categories [3]
city categories `0` `1`
* <chr> <chr> <int> <int>
1 Las Vegas Chinese 86 196
2 Las Vegas Japanese 38 51
3 Las Vegas Thai 34 54
# Give the spread columns readable names: `1` becomes open, `0` becomes closed.
with_cols <- rename(with_cols, open = `1`, closed = `0`)
# Share of open restaurants per row; the result is printed, not stored.
mutate(with_cols, open_avg = open / (open + closed))